superhome (paginated JSON API)
Reference AI-product-sync parser. Paginated JSON API. fetch_next_chunk returns one page per call; cursor (page number) lives in state['parser']['page']. Demonstrates the natural shape for paginated REST sources where one HTTP call gives you one chunk.
Reference parser for the aiGenerated product-sync provider. Use it as a starting point when the source is a paginated REST API that returns one chunk of products per request. Demonstrates the natural pattern: fetch_next_chunk makes one HTTP call and bumps the page cursor in state['parser'].
Source shape: paginated-json-api
Runtime helper note. The _fetch_page helper uses a try/except that falls back to fetch_via_proxy() — a globally-injected function that routes the request through Octocom's HTTP proxy IP (74.242.171.127). Use it when the origin blocks the Azure sandbox IP (symptoms: HTTPError 403, HTTPError 429, unexplained urlopen timeouts). Signature:
fetch_via_proxy(url, *, headers=None, timeout=60, method='GET', data=None) -> bytes
Full source
"""Superhome (superhome.com.cy) parser. Paginated JSON API.
Demonstrates the natural shape for paginated sources:
fetch_next_chunk() = one page (or batch of pages) per call.
state['parser']['page'] holds the cursor across calls.
The kernel persists `state` between executePython calls, so this just
works without serializing/deserializing the cursor through the wire.
"""
import json
import re
import unicodedata
import urllib.error
import urllib.parse
import urllib.request
# -----------------------------------------------------------------------------
# Helpers (slugify, decode_greek_text, sanitize_description)
# -----------------------------------------------------------------------------
# Named HTML entities seen in the feed's text fields, mapped to the characters
# they stand for. The keys must be the literal entity text ("&alpha;"), not the
# already-decoded character: decode_greek_text() does plain substring
# replacement, and anything entity-shaped that is NOT listed here is stripped.
# Insertion order matters: "&amp;" is deliberately placed after the Greek
# letters, so a double-encoded "&amp;alpha;" is not fully decoded by the loop
# alone.
GREEK_ENTITIES = {
    "&Alpha;": "Α", "&Beta;": "Β", "&Gamma;": "Γ", "&Delta;": "Δ",
    "&Epsilon;": "Ε", "&Zeta;": "Ζ", "&Eta;": "Η", "&Theta;": "Θ",
    "&Iota;": "Ι", "&Kappa;": "Κ", "&Lambda;": "Λ", "&Mu;": "Μ",
    "&Nu;": "Ν", "&Xi;": "Ξ", "&Omicron;": "Ο", "&Pi;": "Π",
    "&Rho;": "Ρ", "&Sigma;": "Σ", "&Tau;": "Τ", "&Upsilon;": "Υ",
    "&Phi;": "Φ", "&Chi;": "Χ", "&Psi;": "Ψ", "&Omega;": "Ω",
    "&alpha;": "α", "&beta;": "β", "&gamma;": "γ", "&delta;": "δ",
    "&epsilon;": "ε", "&zeta;": "ζ", "&eta;": "η", "&theta;": "θ",
    "&iota;": "ι", "&kappa;": "κ", "&lambda;": "λ", "&mu;": "μ",
    "&nu;": "ν", "&xi;": "ξ", "&omicron;": "ο", "&pi;": "π",
    "&rho;": "ρ", "&sigmaf;": "ς", "&sigma;": "σ", "&tau;": "τ",
    "&upsilon;": "υ", "&phi;": "φ", "&chi;": "χ", "&psi;": "ψ",
    "&omega;": "ω", "&nbsp;": " ", "&amp;": "&", "&acute;": "´",
    "&uml;": "¨",
}


def slugify(text, max_length=None):
    """Return a lowercase, hyphen-separated slug for *text*.

    Combining marks (accents, Greek tonos) are removed, every run of
    non-word characters or underscores becomes a single hyphen, and
    leading/trailing hyphens are dropped. ``None`` or empty input yields
    ``""``.

    Args:
        text: Source string, or ``None``.
        max_length: Optional cap on the slug length; a hyphen left dangling
            by the cut is removed.
    """
    normalized = unicodedata.normalize("NFD", (text or "").strip().lower())
    # After NFD, accents are separate "Mn" code points and can be dropped.
    without_marks = "".join(
        ch for ch in normalized if unicodedata.category(ch) != "Mn"
    )
    # "-" is itself a non-word character, so one substitution both maps
    # separators to hyphens and collapses repeated hyphens.
    slug = re.sub(r"[\W_]+", "-", without_marks).strip("-")
    if max_length:
        slug = slug[:max_length].rstrip("-")
    return slug


def decode_greek_text(text):
    """Decode the HTML entities listed in GREEK_ENTITIES and tidy whitespace.

    Steps, in order: unwrap one level of ``&amp;`` (so ``&amp;alpha;``
    becomes ``&alpha;``), replace every known entity with its character,
    delete any remaining alphabetic named entity (``&word;``), then collapse
    whitespace runs to a single space and trim.

    Args:
        text: Raw text from the feed, or ``None``.

    Returns:
        The decoded string; ``""`` for ``None`` or empty input.
    """
    decoded = (text or "").replace("&amp;", "&")
    for entity, char in GREEK_ENTITIES.items():
        decoded = decoded.replace(entity, char)
    # Whatever still looks like a named entity is unknown to the table.
    decoded = re.sub(r"&[a-zA-Z]+;", "", decoded)
    return re.sub(r"\s+", " ", decoded).strip()
# Upper bound, in characters, on a sanitized description.
MAX_DESC_LEN = 10_000


def sanitize_description(html):
    """Strip inline base64 payloads from a description and cap its length.

    Removes ``data:<mime>;base64,<payload>`` URIs, drops ``src`` attributes
    left empty by that removal, collapses runs of two or more whitespace
    characters to one space, and truncates to ``MAX_DESC_LEN`` characters.

    Args:
        html: Description markup, or ``None`` (treated as empty, matching
            how the sibling text helpers handle missing input).

    Returns:
        The cleaned string, at most ``MAX_DESC_LEN`` characters long and
        without leading or trailing whitespace.
    """
    cleaned = re.sub(
        r"data:[a-zA-Z0-9+/.\-]+;base64,[A-Za-z0-9+/=\s]+", "", html or ""
    )
    cleaned = re.sub(r"src\s*=\s*[\"']\s*[\"']", "", cleaned)
    cleaned = re.sub(r"\s{2,}", " ", cleaned).strip()
    # The cut can land right after a space; trim so the result stays tidy.
    return cleaned[:MAX_DESC_LEN].rstrip()
# -----------------------------------------------------------------------------
# Source
# -----------------------------------------------------------------------------
FEED_URL = "https://superhome.com.cy/dwapi/Feeds/GetFeedOutput"
# Superhome's API accepts large page sizes; bigger = fewer round-trips.
PAGE_SIZE = 100
USER_AGENT = "Mozilla/5.0 (compatible; OctocomSync/1.0)"
# URL-encoded Greek "προιοντα" path segment, used when constructing
# product URLs back into the storefront.
GREEK_PATH = "%CF%80%CF%81%CE%BF%CE%B9%CE%BF%CE%BD%CF%84%CE%B1"


def _fetch_page(page_num):
    """GET one page of the JSON feed and return its product list.

    Tries direct urllib first. On any network-level failure falls back to
    `fetch_via_proxy()`, the runtime-provided helper that routes through
    Octocom's HTTP proxy IP. Use the proxy when the origin blocks the Azure
    sandbox IP — symptoms include `HTTPError 403/429` and unexplained
    `urlopen` timeouts.

    Runtime helper signature:
        fetch_via_proxy(url, *, headers=None, timeout=60,
                        method='GET', data=None) -> bytes

    Args:
        page_num: Page number sent as the feed's ``PageNum`` parameter.

    Returns:
        The list under the response's ``"Products"`` key; an empty list
        when the key is missing or null.

    Raises:
        json.JSONDecodeError: If the response body is not valid JSON.
        Exception: Whatever `fetch_via_proxy()` raises when the fallback
            fails as well.
    """
    params = {
        "Id": 6,
        "languageId": "LANG17",
        "currencyId": "EUR",
        "shopId": "SHOP1",
        "LoadVariantInfoOnVariants": "false",
        "PageSize": PAGE_SIZE,
        "PageNum": page_num,
    }
    url = f"{FEED_URL}?{urllib.parse.urlencode(params)}"
    headers = {"User-Agent": USER_AGENT, "Accept": "application/json"}
    try:
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=30) as r:
            body = r.read()
    except OSError:
        # URLError (and its subclass HTTPError) derive from OSError, and so
        # do the bare socket timeouts / connection resets that can surface
        # while waiting for or reading the response without being wrapped
        # in URLError. Catching OSError makes the documented "fallback on
        # timeouts" actually cover those cases.
        body = fetch_via_proxy(  # noqa: F821 - runtime-provided
            url, headers=headers, timeout=60
        )
    # `or []` also covers an explicit `"Products": null` in the payload.
    return json.loads(body).get("Products") or []
# -----------------------------------------------------------------------------
# Contract
# -----------------------------------------------------------------------------
def fetch_next_chunk(state):
    """Fetch the page the cursor points at, then advance the cursor.

    The cursor is the page number stored at ``state['parser']['page']``;
    the ``'parser'`` dict is created on first use and the page defaults
    to 1.

    Args:
        state: Mutable dict that carries the cursor between calls.

    Returns:
        Whatever `_fetch_page()` returns for the current page.
    """
    cursor = state.setdefault("parser", {})
    current_page = cursor.get("page", 1)
    chunk = _fetch_page(current_page)
    # Bumped only after the fetch returned, so a call that raised leaves
    # the cursor on the same page.
    cursor["page"] = current_page + 1
    return chunk
def _clean_description(value):
    """Decode entities in *value* (two passes) and sanitize the result."""
    # Two decode passes, as the mapping has always done — presumably to
    # unwrap double-encoded entities; confirm against real feed data.
    return sanitize_description(
        decode_greek_text(decode_greek_text(value or ""))
    )


def _collect_images(raw, alt_text):
    """Return image dicts, default image first, without blank or repeated URLs."""
    candidates = [(raw.get("DefaultImage"), True)]
    candidates.extend(
        (img, False) for img in (raw.get("ImagePatternImages") or [])
    )
    images = []
    seen_urls = set()
    for img, is_primary in candidates:
        url = img.get("Value") if isinstance(img, dict) else None
        # An image record without a URL is useless, and the same URL may
        # be listed more than once; emit each URL only once.
        if not url or url in seen_urls:
            continue
        seen_urls.add(url)
        entry = {"url": url, "altText": alt_text}
        if is_primary:
            entry["isPrimary"] = True
        images.append(entry)
    return images


def _collect_metafields(raw):
    """Return metafields from the named ProductFields entries plus the weight."""
    metafields = []
    fields = raw.get("ProductFields")
    if isinstance(fields, dict):
        for field in fields.values():
            if not isinstance(field, dict):
                continue
            label = field.get("Name")
            if label:
                metafields.append(
                    {
                        "key": label,
                        "value": json.dumps(
                            field.get("Value"), ensure_ascii=False
                        ),
                    }
                )
    weight = raw.get("Weight")
    if weight is not None:
        metafields.append({"key": "weight", "value": str(weight)})
    return metafields


def _collect_urls(pid, groups):
    """Return storefront URLs: two canonical ones, then per-group el and en."""
    group_ids = [
        g["Id"] for g in groups if isinstance(g, dict) and g.get("Id")
    ]
    urls = [
        {
            "url": f"https://superhome.com.cy/greek/{GREEK_PATH}/{pid}",
            "language": "el",
            "salesChannel": "web",
        },
        {
            "url": f"https://superhome.com.cy/english/products/{pid}",
            "language": "en",
            "salesChannel": "web",
        },
    ]
    # All Greek group URLs come before all English ones.
    urls.extend(
        {
            "url": f"https://superhome.com.cy/greek/{GREEK_PATH}?GroupId={gid}&ProductId={pid}",
            "language": "el",
            "salesChannel": "web",
        }
        for gid in group_ids
    )
    urls.extend(
        {
            "url": f"https://superhome.com.cy/english/products?GroupId={gid}&ProductId={pid}",
            "language": "en",
            "salesChannel": "web",
        }
        for gid in group_ids
    )
    return urls


def _collect_collections(raw):
    """Return one collection per distinct group slug found in GroupPaths."""
    collections = []
    seen_slugs = set()
    for path in raw.get("GroupPaths") or []:
        for group in path or []:
            group_name = group.get("Name") if isinstance(group, dict) else None
            if not group_name:
                continue
            slug = slugify(group_name)
            # The same group can appear in more than one path; keep only
            # its first occurrence.
            if slug in seen_slugs:
                continue
            seen_slugs.add(slug)
            collections.append({"slug": slug, "name": group_name})
    return collections


def map_one(raw, context):
    """Map one raw feed product to the sync provider's product dict.

    Args:
        raw: One product dict as returned in the feed's product list.
        context: Accepted as part of the parser contract; not used here.

    Returns:
        The mapped product dict, or ``None`` when the product is not
        active or has no ``Id``.
    """
    if not raw.get("Active"):
        return None
    pid = raw.get("Id")
    if not pid:
        return None
    name = raw.get("Name") or ""

    # Price comes wrapped: {"Price": 12.34}
    price_obj = raw.get("Price") or {}
    price = price_obj.get("Price") if isinstance(price_obj, dict) else None

    stock_level = raw.get("StockLevel")
    try:
        stock_quantity = int(stock_level) if stock_level is not None else None
    except (ValueError, TypeError):
        stock_quantity = None
    # "Σε απόθεμα" is Greek for "In stock".
    in_stock = raw.get("StockStatus") == "Σε απόθεμα"

    # Skip an empty name slug so a nameless product gets "123", not "-123".
    slug = "-".join(part for part in (slugify(name), str(pid)) if part)

    return {
        "name": name,
        "slug": slug,
        "uniqueId": str(pid),
        "shortDescription": _clean_description(raw.get("ShortDescription")) or None,
        "longDescription": _clean_description(raw.get("LongDescription")) or None,
        "url": _collect_urls(pid, raw.get("Groups") or []),
        "collections": _collect_collections(raw),
        "variants": [
            {
                "title": "default",
                "price": {
                    "currentPrice": price,
                    "onSale": False,
                },
                "stockQuantity": stock_quantity,
                "inStock": in_stock,
            }
        ],
        "images": _collect_images(raw, name),
        "metafields": _collect_metafields(raw),
        "rawProduct": json.dumps(raw, ensure_ascii=False),
    }