import json
from collections import deque

from bs4 import BeautifulSoup
from sqlalchemy.orm import Session

from app.services.audit import log_event
from app.services.compliance import safe_public_get
from app.services.importer import upsert_products


def extract_jsonld_products(soup: BeautifulSoup, source_url: str, retailer_name: str) -> list[dict]:
    """Collect product rows from schema.org JSON-LD embedded in a page.

    Every ``<script type="application/ld+json">`` element is parsed. The
    top-level object(s) and any objects listed under an ``@graph`` key are
    inspected, and each object whose ``@type`` mentions "product" yields one
    row.

    Args:
        soup: Parsed page to search for JSON-LD script elements.
        source_url: URL the page came from; also used as the product URL when
            the JSON-LD object has no ``url`` of its own.
        retailer_name: Retailer label copied into every row.

    Returns:
        One dict per product object found, in the order they were visited.
        Empty when the page has no parseable product JSON-LD.
    """
    rows = []
    for script in soup.find_all("script", type="application/ld+json"):
        try:
            payload = json.loads(script.string or "{}")
        except (ValueError, RecursionError):
            # Malformed or pathologically nested JSON-LD: skip this script
            # but keep processing the others (best-effort extraction).
            continue

        # Explicit work queue: @graph members are appended behind the objects
        # already waiting, without mutating the parsed payload itself.
        pending = deque(payload if isinstance(payload, list) else [payload])
        while pending:
            item = pending.popleft()
            if not isinstance(item, dict):
                continue

            graph = item.get("@graph")
            if isinstance(graph, list):
                pending.extend(graph)

            # @type may be a string or a list; a substring test on its string
            # form covers both.
            if "product" not in str(item.get("@type", "")).lower():
                continue

            # Normalise offers to a single dict (first entry of a list, or
            # empty when absent or of an unexpected shape).
            offers = item.get("offers") or {}
            if isinstance(offers, list):
                offers = offers[0] if offers else {}
            if not isinstance(offers, dict):
                offers = {}

            brand = item.get("brand", "")
            if isinstance(brand, dict):
                brand = brand.get("name", "")

            image = item.get("image", "")
            if isinstance(image, list):
                # An empty image list must not raise; treat it as "no image".
                image = image[0] if image else ""

            gtin = item.get("gtin13") or item.get("gtin14") or item.get("gtin") or ""

            rows.append({
                "retailer_name": retailer_name,
                "product_name": item.get("name", ""),
                "brand": brand,
                "price": offers.get("price", ""),
                "image_url": image,
                "product_url": item.get("url", source_url),
                "source_url": source_url,
                "source_type": "public_page",
                "barcode_or_gtin": gtin,
                "barcode_source": "public schema.org json-ld" if gtin else "not publicly available",
                "availability_status": offers.get("availability", "unknown"),
            })
    return rows


def extract_meta_product(soup: BeautifulSoup, source_url: str, retailer_name: str) -> list[dict]:
    """Build a single product row from the page's ``<meta>`` tags.

    The product name comes from ``og:title``, falling back to the document
    ``<title>`` text. The price comes from ``product:price:amount`` or
    ``og:price:amount``, and the image from ``og:image``.

    Args:
        soup: Parsed page to read meta tags from.
        source_url: URL of the page; stored as both product and source URL.
        retailer_name: Retailer label copied into the row.

    Returns:
        A one-element list holding the row, or an empty list when no product
        name could be determined.
    """

    def _content(key: str) -> str:
        # Match the tag by its ``property`` attribute first, then by ``name``.
        node = soup.find("meta", property=key) or soup.find("meta", attrs={"name": key})
        if not node:
            return ""
        return node.get("content", "")

    title_text = _content("og:title")
    if not title_text:
        title_text = soup.title.get_text(strip=True) if soup.title else ""
    if not title_text:
        # Without a name there is nothing worth recording.
        return []

    amount = _content("product:price:amount") or _content("og:price:amount")
    picture = _content("og:image")

    row = {
        "retailer_name": retailer_name,
        "product_name": title_text,
        "price": amount,
        "image_url": picture,
        "product_url": source_url,
        "source_url": source_url,
        "source_type": "public_page_meta",
        "barcode_source": "not publicly available",
    }
    return [row]


def scrape_public_page(db: Session, retailer_name: str, url: str) -> dict:
    """Fetch one public page, extract product rows, and upsert them.

    The page is requested through ``safe_public_get``. When that call reports
    failure, a "blocked" audit event is logged and nothing is parsed or
    stored. Otherwise products are extracted from JSON-LD (falling back to
    meta tags when JSON-LD yields no rows), passed to ``upsert_products``, and
    a "success" audit event is logged with the resulting counts.

    Args:
        db: Database session handed to the upsert and audit helpers.
        retailer_name: Retailer label attached to every extracted row.
        url: Address of the page to scrape.

    Returns:
        A dict with ``status`` and ``reason`` plus either zeroed
        ``created``/``updated``/``skipped`` counts (blocked) or the entries of
        the dict returned by ``upsert_products`` (success).
    """
    fetched, html, reason = safe_public_get(url)

    if fetched:
        page = BeautifulSoup(html, "lxml")
        # Prefer structured JSON-LD; use meta tags only when it found nothing.
        products = extract_jsonld_products(page, url, retailer_name) or extract_meta_product(page, url, retailer_name)
        outcome = upsert_products(db, products, url, "public_page")
        log_event(
            db,
            event_type="public_scrape",
            source=url,
            source_type="public_page",
            status="success",
            pages_processed=1,
            products_created=outcome["created"],
            products_updated=outcome["updated"],
            products_skipped=outcome["skipped"],
            compliance_decision=reason,
        )
        return {"status": "success", "reason": reason, **outcome}

    # Fetch was refused: record the reason and stop.
    log_event(
        db,
        event_type="public_scrape",
        source=url,
        source_type="public_page",
        status="blocked",
        error_reason=reason,
        compliance_decision="stopped_no_bypass",
    )
    return {"status": "blocked", "reason": reason, "created": 0, "updated": 0, "skipped": 0}
