#!/usr/bin/env python3
import json, os, re, hashlib, urllib.request, urllib.parse, xml.etree.ElementTree as ET
from datetime import date
from pathlib import Path

# Project root: the parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# JSON catalogue that load() reads and save() rewrites.
TOOLS_PATH = ROOT / "public" / "tools.json"
# ISO-8601 date string (YYYY-MM-DD), computed once at import time and used for
# the last_verified / last_updated stamps.
TODAY = date.today().isoformat()

# Feed URLs polled by rss(); a feed that cannot be fetched or parsed is
# skipped there.
RSS_FEEDS = [
  "https://openai.com/news/rss.xml",
  "https://www.anthropic.com/news/rss.xml",
  "https://blog.google/technology/ai/rss/",
  "https://huggingface.co/blog/feed.xml",
]

def get(url, headers=None):
    """Fetch *url* and return the response body as text.

    Args:
        url: address to request.
        headers: optional mapping of request headers. When it is missing or
            empty, a single default ``User-Agent`` header is sent instead.

    Returns:
        The body decoded as UTF-8; undecodable bytes are replaced instead of
        raising.

    Any error raised by ``urllib`` (including the 25 second timeout) is
    propagated to the caller.
    """
    request_headers = headers if headers else {"User-Agent":"AIMarsBot/1.0"}
    request = urllib.request.Request(url, headers=request_headers)
    with urllib.request.urlopen(request, timeout=25) as response:
        raw = response.read()
    return raw.decode("utf-8", "replace")

def slugify(s):
    """Turn *s* into a lowercase, hyphen-separated slug of at most 80 chars.

    Every run of characters outside ``a-z0-9`` (after lowercasing) becomes a
    single "-", and leading/trailing hyphens are removed. The 80-character cut
    is applied after that trim, so a slug that is cut exactly at a separator
    can still end with "-". Input with no ASCII letters or digits yields "".
    """
    lowered = s.lower()
    hyphenated = re.sub(r"[^a-z0-9]+", "-", lowered)
    trimmed = hyphenated.strip("-")
    return trimmed[:80]

def summary(text):
    """Reduce *text* to a short plain-text blurb.

    Anything that looks like a markup tag (``<...>``) is replaced by a space
    and whitespace runs are collapsed. Results longer than 180 characters are
    cut to the first 177 characters, trimmed back to the last space inside
    that window (when there is one) and suffixed with "...". A falsy or
    effectively empty input yields a fixed placeholder sentence.
    """
    without_tags = re.sub(r"<[^>]+>", " ", text or "")
    collapsed = re.sub(r"\s+", " ", without_tags).strip()
    if not collapsed:
        return "Public metadata imported for editorial review."
    if len(collapsed) <= 180:
        return collapsed
    window = collapsed[:177]
    # Drop the trailing partial word so the ellipsis follows a whole word.
    return window.rsplit(" ", 1)[0] + "..."

def load():
    """Read the file at TOOLS_PATH as UTF-8 and return the parsed JSON."""
    with TOOLS_PATH.open(encoding="utf-8") as handle:
        return json.load(handle)

def save(tools):
    """Write *tools* to TOOLS_PATH as UTF-8 JSON.

    The output is indented by two spaces and non-ASCII characters are written
    as-is rather than escaped.
    """
    # Serialise before opening the file so a serialisation error cannot
    # leave a truncated file behind.
    serialized = json.dumps(tools, indent=2, ensure_ascii=False)
    with TOOLS_PATH.open("w", encoding="utf-8") as handle:
        handle.write(serialized)

def base_tool(name, slug, category, desc, url, source, **extra):
    """Build a catalogue record with placeholder values for unknown fields.

    Args:
        name, slug, category: stored as given; ``category`` is also used as
            the initial ``best_for`` value.
        desc: raw description; passed through ``summary`` and stored as both
            the short and the long description.
        url: stored as ``official_url``.
        source: label of the data source.
        **extra: additional fields. ``features`` (if given) fills the
            ``features`` slot; every other key is merged in last and
            overrides a default of the same name.

    Returns:
        A new dict flagged ``pending_review`` and stamped with ``TODAY`` as
        both ``last_verified`` and ``last_updated``.
    """
    blurb = summary(desc)
    # Pull `features` out before merging the extras so it keeps its fixed
    # position among the standard keys.
    features = extra.pop("features", [])
    record = {
        "name": name,
        "slug": slug,
        "category": category,
        "short_description": blurb,
        "long_description": blurb,
        "features": features,
        "pricing": "Unknown",
        "free_plan": "Unknown",
        "official_url": url,
        "cpa_url": "",
        "affiliate_url": "",
        "rating": 0,
        "pros": [],
        "cons": [],
        "best_for": category,
        "alternatives": [],
        "image": "assets/sample-circuit.svg",
        "source": source,
        "pending_review": True,
        "last_verified": TODAY,
        "last_updated": TODAY,
    }
    record.update(extra)
    return record

def merge(existing, incoming, today=None):
    """Merge freshly fetched records into the stored catalogue, keyed by slug.

    Args:
        existing: list of stored tool dicts. It is mutated in place (matching
            records are updated, unknown ones appended) and also returned.
        incoming: iterable of freshly built tool dicts.
        today: optional date string used for the ``last_updated`` stamp of an
            updated record; defaults to the module-level ``TODAY``.

    Returns:
        ``(existing, changed)`` where ``changed`` is True when at least one
        record was added or modified.

    For a slug that already exists, incoming values that are ``""``, ``None``
    or ``[]`` never overwrite stored data, and a stored non-empty CPA or
    affiliate link is never replaced.
    """
    stamp = TODAY if today is None else today
    protected_keys = ("cpa_url", "affiliate_url", "manual_cpa_url", "manual_affiliate_url")
    by_slug = {tool["slug"]: tool for tool in existing}
    changed = False
    for item in incoming:
        slug = item["slug"]
        old = by_slug.get(slug)
        if old is None:
            existing.append(item)
            by_slug[slug] = item
            changed = True
            continue
        # NOTE(review): placeholder values produced by base_tool (for example
        # pending_review=True, rating=0, pricing="Unknown") are not in the
        # "empty" set below, so they overwrite stored values on every run --
        # confirm whether curated fields need protecting as well.
        updates = {
            key: value
            for key, value in item.items()
            if value not in ("", None, [])
            and not (key in protected_keys and old.get(key))
        }
        if any(key not in old or old[key] != value for key, value in updates.items()):
            # Update the stored dict itself: it is the object referenced by
            # `existing`, so the change is part of what the caller saves.
            # Building a separate merged dict would leave the returned list
            # holding the stale record.
            old.update(updates)
            old["last_updated"] = stamp
            changed = True
    return existing, changed

def product_hunt():
    """Fetch recent AI launches from the Product Hunt GraphQL API.

    Returns:
        A list of records built with ``base_tool`` (slug prefix ``ph-``).
        The list is empty when ``PRODUCT_HUNT_TOKEN`` is not set, or when the
        request or response parsing fails; failures are reported on stderr so
        that one unavailable source does not abort the whole import.
    """
    import sys

    token = os.getenv("PRODUCT_HUNT_TOKEN")
    if not token:
        return []
    query = '{"query":"query { posts(first: 20, topic: \\"artificial-intelligence\\") { edges { node { name tagline url votesCount createdAt topics { edges { node { name } } } } } } }"}'
    # The GraphQL document has to travel in the request body; supplying
    # `data` makes urllib send it, and the method is set to POST explicitly.
    request = urllib.request.Request(
        "https://api.producthunt.com/v2/api/graphql",
        data=query.encode("utf-8"),
        headers={"Authorization":"Bearer "+token, "Content-Type":"application/json", "User-Agent":"AIMarsBot/1.0"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=25) as response:
            data = json.loads(response.read().decode("utf-8", "replace"))
    except Exception as exc:
        print(f"product_hunt: request failed, skipping ({exc})", file=sys.stderr)
        return []
    # A GraphQL error response may carry null for "data" or "posts"; treat
    # every missing level as "no posts".
    posts = (data.get("data") or {}).get("posts") if isinstance(data, dict) else None
    edges = (posts or {}).get("edges") or []
    out = []
    for edge in edges:
        n = edge["node"]
        slug = "ph-" + slugify(n["name"])
        topic_edges = (n.get("topics") or {}).get("edges") or []
        topics = [x["node"]["name"] for x in topic_edges]
        out.append(base_tool(n["name"], slug, "Product Hunt launch", n.get("tagline"), n.get("url"), "Product Hunt API", votes=n.get("votesCount"), launch_date=(n.get("createdAt") or "")[:10], tags=topics))
    return out

def huggingface():
    """Fetch the most-liked Hugging Face models, Spaces and datasets.

    Returns:
        A list of records built with ``base_tool`` (slug prefix ``hf-``).
        A listing that cannot be fetched, parsed, or is not a JSON array is
        skipped, so the result may be partial or empty.
    """
    # (category label, listing endpoint, prefix of the public page URL).
    # Spaces and datasets are served under their own path segment, so the
    # bare "https://huggingface.co/<id>" form is only right for models.
    sources = (
        ("Hugging Face model", "https://huggingface.co/api/models?sort=likes&direction=-1&limit=20", "https://huggingface.co/"),
        ("Hugging Face Space", "https://huggingface.co/api/spaces?sort=likes&direction=-1&limit=20", "https://huggingface.co/spaces/"),
        ("Hugging Face dataset", "https://huggingface.co/api/datasets?sort=likes&direction=-1&limit=10", "https://huggingface.co/datasets/"),
    )
    out = []
    for kind, api_url, page_prefix in sources:
        try:
            data = json.loads(get(api_url))
        except Exception:
            # Deliberately best-effort: one failing listing must not block
            # the others.
            continue
        if not isinstance(data, list):
            # e.g. an error object instead of the expected array
            continue
        for n in data:
            if not isinstance(n, dict):
                continue
            name = n.get("modelId") or n.get("id") or n.get("name")
            if not name:
                continue
            # NOTE(review): the slug does not encode the kind, so a model and
            # a Space/dataset with the same id share one slug -- confirm
            # whether that is acceptable before changing stored slugs.
            out.append(base_tool(name, "hf-" + slugify(name), kind, "Trending public Hugging Face metadata for editorial review.", page_prefix + name, "Hugging Face API", tags=n.get("tags",[]), likes=n.get("likes"), downloads=n.get("downloads"), last_modified=(n.get("lastModified") or "")[:10]))
    return out

def github():
    """Fetch the most-starred GitHub repositories tagged as AI projects.

    The search requires both the ``ai`` and ``artificial-intelligence``
    topics and more than 500 stars. ``GITHUB_TOKEN`` is sent as a bearer
    token when it is set.

    Returns:
        A list of records built with ``base_tool`` (slug prefix ``gh-``).
        The list is empty when the request or response parsing fails; the
        failure is reported on stderr so that the other sources are still
        imported.
    """
    import sys

    q = urllib.parse.quote('topic:ai topic:artificial-intelligence stars:>500')
    headers = {"User-Agent":"AIMarsBot/1.0"}
    token = os.getenv("GITHUB_TOKEN")
    if token:
        headers["Authorization"] = "Bearer " + token
    try:
        data = json.loads(get(f"https://api.github.com/search/repositories?q={q}&sort=stars&order=desc&per_page=30", headers))
    except Exception as exc:
        print(f"github: request failed, skipping ({exc})", file=sys.stderr)
        return []
    items = data.get("items", []) if isinstance(data, dict) else []
    return [
        base_tool(r["name"], "gh-" + slugify(r["full_name"]), "Open-source AI tool", r.get("description"), r.get("homepage") or r.get("html_url"), "GitHub API", repo_url=r.get("html_url"), stars=r.get("stargazers_count"), forks=r.get("forks_count"), language=r.get("language"), last_modified=(r.get("updated_at") or "")[:10])
        for r in items
    ]

def rss():
    """Collect the first eight items of every feed in ``RSS_FEEDS``.

    Returns:
        A list of records built with ``base_tool``. The slug is ``rss-`` plus
        the first 12 hex digits of the SHA-1 of the item link. Feeds that
        cannot be fetched or parsed as XML are skipped.
    """
    from email.utils import parsedate_to_datetime

    out = []
    for feed in RSS_FEEDS:
        try:
            root = ET.fromstring(get(feed))
        except Exception:
            # Deliberately best-effort: a broken feed must not block the rest.
            continue
        for item in root.findall(".//item")[:8]:
            title = item.findtext("title") or "AI update"
            link = item.findtext("link") or feed
            desc = item.findtext("description") or title
            slug = "rss-" + hashlib.sha1(link.encode()).hexdigest()[:12]
            published = item.findtext("pubDate") or ""
            try:
                # Store the publication date as YYYY-MM-DD, like the other
                # date fields in this file. A fixed-width slice of the
                # RFC 822 text breaks on single-digit days or a missing
                # weekday.
                launch_date = parsedate_to_datetime(published).date().isoformat()
            except (TypeError, ValueError, IndexError):
                # Unparseable or missing date: keep the raw prefix as before.
                launch_date = published[:16]
            out.append(base_tool(title, slug, "AI news metadata", desc, link, "RSS feed", launch_date=launch_date))
    return out

def main():
    """Run one import pass and print a JSON report.

    Loads the stored catalogue, gathers records from every source (Product
    Hunt, Hugging Face, GitHub, RSS -- in that order), merges them, saves the
    catalogue only when the merge reports a change, and prints the number of
    fetched records, the change flag and the catalogue size.
    """
    catalogue = load()
    fetched = [*product_hunt(), *huggingface(), *github(), *rss()]
    merged, changed = merge(catalogue, fetched)
    if changed:
        save(merged)
    report = {"fetched": len(fetched), "changed": changed, "tools": len(merged)}
    print(json.dumps(report, indent=2))

# Run the import only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
