from __future__ import annotations

import json
import re
from dataclasses import dataclass, field


@dataclass
class ParsedArticle:
    """Structured view of one LLM response, as produced by ``parse_llm_output``.

    Every field defaults to an empty value, so a response that lacks a given
    section simply leaves the corresponding field empty.
    """

    # Key/value pairs read from the ```yaml (or ```yml) fenced block.
    seo: dict[str, str] = field(default_factory=dict)
    # Decoded content of the ```json fenced block; ``parse_llm_output`` may
    # also copy a title and slug into it from ``seo``.
    meta: dict[str, str] = field(default_factory=dict)
    # Article body: the ```html fenced block, or trailing text starting with "<".
    html: str = ""
    # Per-network posts from the ```social block ("linkedin", "x", "facebook").
    social: dict[str, str] = field(default_factory=dict)
    # The input text after whitespace stripping.
    raw: str = ""


_FENCE = re.compile(r"```(\w+)?\s*\n(.*?)```", re.DOTALL | re.IGNORECASE)


def _parse_yaml_block(text: str) -> dict[str, str]:
    out: dict[str, str] = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        if ":" not in line:
            continue
        key, _, val = line.partition(":")
        key = key.strip()
        val = val.strip().strip('"').strip("'")
        if key:
            out[key] = val
    return out


def parse_llm_output(raw: str) -> ParsedArticle:
    """Split a raw LLM response into its SEO, meta, HTML and social parts.

    The stripped text is scanned for fenced blocks. Each block is indexed by
    its lower-cased language tag (untagged blocks under ``"block"``); when
    several blocks share a tag, the last one wins. The blocks are then mapped
    onto the result:

    * ``yaml`` (or, failing that, ``yml``) -> ``seo`` via ``_parse_yaml_block``.
    * ``json`` -> ``meta``, only if it decodes to a JSON object; invalid JSON
      or any other JSON value leaves ``meta`` empty.
    * ``html`` -> ``html``. Without such a block, the text following the last
      fenced block (or the whole text when there is none) is used, provided
      it starts with ``<``.
    * ``social`` -> ``social`` via ``_parse_social_block``.

    Finally, a missing or empty ``meta["title"]`` is filled from
    ``seo["titre_h1"]`` or ``seo["title"]``, and a missing or empty
    ``meta["slug"]`` from ``seo["slug"]``.

    Args:
        raw: Text returned by the model; ``None`` or an empty string is
            treated as empty text.

    Returns:
        A ``ParsedArticle`` whose ``raw`` field holds the stripped input.
    """
    text = (raw or "").strip()
    result = ParsedArticle(raw=text)

    blocks: dict[str, str] = {}
    # End offset (within ``text``) of the last fenced block; 0 when none.
    last_fence_end = 0
    for match in _FENCE.finditer(text):
        lang = (match.group(1) or "").lower()
        blocks[lang or "block"] = match.group(2).strip()
        last_fence_end = match.end()

    for tag in ("yaml", "yml"):
        if tag in blocks:
            result.seo = _parse_yaml_block(blocks[tag])
            break

    if "json" in blocks:
        try:
            decoded = json.loads(blocks["json"])
        except json.JSONDecodeError:
            # Best effort: malformed JSON simply leaves ``meta`` empty.
            decoded = None
        # Only a JSON object is usable as ``meta``; a list, string or number
        # would break the dict operations below.
        if isinstance(decoded, dict):
            result.meta = decoded

    if "html" in blocks:
        result.html = blocks["html"]
    else:
        # Fallback: HTML after the last structured block. Match offsets are
        # relative to ``text``, so slice it once from the last fence end.
        tail = text[last_fence_end:].strip()
        if tail.startswith("<"):
            result.html = tail

    if "social" in blocks:
        result.social = _parse_social_block(blocks["social"])

    # Complete meta from seo where needed.
    if not result.meta.get("title"):
        title = result.seo.get("titre_h1") or result.seo.get("title")
        if title:
            result.meta["title"] = title

    # Same rule as for the title: an absent or empty slug is filled from seo.
    seo_slug = result.seo.get("slug")
    if seo_slug and not result.meta.get("slug"):
        result.meta["slug"] = seo_slug

    return result


def _parse_social_block(text: str) -> dict[str, str]:
    sections: dict[str, str] = {}
    current = None
    buf: list[str] = []

    def flush() -> None:
        nonlocal current, buf
        if current and buf:
            sections[current] = "\n".join(buf).strip()
        buf = []

    for line in text.splitlines():
        m = re.match(r"^#+\s*(LinkedIn|X|Twitter|Facebook)\s*$", line.strip(), re.I)
        if m:
            flush()
            name = m.group(1).lower()
            current = "x" if name in ("x", "twitter") else name
            continue
        if current is not None:
            buf.append(line)
    flush()
    return sections


def article_url(base: str, slug: str) -> str:
    """Join a base URL and an article slug into a URL ending with a slash.

    Args:
        base: Root URL; trailing slashes are ignored.
        slug: Article slug; leading and trailing slashes are ignored.

    Returns:
        ``<base>/<slug>/``, or ``<base>/`` when the slug is empty or made up
        only of slashes.
    """
    root = base.rstrip("/")
    path = slug.strip("/")
    if not path:
        # Avoid producing "<base>//" when there is no slug to append.
        return f"{root}/"
    return f"{root}/{path}/"