#!/usr/bin/env python3
"""
gig_digest.py — Rank and summarize open Reddit gig opportunities from pipeline.db.

Reads ~/income-pipeline/data/pipeline.db, scores unapplied opportunities by:
  - pay_amount (USD-equivalent normalized)
  - estimated hourly rate (when minutes available)
  - freshness (age penalty after 24h)
  - subreddit-prior (slavelabour/forhire/signupsforpay weighting)
  - skill-fit keywords (software, edit, write, vid, social, virtual assistant, AI)

Outputs a markdown digest to stdout (and optionally a file) with the top-N
bid-ready opportunities, plus a ready-to-paste $bid template for up to five of
the highest-scoring rows.

Designed as a portfolio-worthy single-file deliverable: no third-party deps,
stdlib only. Demonstrates SQL, scoring, CLI hygiene, and Markdown formatting.

Usage:
  python3 gig_digest.py                       # top 10 to stdout
  python3 gig_digest.py --top 20              # top 20
  python3 gig_digest.py --out digest.md       # also write file
  python3 gig_digest.py --db /path/to/db.sqlite
"""
from __future__ import annotations

import argparse
import os
import re
import sqlite3
import sys
from datetime import datetime, timezone
from typing import Iterable

# Database path used when --db is not given; "~" is expanded to the current
# user's home directory once, at import time.
DEFAULT_DB = os.path.expanduser("~/income-pipeline/data/pipeline.db")

# Skill-fit keyword -> weight, consumed by skill_bonus(). A weight above 1.0
# raises an opportunity's score, a weight below 1.0 lowers it, and 1.0 is
# neutral (the keyword is still reported as a match). skill_bonus() applies
# only half of each weight's distance from 1.0, so several hits do not
# compound too aggressively.
# Keys are lowercase because they are compared against lowercased text.
# The trailing space in "va " is intentional: it keeps the abbreviation from
# matching the start of longer words such as "value".
SKILL_KEYWORDS = {
    "software": 1.3,
    "engineer": 1.3,
    "developer": 1.25,
    "python": 1.25,
    "automation": 1.2,
    "script": 1.15,
    "ai": 1.15,
    "data": 1.1,
    "writer": 1.05,
    "content": 1.05,
    "edit": 1.05,
    "video": 1.0,
    "clip": 1.0,
    "social": 0.95,
    "virtual assistant": 0.95,
    "va ": 0.95,
}

# Per-subreddit score multiplier, keyed by the lowercase subreddit name that
# subreddit_of() extracts from the post URL. score_row() falls back to 1.0 for
# any subreddit that is not listed here.
SUB_PRIORS = {
    "slavelabour": 1.10,
    "forhire": 1.05,
    "signupsforpay": 0.90,
    "beermoney": 0.70,
}

# Bid message skeleton. render_digest() fills the {pitch} placeholder with
# str.format(), so any literal brace added to this text must be doubled.
BID_TEMPLATE = (
    "$bid — u/piaigmt. {pitch} Portfolio: ~/income-pipeline/work/ "
    "(auto_data_cleaner.py, table_scraper.py, excel_automation.py, gig_digest.py). "
    "DM piaigmt@proton.me. Can start today."
)


def parse_age_hours(created_at: str | None) -> float:
    if not created_at:
        return 9999.0
    for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%S.%f"):
        try:
            dt = datetime.strptime(created_at.split("+")[0].strip(), fmt)
            dt = dt.replace(tzinfo=timezone.utc)
            return max(0.0, (datetime.now(timezone.utc) - dt).total_seconds() / 3600.0)
        except ValueError:
            continue
    return 9999.0


def freshness_factor(age_hours: float) -> float:
    """Map a post's age in hours to a score multiplier.

    Posts up to a day old keep their full score; the multiplier then steps
    down at three days and at one week.

    Args:
        age_hours: Age of the post in hours.

    Returns:
        1.0, 0.75, 0.5 or 0.25 depending on which age tier the post falls in.
    """
    # (inclusive upper bound in hours, multiplier), checked youngest first.
    tiers = ((24, 1.0), (72, 0.75), (168, 0.5))
    for upper_bound, factor in tiers:
        if age_hours <= upper_bound:
            return factor
    # Older than a week.
    return 0.25


def subreddit_of(url: str | None) -> str:
    if not url:
        return ""
    m = re.search(r"reddit\.com/r/([^/]+)/", url)
    return m.group(1).lower() if m else ""


def skill_bonus(text: str) -> tuple[float, list[str]]:
    t = (text or "").lower()
    matched: list[str] = []
    mult = 1.0
    for kw, m in SKILL_KEYWORDS.items():
        if kw in t:
            matched.append(kw.strip())
            mult *= 1.0 + (m - 1.0) * 0.5  # dampen compounding
    return mult, matched


def score_row(row: sqlite3.Row) -> tuple[float, dict]:
    """Score one opportunity row and return the score with its ingredients.

    The base value is the pay amount plus twice the implied hourly rate (when
    an estimated duration is available). That base is then scaled by the
    freshness factor, the skill-keyword multiplier and the subreddit prior.

    Args:
        row: A row from the ``opportunities`` table exposing ``pay_amount``,
            ``estimated_minutes``, ``created_at``, ``url``, ``title``,
            ``description`` and ``tags``.

    Returns:
        ``(score, meta)`` where ``meta`` holds ``pay``, ``hourly``,
        ``age_hours``, ``fresh``, ``subreddit`` and ``skill_matches`` for
        rendering.
    """
    pay = float(row["pay_amount"] or 0)
    minutes = float(row["estimated_minutes"] or 0)

    # Without a positive duration estimate there is no hourly rate to reward.
    hourly = 0.0
    if minutes > 0:
        hourly = pay / (minutes / 60.0)

    age = parse_age_hours(row["created_at"])
    fresh = freshness_factor(age)

    sub = subreddit_of(row["url"])
    sub_w = SUB_PRIORS.get(sub, 1.0)

    searchable = "{} {} {}".format(
        row["title"], row["description"] or "", row["tags"] or ""
    )
    skill_w, matched = skill_bonus(searchable)

    # Composite: pay dominates, then hourly bonus, then freshness, then skill, then subreddit.
    base = pay + hourly * 2.0
    score = base * fresh * skill_w * sub_w

    return score, {
        "pay": pay,
        "hourly": hourly,
        "age_hours": age,
        "fresh": fresh,
        "subreddit": sub,
        "skill_matches": matched,
    }


def short_title(t: str, n: int = 70) -> str:
    """Collapse whitespace in a title and cut it to at most ``n`` characters.

    Args:
        t: Raw title; ``None`` or "" yields "".
        n: Maximum length of the result, including the ellipsis.

    Returns:
        The single-line title, ending in "…" when it had to be shortened.
    """
    collapsed = re.sub(r"\s+", " ", t or "").strip()
    if len(collapsed) > n:
        # Reserve one character for the ellipsis so the total stays at n.
        return collapsed[: n - 1] + "…"
    return collapsed


def build_pitch(title: str, matched: Iterable[str]) -> str:
    """Pick the one-line pitch that best fits the matched skill keywords.

    Categories are tried in priority order — software/automation, AI/data,
    video editing, writing, VA/social — and the first one with a matching
    keyword wins. The bare "va" abbreviation counts as a VA match, alongside
    "social" and anything containing "virtual".

    Args:
        title: Opportunity title. Currently unused; kept so callers do not
            need to change.
        matched: Skill keywords found for the opportunity.

    Returns:
        The pitch sentence, or the generalist pitch when nothing matches.
    """
    found = {kw.strip() for kw in matched}

    if found & {"software", "engineer", "developer", "python", "automation", "script"}:
        return "Python automation specialist — I ship single-file stdlib tools (data cleaning, scraping, Excel/CSV). Live receipts available."
    if found & {"ai", "data"}:
        return "AI/data tooling — I build pragmatic Python scripts to clean, transform, and report. Have working samples."
    if found & {"edit", "video", "clip"}:
        return "Shortform editor — fast turnaround on clip/VOD trims. Can deliver a sample today."
    if found & {"content", "writer"}:
        return "Writer/researcher — clean structured drafts, fast iteration on revisions."
    # "va" is how the "va " table entry is reported once its padding is
    # stripped, so it has to be recognised here explicitly.
    if found & {"social", "va"} or any("virtual" in kw for kw in found):
        return "VA/social ops — Reddit-savvy, daily availability, organized."
    return "Generalist — pragmatic, fast, evidence-first. Ask for a sample, I'll send one."


def fetch_open(conn: sqlite3.Connection, limit: int) -> list[sqlite3.Row]:
    """Load the best-paid open opportunities that have not been applied to.

    Sets ``conn.row_factory`` to ``sqlite3.Row`` so columns can be read by
    name, then selects rows with ``status = 'open'`` and no ``applied_at``,
    highest ``pay_amount`` first.

    Args:
        conn: Open connection to the pipeline database.
        limit: Number of rows the caller ultimately wants to show; six times
            as many are fetched so later scoring has candidates to reorder.

    Returns:
        The matching rows as ``sqlite3.Row`` objects.
    """
    query = """
        SELECT id, external_id, title, description, url, pay_amount, pay_currency,
               estimated_minutes, status, tags, applied_at, created_at
          FROM opportunities
         WHERE status = 'open' AND applied_at IS NULL
         ORDER BY pay_amount DESC
         LIMIT ?
        """
    conn.row_factory = sqlite3.Row
    overfetch = limit * 6  # over-fetch so scoring can reorder
    cursor = conn.execute(query, (overfetch,))
    return cursor.fetchall()


def render_digest(scored: list[tuple[float, dict, sqlite3.Row]], top: int) -> str:
    """Render the scored opportunities as a Markdown digest.

    The digest has a ranking table for the first ``top`` entries, followed by
    a bid-template section for up to five of those same entries.

    Args:
        scored: ``(score, meta, row)`` tuples, already ordered best-first.
            ``meta`` is the dict produced by ``score_row``.
        top: How many entries to list; zero or a negative value lists none.

    Returns:
        The complete Markdown document, ending with a newline.
    """
    # astimezone() attaches the local zone; on a naive datetime "%Z" formats
    # as an empty string and the zone would silently be missing.
    now = datetime.now().astimezone().strftime("%Y-%m-%d %H:%M %Z").strip()
    lines = [
        f"# Gig digest — top {top} open opportunities",
        f"_Generated: {now} • source: ~/income-pipeline/data/pipeline.db_",
        "",
        "| # | Score | Pay | $/hr | Age (h) | Sub | Title | URL |",
        "|---|------:|----:|-----:|--------:|-----|-------|-----|",
    ]

    shown = scored[: max(top, 0)]
    for i, (s, meta, row) in enumerate(shown, 1):
        hourly_cell = f"${meta['hourly']:0.0f}" if meta["hourly"] else "—"
        # A raw "|" in a title would be read as a column separator.
        title_cell = short_title(row["title"]).replace("|", "\\|")
        lines.append(
            f"| {i} | {s:0.1f} | ${meta['pay']:0.0f} | "
            f"{hourly_cell} | "
            f"{meta['age_hours']:0.0f} | {meta['subreddit']} | "
            f"{title_cell} | {row['url'] or ''} |"
        )

    # Bid templates are drawn from the rows listed above, so a request for
    # fewer than five rows never produces bids for unlisted opportunities.
    bid_rows = shown[:5]
    lines.append("")
    lines.append(f"## Bid templates (top {len(bid_rows)})")
    for i, (s, meta, row) in enumerate(bid_rows, 1):
        pitch = build_pitch(row["title"], meta["skill_matches"])
        bid = BID_TEMPLATE.format(pitch=pitch)
        lines.append(f"### {i}. [#{row['id']}] {short_title(row['title'])}")
        lines.append(f"- URL: {row['url'] or ''}")
        lines.append(f"- Pay: ${meta['pay']:0.0f}  •  Sub: r/{meta['subreddit']}  •  Age: {meta['age_hours']:0.0f}h")
        if meta["skill_matches"]:
            lines.append(f"- Skill matches: {', '.join(sorted(set(meta['skill_matches'])))}")
        lines.append("")
        lines.append("```")
        lines.append(bid)
        lines.append("```")
        lines.append("")
    return "\n".join(lines) + "\n"


def main(argv: list[str]) -> int:
    """Command-line entry point: fetch, score, rank and print the digest.

    Args:
        argv: Command-line arguments without the program name.

    Returns:
        Process exit status: 0 on success, 1 when the database cannot be
        queried, 2 when the database file is missing or ``--top`` is negative.
    """
    ap = argparse.ArgumentParser(description="Rank open gig opportunities.")
    ap.add_argument("--db", default=DEFAULT_DB, help="Path to pipeline.db")
    ap.add_argument("--top", type=int, default=10, help="How many rows to surface")
    ap.add_argument("--out", default=None, help="Optional file path to also write")
    args = ap.parse_args(argv)

    # A negative value would become a negative SQL LIMIT (which SQLite treats
    # as "no limit") and a from-the-end slice when rendering.
    if args.top < 0:
        print(f"ERROR: --top must not be negative: {args.top}", file=sys.stderr)
        return 2

    if not os.path.exists(args.db):
        print(f"ERROR: db not found: {args.db}", file=sys.stderr)
        return 2

    try:
        conn = sqlite3.connect(args.db)
        try:
            rows = fetch_open(conn, args.top)
        finally:
            # Using the connection as a context manager only commits or rolls
            # back; it never closes, so close it explicitly.
            conn.close()
    except sqlite3.Error as exc:
        print(f"ERROR: cannot read {args.db}: {exc}", file=sys.stderr)
        return 1

    scored = [(*score_row(row), row) for row in rows]
    scored.sort(key=lambda entry: entry[0], reverse=True)

    digest = render_digest(scored, args.top)
    sys.stdout.write(digest)
    if args.out:
        with open(args.out, "w", encoding="utf-8") as f:
            f.write(digest)
        print(f"[wrote {args.out}]", file=sys.stderr)
    return 0


# Script entry point: hand the command-line arguments (without the program
# name) to main() and exit with the status code it returns.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv[1:]))