"""
EXP-025 Clean Decomposition: Specificity × Quality Demands
============================================================
Separates three confounded effects from the original d=2.34:
1. Specificity content (reference Northvane's assets vs generic)
2. Quality demands (detailed/thorough vs no quality instruction)
3. Output length (forced to 500 words)

Design: 2×2 × 10 runs × 1 generator = 40 outputs
All instructions ~20 words. All outputs forced to 500 words.

Conditions:
  SPEC_QUAL: Specific + quality demands (19 words)
  SPEC_ONLY: Specific, no quality demands (19 words)
  QUAL_ONLY: Quality demands, no specificity (19 words)
  BARE:      Minimal instruction (20 words, neutral filler)

The test:
  SPEC+QUAL > QUAL_ONLY → specificity adds value beyond quality demands
  SPEC_ONLY > BARE      → specificity alone works (no quality demands needed)
  QUAL_ONLY > BARE      → quality demands alone work (no specificity needed)
  SPEC_ONLY ≈ SPEC+QUAL → quality demands don't add to specificity
"""

import json
import sys
import time
import re
import random
import argparse
from pathlib import Path
from datetime import datetime, timezone

sys.path.insert(0, str(Path(__file__).parent))
from _config import get_xai_client, call_generator, bootstrap_ci, hedges_g

DATA_DIR = Path(__file__).parent

# ── Conditions (all ~20 words, matched length) ──────────────────────────────

# The four cells of the 2×2 design. Each entry carries:
#   label               - human-readable description, including the instruction's
#                         actual whitespace-delimited word count
#   instruction         - the text placed at the top of the prompt
#   has_specificity     - True when the instruction references Northvane's assets
#   has_quality_demands - True when the instruction asks for detailed/thorough work
# Instructions are assembled from shared sentences so the cells differ only in
# the manipulated factor, and all stay within one word of each other in length.
CONDITIONS = {
    "SPEC_QUAL": {
        "label": "Specific + Quality demands (19 words)",
        "instruction": (
            "Ground every point in Northvane's specific assets and current situation. "
            "Provide detailed and thorough analysis with clear reasoning throughout."
        ),
        "has_specificity": True,
        "has_quality_demands": True,
    },
    "SPEC_ONLY": {
        "label": "Specific only, no quality demands (19 words)",
        "instruction": (
            "Ground every point in Northvane's specific assets and current situation. "
            "Write your strategic recommendation for their next twelve months."
        ),
        "has_specificity": True,
        "has_quality_demands": False,
    },
    "QUAL_ONLY": {
        "label": "Quality demands only, no specificity (19 words)",
        "instruction": (
            "Provide detailed and thorough analysis with clear reasoning throughout. "
            "Write your strategic recommendation for the company's next twelve months."
        ),
        "has_specificity": False,
        "has_quality_demands": True,
    },
    "BARE": {
        "label": "Neutral filler, no specificity or quality demands (20 words)",
        "instruction": (
            "Write your strategic recommendation for the company's next twelve months. "
            "Cover the key strategic considerations and options for their situation."
        ),
        "has_specificity": False,
        "has_quality_demands": False,
    },
}

# Verify word counts: instruction length is a controlled variable, so fail at
# import time if an edited instruction drifts outside the matched-length band.
for k, v in CONDITIONS.items():
    wc = len(v["instruction"].split())
    assert 17 <= wc <= 22, f"{k}: {wc} words (expected ~19-20)"

# Scenario briefing shared verbatim by every condition. Kept as one line per
# tuple element so individual facts are easy to diff; joined with newlines and
# deliberately has no trailing newline.
CONTEXT = "\n".join((
    "Northvane Analytics is a B2B data infrastructure company serving mid-market logistics firms.",
    "",
    "Key facts:",
    "- 47 employees, $8M ARR, 40% YoY growth",
    "- 12 engineers (capacity constraint)",
    "- Proprietary route optimization algorithm built on 5 years of regional shipping data",
    "- Competitors: broad-spectrum analytics platforms without logistics specialization",
    "- Strategic question: Expand to supply chain visibility or deepen core logistics offering?",
))

# Full prompt layout: condition instruction, blank line, scenario, blank line,
# then the fixed length constraint that holds output length constant.
PROMPT_TEMPLATE = (
    "{instruction}\n"
    "\n"
    "{context}\n"
    "\n"
    "Write exactly 500 words. Do not exceed 500 words."
)

# Number of generations per design cell.
RUNS_PER_CONDITION = 10
# Model identifier handed to call_generator.
GENERATOR = "grok-4-1-fast"


# ── Specificity Scoring (same heuristic as EXP-025) ─────────────────────────

def score_specificity(text):
    """Score how much scenario-specific content *text* contains.

    Six components, following the EXP-025 heuristic, are summed into a total:

    * m1 - every occurrence of the company name
    * m2 - how many of the five scenario figures appear (47, $8M, 40%,
      12 engineers, 5 years); each figure counts at most once
    * m3 - every occurrence of a market term
    * m4 - how many algorithm/moat terms appear; each counts at most once
    * m5 - every occurrence of a named competitor
    * m6 - how many recommendation patterns appear; each counts at most once

    NOTE: this version deviates from a literal copy of the earlier patterns by
    anchoring numbers and competitor names on word boundaries, so that e.g.
    "disappoint" is not counted as the competitor "SAP", "140%" is not counted
    as "40%", and "$18M" is not counted as "$8M". It also accepts the
    hyphenated form "12-engineer". Scores are therefore not bit-for-bit
    comparable with outputs scored by the unanchored patterns.

    Args:
        text: The generated output to score. Matching is case-insensitive.

    Returns:
        A dict with the total, the six component scores, the whitespace
        word count, and ``density_per_1kw`` (total per 1000 words, rounded
        to one decimal; 0 for empty text).
    """
    text_lower = text.lower()

    # M1: Company name mentions
    m1 = len(re.findall(r'northvane', text_lower))

    # M2: Scenario numbers used. A leading \b keeps each figure from matching
    # as the tail of a longer number (140%, $18M, 15-year).
    scenario_nums = [
        r'\b47\b',
        r'\b8\s*(?:m\b|million)',
        r'\b40\s*%',
        r'\b12[\s-]+engineer',
        r'\b(?:5|five)[\s-]year',
    ]
    m2 = sum(1 for p in scenario_nums if re.search(p, text_lower))

    # M3: Market specifics (counted per occurrence)
    market_terms = [r'regional', r'3pl|third[\s-]party\s+logistic', r'trucking',
                    r'logistics', r'shipping', r'mid[\s-]market', r'supply\s+chain']
    m3 = sum(len(re.findall(p, text_lower)) for p in market_terms)

    # M4: Algorithm/proprietary mentions
    algo_terms = [r'algorithm', r'proprietary', r'route\s+optim', r'data\s+moat',
                  r'competitive\s+advantage']
    m4 = sum(1 for p in algo_terms if re.search(p, text_lower))

    # M5: Named competitors. Whole-word match only: short names such as "sap"
    # otherwise hit inside ordinary words ("disappear", "asap").
    m5 = len(re.findall(
        r'\b(?:project44|fourkites|descartes|oracle|sap|flexport|convoy)\b',
        text_lower))

    # M6: Specific recommendations tied to unique assets
    specific_recs = [r'deepen\s+(?:core|logistics)', r'expand.*?supply\s+chain',
                     r'partner', r'api\s+(?:access|integration|layer)',
                     r'verticali[sz]e', r'enterprise', r'upsell|cross[\s-]sell']
    m6 = sum(1 for p in specific_recs if re.search(p, text_lower))

    total = m1 + m2 + m3 + m4 + m5 + m6
    word_count = len(text.split())
    # Normalise by length so longer outputs are not rewarded merely for size.
    density = total / (word_count / 1000) if word_count > 0 else 0

    return {
        "total": total,
        "m1_company": m1,
        "m2_scenario": m2,
        "m3_market": m3,
        "m4_algorithm": m4,
        "m5_competitors": m5,
        "m6_specific_recs": m6,
        "word_count": word_count,
        "density_per_1kw": round(density, 1),
    }


# ── Generation ───────────────────────────────────────────────────────────────

def generate_all(client, dry_run=False):
    """Produce and score one output per (condition, run) pair.

    The full schedule (every condition × RUNS_PER_CONDITION) is shuffled with
    a fixed seed so conditions are interleaved in a reproducible order rather
    than generated in blocks.

    Args:
        client: Handle passed through to call_generator; unused when
            *dry_run* is true.
        dry_run: When true, skip the generator and use canned text that
            differs only by whether the condition has specificity.

    Returns:
        Dict mapping each condition key to a list of
        ``{"run", "text", "score"}`` dicts, in the order they were generated.
    """
    schedule = [
        {"condition": name, "run": idx, "instruction": spec["instruction"]}
        for name, spec in CONDITIONS.items()
        for idx in range(1, RUNS_PER_CONDITION + 1)
    ]

    # Seeds the module-level RNG (not a private instance), so the shuffle is
    # reproducible from run to run.
    random.seed(42)
    random.shuffle(schedule)

    total = len(schedule)
    results = {name: [] for name in CONDITIONS}

    for position, job in enumerate(schedule, start=1):
        cond_key = job["condition"]
        prompt = PROMPT_TEMPLATE.format(instruction=job["instruction"], context=CONTEXT)

        # Progress prefix; the status is appended on the same line below.
        print(f"  [{position:2d}/{total}] {cond_key:10s} run {job['run']:2d}: ", end="", flush=True)

        if not dry_run:
            text = call_generator(client, GENERATOR, [{"role": "user", "content": prompt}])
            print(f"done ({len(text.split())}w)")
            # Fixed pause after each live generator call.
            time.sleep(3)
        else:
            # Canned stand-in text: specific conditions mention the scenario,
            # the others stay generic, so the scorer separates them.
            word_cap = 500
            if CONDITIONS[cond_key]["has_specificity"]:
                lead = "Northvane Analytics should deepen its core logistics offering. "
                tail = "The 12-engineer team and $8M ARR position them for regional expansion. "
            else:
                lead = "The company should focus on strategic growth opportunities. "
                tail = "A comprehensive analysis suggests several key considerations. "
            tokens = (lead * 25 + tail * 10).split()
            text = " ".join(tokens[:word_cap])
            print(f"dry-run ({len(text.split())}w)")

        results[cond_key].append({
            "run": job["run"],
            "text": text,
            "score": score_specificity(text),
        })

    return results


# ── Analysis ─────────────────────────────────────────────────────────────────

def _mean(values):
    """Return the arithmetic mean of a non-empty sequence of numbers."""
    return sum(values) / len(values)


def _cell(results, condition, field):
    """Collect one score field across every run of a single condition."""
    return [entry["score"][field] for entry in results[condition]]


def _effect(a, b):
    """Compare two samples and bundle d, its CI bounds and Hedges' g.

    bootstrap_ci is always called before hedges_g so the sequence of helper
    calls stays fixed (relevant if either helper draws on shared random
    state, which is not visible from this file).
    """
    ci = bootstrap_ci(a, b)
    g = hedges_g(a, b)
    return {"d": ci["d"], "ci_lower": ci["ci_lower"],
            "ci_upper": ci["ci_upper"], "g": g["g"]}


def _print_effect(effect):
    """Print the standard one-line effect-size summary."""
    print(f"    d={effect['d']:.3f} [{effect['ci_lower']:.3f}, "
          f"{effect['ci_upper']:.3f}]  g={effect['g']:.3f}")


def _record(effect):
    """Convert an effect bundle to the shape stored in the results file."""
    return {"d": effect["d"],
            "ci": [effect["ci_lower"], effect["ci_upper"]],
            "g": effect["g"]}


def _excludes_zero(effect):
    """Return True when the confidence interval lies entirely on one side of 0."""
    return effect["ci_lower"] > 0 or effect["ci_upper"] < 0


def analyze(results):
    """Print the 2×2 summary, main effects and pairwise contrasts.

    Args:
        results: Mapping of condition key ("SPEC_QUAL", "SPEC_ONLY",
            "QUAL_ONLY", "BARE") to a list of run dicts, each with a
            ``"score"`` dict containing ``total``, ``density_per_1kw`` and
            ``word_count``. Every condition must have at least one run.

    Returns:
        Dict with ``"main_effects"`` (specificity/quality × raw/density, each
        ``{"d", "ci", "g"}``) and ``"pairwise"`` (the four key contrasts under
        ``"raw"`` and ``"density"``, same record shape). Earlier versions
        returned only ``main_effects`` with ``d`` and ``ci``; the extra keys
        make the printed hypothesis tests available to callers as well.
    """
    banner = "=" * 70
    order = ["SPEC_QUAL", "SPEC_ONLY", "QUAL_ONLY", "BARE"]
    raw = {key: _cell(results, key, "total") for key in order}
    dens = {key: _cell(results, key, "density_per_1kw") for key in order}

    print("\n" + banner)
    print("  RESULTS: 2×2 Specificity × Quality Demands")
    print(banner)

    # Per-condition summary
    print("\n  Condition     | Spec | Qual | Score  | Density | Words")
    print("  " + "-" * 60)
    for key in order:
        cond = CONDITIONS[key]
        spec = "Y" if cond["has_specificity"] else "N"
        qual = "Y" if cond["has_quality_demands"] else "N"
        words = _cell(results, key, "word_count")
        print(f"  {key:14s} |  {spec}   |  {qual}   | {_mean(raw[key]):5.1f}  | "
              f"{_mean(dens[key]):6.1f} | {_mean(words):.0f}")

    # 2×2 cell means, first on raw score and then on density
    for title, cells in (("\n  2×2 Design:", raw), ("\n  Density (/1Kw):", dens)):
        print(title)
        print("                    Specific    Not Specific")
        print(f"  Quality demands:  {_mean(cells['SPEC_QUAL']):5.1f}       "
              f"{_mean(cells['QUAL_ONLY']):5.1f}")
        print(f"  No quality:       {_mean(cells['SPEC_ONLY']):5.1f}       "
              f"{_mean(cells['BARE']):5.1f}")

    # Main effects: pool the two cells on each side of a factor.
    main = {}
    sections = (
        ("raw", raw, "  MAIN EFFECTS (raw score)",
         "Specificity main effect", "Quality demands main effect"),
        ("density", dens, "  MAIN EFFECTS (density per 1Kw)",
         "Specificity main (density)", "Quality demands main (density)"),
    )
    for metric, cells, heading, spec_title, qual_title in sections:
        print("\n" + banner)
        print(heading)
        print(banner)

        spec_yes = cells["SPEC_QUAL"] + cells["SPEC_ONLY"]
        spec_no = cells["QUAL_ONLY"] + cells["BARE"]
        effect = _effect(spec_yes, spec_no)
        print(f"\n  {spec_title}:")
        print(f"    Specific: {_mean(spec_yes):.1f}  Not: {_mean(spec_no):.1f}")
        _print_effect(effect)
        main[f"specificity_{metric}"] = effect

        qual_yes = cells["SPEC_QUAL"] + cells["QUAL_ONLY"]
        qual_no = cells["SPEC_ONLY"] + cells["BARE"]
        effect = _effect(qual_yes, qual_no)
        print(f"\n  {qual_title}:")
        print(f"    Quality: {_mean(qual_yes):.1f}  Not: {_mean(qual_no):.1f}")
        _print_effect(effect)
        main[f"quality_{metric}"] = effect

    # Key pairwise contrasts (the four tests named in the module docstring)
    print("\n" + banner)
    print("  KEY PAIRWISE COMPARISONS")
    print(banner)

    comparisons = [
        ("SPEC_ONLY", "BARE", "Specificity alone (no quality demands)"),
        ("QUAL_ONLY", "BARE", "Quality demands alone (no specificity)"),
        ("SPEC_QUAL", "QUAL_ONLY", "Adding specificity to quality demands"),
        ("SPEC_QUAL", "SPEC_ONLY", "Adding quality demands to specificity"),
    ]
    pairwise = {"raw": {}, "density": {}}

    for left, right, desc in comparisons:
        a, b = raw[left], raw[right]
        effect = _effect(a, b)
        print(f"\n  {left} vs {right}: ({desc})")
        print(f"    {_mean(a):.1f} vs {_mean(b):.1f}")
        _print_effect(effect)
        pairwise["raw"][f"{left} vs {right}"] = _record(effect)

    print("\n  --- Density comparisons ---")
    for left, right, _desc in comparisons:
        a, b = dens[left], dens[right]
        effect = _effect(a, b)
        print(f"\n  {left} vs {right} (density):")
        print(f"    {_mean(a):.1f} vs {_mean(b):.1f}")
        _print_effect(effect)
        pairwise["density"][f"{left} vs {right}"] = _record(effect)

    # Interpretation is driven by the density main effects only.
    print("\n" + banner)
    print("  INTERPRETATION")
    print(banner)

    spec_effect = main["specificity_density"]
    qual_effect = main["quality_density"]
    spec_excl = _excludes_zero(spec_effect)
    qual_excl = _excludes_zero(qual_effect)

    print(f"\n  Specificity main (density): d={spec_effect['d']:.3f} "
          f"{'*' if spec_excl else '(CI includes 0)'}")
    print(f"  Quality demands main (density): d={qual_effect['d']:.3f} "
          f"{'*' if qual_excl else '(CI includes 0)'}")

    if spec_excl and not qual_excl:
        print("\n  → SPECIFICITY is the mechanism. Quality demands alone don't help.")
        print("    The vault's C1 claim (interpretation gap) survives.")
    elif qual_excl and not spec_excl:
        print("\n  → QUALITY DEMANDS are the mechanism. Specificity alone doesn't help.")
        print("    The vault's C1 claim needs revision: it's 'more instruction' not 'specificity.'")
    elif spec_excl and qual_excl:
        print("\n  → BOTH contribute independently. Specificity + quality demands = additive.")
    else:
        # Report the actual per-cell sample size instead of a fixed number.
        n_per_cell = min(len(values) for values in raw.values())
        print(f"\n  → NEITHER reaches significance at density level. "
              f"N={n_per_cell} may be too small.")

    return {
        "main_effects": {name: _record(effect) for name, effect in main.items()},
        "pairwise": pairwise,
    }


# ── Main ─────────────────────────────────────────────────────────────────────

def main():
    """Run the experiment end to end: generate, analyze, and save to JSON.

    Command-line flags:
        --dry-run  Use canned text instead of calling the generator. Results
                   are written to a separate ``*_dryrun.json`` file so a
                   smoke test can never overwrite real experimental data.
    """
    parser = argparse.ArgumentParser(description="EXP-025 Clean 2×2 Decomposition")
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()

    # Derived from the design constants so banner and metadata cannot drift.
    total_outputs = RUNS_PER_CONDITION * len(CONDITIONS)

    print("=" * 70)
    print("  EXP-025 Clean Decomposition: Specificity × Quality Demands")
    print(f"  2×2 × {RUNS_PER_CONDITION} runs = {total_outputs} outputs, forced 500-word length")
    print(f"  Generator: {GENERATOR}")
    print("=" * 70)

    if args.dry_run:
        print("\n  [DRY RUN]\n")

    # Print conditions
    print("\n  Conditions (~20 words each):")
    for k, v in CONDITIONS.items():
        wc = len(v["instruction"].split())
        print(f"    {k:10s} ({wc}w): {v['instruction'][:70]}...")

    # No API client is needed (or created) for a dry run.
    client = get_xai_client() if not args.dry_run else None
    results = generate_all(client, dry_run=args.dry_run)

    analysis = analyze(results)

    # Save. "results" holds scores only; "raw_outputs" repeats them with text.
    output = {
        "experiment": "EXP-025-clean-decomposition",
        "dry_run": args.dry_run,
        "design": {
            "factors": "2×2: specificity (Y/N) × quality_demands (Y/N)",
            "runs_per_cell": RUNS_PER_CONDITION,
            "total_outputs": total_outputs,
            "generator": GENERATOR,
            "length_control": "500 words forced",
            "instruction_length_control": "all ~20 words",
            "conditions": {k: v["instruction"] for k, v in CONDITIONS.items()},
        },
        "results": {k: [{"run": e["run"], "score": e["score"]} for e in v]
                    for k, v in results.items()},
        "raw_outputs": {k: [{"run": e["run"], "score": e["score"], "text": e["text"]}
                            for e in v] for k, v in results.items()},
        "analysis": analysis,
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }

    # Synthetic dry-run data goes to its own file; only real runs touch the
    # canonical results path.
    if args.dry_run:
        output_path = DATA_DIR / "rerun_025_clean_results_dryrun.json"
    else:
        output_path = DATA_DIR / "rerun_025_clean_results.json"

    with open(output_path, "w", encoding="utf-8") as f:
        # default=str stringifies any value json cannot encode natively.
        json.dump(output, f, indent=2, default=str)

    print(f"\n  Saved: {output_path}")


# Run the experiment only when this file is executed as a script; importing
# it does not call main().
if __name__ == "__main__":
    main()