""" EXP-025 Clean Decomposition: Specificity × Quality Demands ============================================================ Separates three confounded effects from the original d=2.34: 1. Specificity content (reference Northvane's assets vs generic) 2. Quality demands (detailed/thorough vs no quality instruction) 3. Output length (forced to 500 words) Design: 2×2 × 10 runs × 1 generator = 40 outputs All instructions ~20 words. All outputs forced to 500 words. Conditions: SPEC+QUAL: Specific + quality demands (20 words) SPEC_ONLY: Specific, no quality demands (20 words) QUAL_ONLY: Quality demands, no specificity (20 words) BARE: Minimal instruction (20 words, neutral filler) The test: SPEC+QUAL > QUAL_ONLY → specificity adds value beyond quality demands SPEC_ONLY > BARE → specificity alone works (no quality demands needed) QUAL_ONLY > BARE → quality demands alone work (no specificity needed) SPEC_ONLY ≈ SPEC+QUAL → quality demands don't add to specificity """ import json import sys import time import re import random import argparse from pathlib import Path from datetime import datetime, timezone sys.path.insert(0, str(Path(__file__).parent)) from _config import get_xai_client, call_generator, bootstrap_ci, hedges_g DATA_DIR = Path(__file__).parent # ── Conditions (all ~20 words, matched length) ────────────────────────────── CONDITIONS = { "SPEC_QUAL": { "label": "Specific + Quality demands (20 words)", "instruction": ( "Ground every point in Northvane's specific assets and current situation. " "Provide detailed and thorough analysis with clear reasoning throughout." ), "has_specificity": True, "has_quality_demands": True, }, "SPEC_ONLY": { "label": "Specific only, no quality demands (20 words)", "instruction": ( "Ground every point in Northvane's specific assets and current situation. " "Write your strategic recommendation for their next twelve months." ), "has_specificity": True, "has_quality_demands": False, }, "QUAL_ONLY": { "label": "Quality demands only, no specificity (19 words)", "instruction": ( "Provide detailed and thorough analysis with clear reasoning throughout. " "Write your strategic recommendation for the company's next twelve months." ), "has_specificity": False, "has_quality_demands": True, }, "BARE": { "label": "Neutral filler, no specificity or quality demands (19 words)", "instruction": ( "Write your strategic recommendation for the company's next twelve months. " "Cover the key strategic considerations and options for their situation." ), "has_specificity": False, "has_quality_demands": False, }, } # Verify word counts for k, v in CONDITIONS.items(): wc = len(v["instruction"].split()) assert 17 <= wc <= 22, f"{k}: {wc} words (expected ~19-20)" CONTEXT = """Northvane Analytics is a B2B data infrastructure company serving mid-market logistics firms. Key facts: - 47 employees, $8M ARR, 40% YoY growth - 12 engineers (capacity constraint) - Proprietary route optimization algorithm built on 5 years of regional shipping data - Competitors: broad-spectrum analytics platforms without logistics specialization - Strategic question: Expand to supply chain visibility or deepen core logistics offering?""" PROMPT_TEMPLATE = """{instruction} {context} Write exactly 500 words. Do not exceed 500 words.""" RUNS_PER_CONDITION = 10 GENERATOR = "grok-4-1-fast" # ── Specificity Scoring (same heuristic as EXP-025) ───────────────────────── def score_specificity(text): """6-component specificity score matching EXP-025 methodology.""" text_lower = text.lower() # M1: Company name mentions m1 = len(re.findall(r'northvane', text_lower)) # M2: Scenario numbers used scenario_nums = [r'\b47\b', r'\$?8\s*m\b|8\s*million', r'40\s*%', r'\b12\b\s*engineer', r'5[\s-]year|five[\s-]year'] m2 = sum(1 for p in scenario_nums if re.search(p, text_lower)) # M3: Market specifics market_terms = [r'regional', r'3pl|third[\s-]party\s+logistic', r'trucking', r'logistics', r'shipping', r'mid[\s-]market', r'supply\s+chain'] m3 = sum(len(re.findall(p, text_lower)) for p in market_terms) # M4: Algorithm/proprietary mentions algo_terms = [r'algorithm', r'proprietary', r'route\s+optim', r'data\s+moat', r'competitive\s+advantage'] m4 = sum(1 for p in algo_terms if re.search(p, text_lower)) # M5: Named competitors m5 = len(re.findall(r'(?:project44|fourkites|descartes|oracle|sap|flexport|convoy)', text_lower)) # M6: Specific recommendations tied to unique assets specific_recs = [r'deepen\s+(?:core|logistics)', r'expand.*?supply\s+chain', r'partner', r'api\s+(?:access|integration|layer)', r'verticali[sz]e', r'enterprise', r'upsell|cross[\s-]sell'] m6 = sum(1 for p in specific_recs if re.search(p, text_lower)) total = m1 + m2 + m3 + m4 + m5 + m6 word_count = len(text.split()) density = total / (word_count / 1000) if word_count > 0 else 0 return { "total": total, "m1_company": m1, "m2_scenario": m2, "m3_market": m3, "m4_algorithm": m4, "m5_competitors": m5, "m6_specific_recs": m6, "word_count": word_count, "density_per_1kw": round(density, 1), } # ── Generation ─────────────────────────────────────────────────────────────── def generate_all(client, dry_run=False): """Generate all 40 outputs in randomized order.""" # Build run list runs = [] for cond_key, cond in CONDITIONS.items(): for run_num in range(1, RUNS_PER_CONDITION + 1): runs.append({ "condition": cond_key, "run": run_num, "instruction": cond["instruction"], }) # Randomize random.seed(42) random.shuffle(runs) results = {k: [] for k in CONDITIONS} for i, run in enumerate(runs): prompt = PROMPT_TEMPLATE.format( instruction=run["instruction"], context=CONTEXT, ) print(f" [{i+1:2d}/{len(runs)}] {run['condition']:10s} run {run['run']:2d}: ", end="", flush=True) if dry_run: # Synthetic output with condition-dependent variation words = 500 if CONDITIONS[run["condition"]]["has_specificity"]: text = ("Northvane Analytics should deepen its core logistics offering. " * 25 + "The 12-engineer team and $8M ARR position them for regional expansion. " * 10) else: text = ("The company should focus on strategic growth opportunities. " * 25 + "A comprehensive analysis suggests several key considerations. " * 10) text = " ".join(text.split()[:words]) print(f"dry-run ({len(text.split())}w)") else: messages = [{"role": "user", "content": prompt}] text = call_generator(client, GENERATOR, messages) print(f"done ({len(text.split())}w)") time.sleep(3) score = score_specificity(text) results[run["condition"]].append({ "run": run["run"], "text": text, "score": score, }) return results # ── Analysis ───────────────────────────────────────────────────────────────── def analyze(results): """Compute all pairwise comparisons.""" print("\n" + "=" * 70) print(" RESULTS: 2×2 Specificity × Quality Demands") print("=" * 70) # Per-condition summary print("\n Condition | Spec | Qual | Score | Density | Words") print(" " + "-" * 60) for k in ["SPEC_QUAL", "SPEC_ONLY", "QUAL_ONLY", "BARE"]: entries = results[k] scores = [e["score"]["total"] for e in entries] densities = [e["score"]["density_per_1kw"] for e in entries] words = [e["score"]["word_count"] for e in entries] c = CONDITIONS[k] spec = "Y" if c["has_specificity"] else "N" qual = "Y" if c["has_quality_demands"] else "N" print(f" {k:14s} | {spec} | {qual} | {sum(scores)/len(scores):5.1f} | " f"{sum(densities)/len(densities):6.1f} | {sum(words)/len(words):.0f}") # 2×2 cell means print("\n 2×2 Design:") print(" Specific Not Specific") sq = [e["score"]["total"] for e in results["SPEC_QUAL"]] so = [e["score"]["total"] for e in results["SPEC_ONLY"]] qo = [e["score"]["total"] for e in results["QUAL_ONLY"]] ba = [e["score"]["total"] for e in results["BARE"]] print(f" Quality demands: {sum(sq)/len(sq):5.1f} {sum(qo)/len(qo):5.1f}") print(f" No quality: {sum(so)/len(so):5.1f} {sum(ba)/len(ba):5.1f}") # Same for density sq_d = [e["score"]["density_per_1kw"] for e in results["SPEC_QUAL"]] so_d = [e["score"]["density_per_1kw"] for e in results["SPEC_ONLY"]] qo_d = [e["score"]["density_per_1kw"] for e in results["QUAL_ONLY"]] ba_d = [e["score"]["density_per_1kw"] for e in results["BARE"]] print(f"\n Density (/1Kw):") print(f" Specific Not Specific") print(f" Quality demands: {sum(sq_d)/len(sq_d):5.1f} {sum(qo_d)/len(qo_d):5.1f}") print(f" No quality: {sum(so_d)/len(so_d):5.1f} {sum(ba_d)/len(ba_d):5.1f}") # Main effects print("\n" + "=" * 70) print(" MAIN EFFECTS (raw score)") print("=" * 70) spec_all = sq + so nospec_all = qo + ba ci_spec = bootstrap_ci(spec_all, nospec_all) g_spec = hedges_g(spec_all, nospec_all) print(f"\n Specificity main effect:") print(f" Specific: {sum(spec_all)/len(spec_all):.1f} Not: {sum(nospec_all)/len(nospec_all):.1f}") print(f" d={ci_spec['d']:.3f} [{ci_spec['ci_lower']:.3f}, {ci_spec['ci_upper']:.3f}] g={g_spec['g']:.3f}") qual_all = sq + qo noqual_all = so + ba ci_qual = bootstrap_ci(qual_all, noqual_all) g_qual = hedges_g(qual_all, noqual_all) print(f"\n Quality demands main effect:") print(f" Quality: {sum(qual_all)/len(qual_all):.1f} Not: {sum(noqual_all)/len(noqual_all):.1f}") print(f" d={ci_qual['d']:.3f} [{ci_qual['ci_lower']:.3f}, {ci_qual['ci_upper']:.3f}] g={g_qual['g']:.3f}") # Same for density print("\n" + "=" * 70) print(" MAIN EFFECTS (density per 1Kw)") print("=" * 70) spec_d = sq_d + so_d nospec_d = qo_d + ba_d ci_spec_d = bootstrap_ci(spec_d, nospec_d) g_spec_d = hedges_g(spec_d, nospec_d) print(f"\n Specificity main (density):") print(f" Specific: {sum(spec_d)/len(spec_d):.1f} Not: {sum(nospec_d)/len(nospec_d):.1f}") print(f" d={ci_spec_d['d']:.3f} [{ci_spec_d['ci_lower']:.3f}, {ci_spec_d['ci_upper']:.3f}] g={g_spec_d['g']:.3f}") qual_d = sq_d + qo_d noqual_d = so_d + ba_d ci_qual_d = bootstrap_ci(qual_d, noqual_d) g_qual_d = hedges_g(qual_d, noqual_d) print(f"\n Quality demands main (density):") print(f" Quality: {sum(qual_d)/len(qual_d):.1f} Not: {sum(noqual_d)/len(noqual_d):.1f}") print(f" d={ci_qual_d['d']:.3f} [{ci_qual_d['ci_lower']:.3f}, {ci_qual_d['ci_upper']:.3f}] g={g_qual_d['g']:.3f}") # Key pairwise print("\n" + "=" * 70) print(" KEY PAIRWISE COMPARISONS") print("=" * 70) pairs = [ ("SPEC_ONLY vs BARE", so, ba, "Specificity alone (no quality demands)"), ("QUAL_ONLY vs BARE", qo, ba, "Quality demands alone (no specificity)"), ("SPEC_QUAL vs QUAL_ONLY", sq, qo, "Adding specificity to quality demands"), ("SPEC_QUAL vs SPEC_ONLY", sq, so, "Adding quality demands to specificity"), ] for label, a, b, desc in pairs: ci = bootstrap_ci(a, b) g = hedges_g(a, b) print(f"\n {label}: ({desc})") print(f" {sum(a)/len(a):.1f} vs {sum(b)/len(b):.1f}") print(f" d={ci['d']:.3f} [{ci['ci_lower']:.3f}, {ci['ci_upper']:.3f}] g={g['g']:.3f}") # Same at density pairs_d = [ ("SPEC_ONLY vs BARE (density)", so_d, ba_d, "Specificity alone at density"), ("QUAL_ONLY vs BARE (density)", qo_d, ba_d, "Quality demands alone at density"), ("SPEC_QUAL vs QUAL_ONLY (density)", sq_d, qo_d, "Adding specificity at density"), ("SPEC_QUAL vs SPEC_ONLY (density)", sq_d, so_d, "Adding quality demands at density"), ] print("\n --- Density comparisons ---") for label, a, b, desc in pairs_d: ci = bootstrap_ci(a, b) g = hedges_g(a, b) print(f"\n {label}:") print(f" {sum(a)/len(a):.1f} vs {sum(b)/len(b):.1f}") print(f" d={ci['d']:.3f} [{ci['ci_lower']:.3f}, {ci['ci_upper']:.3f}] g={g['g']:.3f}") # Interpretation print("\n" + "=" * 70) print(" INTERPRETATION") print("=" * 70) spec_d_val = ci_spec_d['d'] qual_d_val = ci_qual_d['d'] spec_excl = ci_spec_d['ci_lower'] > 0 or ci_spec_d['ci_upper'] < 0 qual_excl = ci_qual_d['ci_lower'] > 0 or ci_qual_d['ci_upper'] < 0 print(f"\n Specificity main (density): d={spec_d_val:.3f} {'*' if spec_excl else '(CI includes 0)'}") print(f" Quality demands main (density): d={qual_d_val:.3f} {'*' if qual_excl else '(CI includes 0)'}") if spec_excl and not qual_excl: print("\n → SPECIFICITY is the mechanism. Quality demands alone don't help.") print(" The vault's C1 claim (interpretation gap) survives.") elif qual_excl and not spec_excl: print("\n → QUALITY DEMANDS are the mechanism. Specificity alone doesn't help.") print(" The vault's C1 claim needs revision: it's 'more instruction' not 'specificity.'") elif spec_excl and qual_excl: print("\n → BOTH contribute independently. Specificity + quality demands = additive.") else: print("\n → NEITHER reaches significance at density level. N=10 may be too small.") return { "main_effects": { "specificity_raw": {"d": ci_spec['d'], "ci": [ci_spec['ci_lower'], ci_spec['ci_upper']]}, "quality_raw": {"d": ci_qual['d'], "ci": [ci_qual['ci_lower'], ci_qual['ci_upper']]}, "specificity_density": {"d": ci_spec_d['d'], "ci": [ci_spec_d['ci_lower'], ci_spec_d['ci_upper']]}, "quality_density": {"d": ci_qual_d['d'], "ci": [ci_qual_d['ci_lower'], ci_qual_d['ci_upper']]}, } } # ── Main ───────────────────────────────────────────────────────────────────── def main(): parser = argparse.ArgumentParser(description="EXP-025 Clean 2×2 Decomposition") parser.add_argument("--dry-run", action="store_true") args = parser.parse_args() print("=" * 70) print(" EXP-025 Clean Decomposition: Specificity × Quality Demands") print(" 2×2 × 10 runs = 40 outputs, forced 500-word length") print(" Generator: grok-4-1-fast") print("=" * 70) if args.dry_run: print("\n [DRY RUN]\n") # Print conditions print("\n Conditions (~20 words each):") for k, v in CONDITIONS.items(): wc = len(v["instruction"].split()) print(f" {k:10s} ({wc}w): {v['instruction'][:70]}...") client = get_xai_client() if not args.dry_run else None results = generate_all(client, dry_run=args.dry_run) analysis = analyze(results) # Save output = { "experiment": "EXP-025-clean-decomposition", "design": { "factors": "2×2: specificity (Y/N) × quality_demands (Y/N)", "runs_per_cell": RUNS_PER_CONDITION, "total_outputs": RUNS_PER_CONDITION * 4, "generator": GENERATOR, "length_control": "500 words forced", "instruction_length_control": "all ~20 words", "conditions": {k: v["instruction"] for k, v in CONDITIONS.items()}, }, "results": {k: [{"run": e["run"], "score": e["score"]} for e in v] for k, v in results.items()}, "raw_outputs": {k: [{"run": e["run"], "score": e["score"], "text": e["text"]} for e in v] for k, v in results.items()}, "analysis": analysis, "timestamp": datetime.now(timezone.utc).isoformat(), } output_path = DATA_DIR / "rerun_025_clean_results.json" with open(output_path, "w") as f: json.dump(output, f, indent=2, default=str) print(f"\n Saved: {output_path}") if __name__ == "__main__": main()