{
  "version": 3,
  "study": "Track A: expanded corpus (n=28) + v3 signal-level changes",
  "corpus_size": 28,
  "per_stratum_n": {
    "A": 8,
    "B": 10,
    "C": 10
  },
  "aggregate": {
    "macro_f1_v1_vs_maj_union": 0.167,
    "macro_f1_v3_vs_maj_union": 0.36,
    "macro_f1_v3_vs_curator": 0.391,
    "macro_recall_v3_vs_maj_union": 0.271,
    "delta_macro_f1": 0.193,
    "per_stratum_v1_f1": {
      "A": 0.317,
      "B": 0.171,
      "C": 0.09
    },
    "per_stratum_v3_f1": {
      "A": 0.33,
      "B": 0.326,
      "C": 0.487
    }
  },
  "per_frame": {
    "FVS-001": {
      "n_A_exhibits": 1,
      "n_B_exhibits": 0,
      "n_v1_exhibits": 8,
      "n_v3_exhibits": 0,
      "n_majority_union": 1,
      "f1_v1_vs_maj_u": 0.0,
      "f1_v3_vs_maj_u": 0.0,
      "f1_v3_vs_A": 0.0,
      "precision_v3": 0.0,
      "recall_v3": 0.0,
      "delta_f1": 0.0,
      "tp_fp_fn_tn_v3": [
        0,
        0,
        1,
        27
      ]
    },
    "FVS-002": {
      "n_A_exhibits": 1,
      "n_B_exhibits": 20,
      "n_v1_exhibits": 4,
      "n_v3_exhibits": 4,
      "n_majority_union": 20,
      "f1_v1_vs_maj_u": 0.25,
      "f1_v3_vs_maj_u": 0.25,
      "f1_v3_vs_A": 0.4,
      "precision_v3": 0.75,
      "recall_v3": 0.15,
      "delta_f1": 0.0,
      "tp_fp_fn_tn_v3": [
        3,
        1,
        17,
        7
      ]
    },
    "FVS-007": {
      "n_A_exhibits": 2,
      "n_B_exhibits": 0,
      "n_v1_exhibits": 10,
      "n_v3_exhibits": 3,
      "n_majority_union": 2,
      "f1_v1_vs_maj_u": 0.0,
      "f1_v3_vs_maj_u": 0.0,
      "f1_v3_vs_A": 0.0,
      "precision_v3": 0.0,
      "recall_v3": 0.0,
      "delta_f1": 0.0,
      "tp_fp_fn_tn_v3": [
        0,
        3,
        2,
        23
      ]
    },
    "FVS-008": {
      "n_A_exhibits": 3,
      "n_B_exhibits": 7,
      "n_v1_exhibits": 9,
      "n_v3_exhibits": 3,
      "n_majority_union": 7,
      "f1_v1_vs_maj_u": 0.25,
      "f1_v3_vs_maj_u": 0.4,
      "f1_v3_vs_A": 0.333,
      "precision_v3": 0.667,
      "recall_v3": 0.286,
      "delta_f1": 0.15,
      "tp_fp_fn_tn_v3": [
        2,
        1,
        5,
        20
      ]
    },
    "FVS-009": {
      "n_A_exhibits": 6,
      "n_B_exhibits": 16,
      "n_v1_exhibits": 3,
      "n_v3_exhibits": 10,
      "n_majority_union": 16,
      "f1_v1_vs_maj_u": 0.211,
      "f1_v3_vs_maj_u": 0.615,
      "f1_v3_vs_A": 0.5,
      "precision_v3": 0.8,
      "recall_v3": 0.5,
      "delta_f1": 0.405,
      "tp_fp_fn_tn_v3": [
        8,
        2,
        8,
        10
      ]
    },
    "FVS-010": {
      "n_A_exhibits": 2,
      "n_B_exhibits": 22,
      "n_v1_exhibits": 5,
      "n_v3_exhibits": 5,
      "n_majority_union": 22,
      "f1_v1_vs_maj_u": 0.296,
      "f1_v3_vs_maj_u": 0.296,
      "f1_v3_vs_A": 0.286,
      "precision_v3": 0.8,
      "recall_v3": 0.182,
      "delta_f1": 0.0,
      "tp_fp_fn_tn_v3": [
        4,
        1,
        18,
        5
      ]
    },
    "FVS-011": {
      "n_A_exhibits": 6,
      "n_B_exhibits": 7,
      "n_v1_exhibits": 9,
      "n_v3_exhibits": 7,
      "n_majority_union": 10,
      "f1_v1_vs_maj_u": 0.632,
      "f1_v3_vs_maj_u": 0.588,
      "f1_v3_vs_A": 0.462,
      "precision_v3": 0.714,
      "recall_v3": 0.5,
      "delta_f1": -0.043,
      "tp_fp_fn_tn_v3": [
        5,
        2,
        5,
        16
      ]
    },
    "FVS-012": {
      "n_A_exhibits": 6,
      "n_B_exhibits": 17,
      "n_v1_exhibits": 2,
      "n_v3_exhibits": 8,
      "n_majority_union": 18,
      "f1_v1_vs_maj_u": 0.2,
      "f1_v3_vs_maj_u": 0.538,
      "f1_v3_vs_A": 0.571,
      "precision_v3": 0.875,
      "recall_v3": 0.389,
      "delta_f1": 0.338,
      "tp_fp_fn_tn_v3": [
        7,
        1,
        11,
        9
      ]
    },
    "FVS-014": {
      "n_A_exhibits": 7,
      "n_B_exhibits": 22,
      "n_v1_exhibits": 0,
      "n_v3_exhibits": 5,
      "n_majority_union": 22,
      "f1_v1_vs_maj_u": 0.0,
      "f1_v3_vs_maj_u": 0.37,
      "f1_v3_vs_A": 0.833,
      "precision_v3": 1.0,
      "recall_v3": 0.227,
      "delta_f1": 0.37,
      "tp_fp_fn_tn_v3": [
        5,
        0,
        17,
        6
      ]
    },
    "FVS-015": {
      "n_A_exhibits": 2,
      "n_B_exhibits": 1,
      "n_v1_exhibits": 3,
      "n_v3_exhibits": 2,
      "n_majority_union": 2,
      "f1_v1_vs_maj_u": 0.0,
      "f1_v3_vs_maj_u": 0.5,
      "f1_v3_vs_A": 0.5,
      "precision_v3": 0.5,
      "recall_v3": 0.5,
      "delta_f1": 0.5,
      "tp_fp_fn_tn_v3": [
        1,
        1,
        1,
        25
      ]
    },
    "FVS-016": {
      "n_A_exhibits": 15,
      "n_B_exhibits": 6,
      "n_v1_exhibits": 0,
      "n_v3_exhibits": 4,
      "n_majority_union": 16,
      "f1_v1_vs_maj_u": 0.0,
      "f1_v3_vs_maj_u": 0.4,
      "f1_v3_vs_A": 0.421,
      "precision_v3": 1.0,
      "recall_v3": 0.25,
      "delta_f1": 0.4,
      "tp_fp_fn_tn_v3": [
        4,
        0,
        12,
        12
      ]
    }
  },
  "per_document": {
    "a01_altman_intelligence_age": {
      "stratum": "A",
      "A_labels": [
        "FVS-001",
        "FVS-002",
        "FVS-007",
        "FVS-008",
        "FVS-010",
        "FVS-014"
      ],
      "B_labels": [
        "FVS-002",
        "FVS-008",
        "FVS-010",
        "FVS-012",
        "FVS-014"
      ],
      "v1_labels": [
        "FVS-002"
      ],
      "v3_labels": [
        "FVS-002",
        "FVS-008",
        "FVS-014"
      ],
      "f1_v1": 0.25,
      "f1_v3": 0.6
    },
    "a02_fomc_march_2026": {
      "stratum": "A",
      "A_labels": [
        "FVS-009",
        "FVS-012"
      ],
      "B_labels": [
        "FVS-002",
        "FVS-009",
        "FVS-010",
        "FVS-012",
        "FVS-014"
      ],
      "v1_labels": [
        "FVS-009",
        "FVS-012"
      ],
      "v3_labels": [
        "FVS-009",
        "FVS-012"
      ],
      "f1_v1": 0.571,
      "f1_v3": 0.571
    },
    "b01_nvidia_investment": {
      "stratum": "B",
      "A_labels": [
        "FVS-008",
        "FVS-009",
        "FVS-012"
      ],
      "B_labels": [
        "FVS-002",
        "FVS-008",
        "FVS-009",
        "FVS-010",
        "FVS-012",
        "FVS-014",
        "FVS-016"
      ],
      "v1_labels": [
        "FVS-001",
        "FVS-011"
      ],
      "v3_labels": [
        "FVS-009",
        "FVS-012"
      ],
      "f1_v1": 0.0,
      "f1_v3": 0.444
    },
    "b02_automation_policy": {
      "stratum": "B",
      "A_labels": [
        "FVS-010",
        "FVS-011"
      ],
      "B_labels": [
        "FVS-002",
        "FVS-009",
        "FVS-010",
        "FVS-011",
        "FVS-012"
      ],
      "v1_labels": [
        "FVS-009",
        "FVS-010",
        "FVS-011",
        "FVS-012"
      ],
      "v3_labels": [
        "FVS-009",
        "FVS-010",
        "FVS-011",
        "FVS-012",
        "FVS-015"
      ],
      "f1_v1": 0.889,
      "f1_v3": 0.8
    },
    "b03_social_media_adolescents": {
      "stratum": "B",
      "A_labels": [
        "FVS-012",
        "FVS-016"
      ],
      "B_labels": [
        "FVS-002",
        "FVS-009",
        "FVS-010",
        "FVS-012",
        "FVS-014",
        "FVS-016"
      ],
      "v1_labels": [
        "FVS-011"
      ],
      "v3_labels": [
        "FVS-009",
        "FVS-011",
        "FVS-012"
      ],
      "f1_v1": 0.0,
      "f1_v3": 0.444
    },
    "b04_llm_customer_support": {
      "stratum": "B",
      "A_labels": [
        "FVS-011",
        "FVS-015"
      ],
      "B_labels": [
        "FVS-002",
        "FVS-008",
        "FVS-009",
        "FVS-010",
        "FVS-015"
      ],
      "v1_labels": [
        "FVS-001",
        "FVS-011"
      ],
      "v3_labels": [
        "FVS-009",
        "FVS-011"
      ],
      "f1_v1": 0.25,
      "f1_v3": 0.5
    },
    "b05_remote_work_productivity": {
      "stratum": "B",
      "A_labels": [
        "FVS-011"
      ],
      "B_labels": [
        "FVS-002",
        "FVS-010"
      ],
      "v1_labels": [
        "FVS-001",
        "FVS-007",
        "FVS-008",
        "FVS-011"
      ],
      "v3_labels": [],
      "f1_v1": 0.286,
      "f1_v3": 0.0
    },
    "b06_quantum_computing_outlook": {
      "stratum": "B",
      "A_labels": [
        "FVS-012",
        "FVS-014",
        "FVS-016"
      ],
      "B_labels": [
        "FVS-002",
        "FVS-009",
        "FVS-010",
        "FVS-012",
        "FVS-014",
        "FVS-016"
      ],
      "v1_labels": [
        "FVS-001",
        "FVS-015"
      ],
      "v3_labels": [
        "FVS-008",
        "FVS-012"
      ],
      "f1_v1": 0.0,
      "f1_v3": 0.25
    },
    "c01_wikipedia_semaglutide": {
      "stratum": "C",
      "A_labels": [
        "FVS-016"
      ],
      "B_labels": [
        "FVS-009",
        "FVS-012"
      ],
      "v1_labels": [
        "FVS-015"
      ],
      "v3_labels": [
        "FVS-009",
        "FVS-016"
      ],
      "f1_v1": 0.0,
      "f1_v3": 0.8
    },
    "c02_wikipedia_eu_ai_act": {
      "stratum": "C",
      "A_labels": [
        "FVS-009",
        "FVS-016"
      ],
      "B_labels": [
        "FVS-009",
        "FVS-011",
        "FVS-014"
      ],
      "v1_labels": [
        "FVS-010",
        "FVS-011"
      ],
      "v3_labels": [
        "FVS-009",
        "FVS-010",
        "FVS-011",
        "FVS-016"
      ],
      "f1_v1": 0.333,
      "f1_v3": 0.75
    },
    "c03_wikipedia_quantum_supremacy": {
      "stratum": "C",
      "A_labels": [
        "FVS-014",
        "FVS-016"
      ],
      "B_labels": [
        "FVS-009",
        "FVS-012",
        "FVS-014"
      ],
      "v1_labels": [
        "FVS-007",
        "FVS-008"
      ],
      "v3_labels": [
        "FVS-007",
        "FVS-014",
        "FVS-016"
      ],
      "f1_v1": 0.0,
      "f1_v3": 0.571
    },
    "c04_wikipedia_ubi": {
      "stratum": "C",
      "A_labels": [
        "FVS-014",
        "FVS-016"
      ],
      "B_labels": [
        "FVS-010",
        "FVS-011",
        "FVS-014"
      ],
      "v1_labels": [
        "FVS-007",
        "FVS-008",
        "FVS-011"
      ],
      "v3_labels": [
        "FVS-011"
      ],
      "f1_v1": 0.286,
      "f1_v3": 0.4
    },
    "a03_pg_how_to_do_great_work": {
      "stratum": "A",
      "A_labels": [
        "FVS-007"
      ],
      "B_labels": [
        "FVS-002",
        "FVS-010",
        "FVS-014"
      ],
      "v1_labels": [
        "FVS-010"
      ],
      "v3_labels": [
        "FVS-010"
      ],
      "f1_v1": 0.4,
      "f1_v3": 0.4
    },
    "b07_traditional_publishing": {
      "stratum": "B",
      "A_labels": [
        "FVS-011"
      ],
      "B_labels": [
        "FVS-002",
        "FVS-009",
        "FVS-010",
        "FVS-012",
        "FVS-014"
      ],
      "v1_labels": [
        "FVS-001",
        "FVS-015"
      ],
      "v3_labels": [],
      "f1_v1": 0.0,
      "f1_v3": 0.0
    },
    "b08_ai_medical_ethics": {
      "stratum": "B",
      "A_labels": [
        "FVS-009",
        "FVS-011"
      ],
      "B_labels": [
        "FVS-002",
        "FVS-009",
        "FVS-010",
        "FVS-011",
        "FVS-014"
      ],
      "v1_labels": [
        "FVS-001",
        "FVS-011"
      ],
      "v3_labels": [
        "FVS-009",
        "FVS-011"
      ],
      "f1_v1": 0.286,
      "f1_v3": 0.571
    },
    "b09_online_degrees": {
      "stratum": "B",
      "A_labels": [
        "FVS-011"
      ],
      "B_labels": [
        "FVS-002",
        "FVS-009",
        "FVS-010",
        "FVS-011"
      ],
      "v1_labels": [],
      "v3_labels": [],
      "f1_v1": 0.0,
      "f1_v3": 0.0
    },
    "b10_carbon_accounting": {
      "stratum": "B",
      "A_labels": [
        "FVS-015",
        "FVS-016"
      ],
      "B_labels": [
        "FVS-002",
        "FVS-010",
        "FVS-012",
        "FVS-016"
      ],
      "v1_labels": [
        "FVS-001",
        "FVS-011"
      ],
      "v3_labels": [
        "FVS-009",
        "FVS-011",
        "FVS-015"
      ],
      "f1_v1": 0.0,
      "f1_v3": 0.25
    },
    "c05_wikipedia_nuclear_fusion": {
      "stratum": "C",
      "A_labels": [
        "FVS-014",
        "FVS-016"
      ],
      "B_labels": [
        "FVS-014"
      ],
      "v1_labels": [
        "FVS-007",
        "FVS-008"
      ],
      "v3_labels": [
        "FVS-014"
      ],
      "f1_v1": 0.0,
      "f1_v3": 0.667
    },
    "c06_wikipedia_inflation": {
      "stratum": "C",
      "A_labels": [
        "FVS-016"
      ],
      "B_labels": [
        "FVS-002",
        "FVS-010",
        "FVS-012",
        "FVS-014"
      ],
      "v1_labels": [
        "FVS-009",
        "FVS-010"
      ],
      "v3_labels": [
        "FVS-009",
        "FVS-010",
        "FVS-012"
      ],
      "f1_v1": 0.286,
      "f1_v3": 0.5
    },
    "c07_wikipedia_climate_mitigation": {
      "stratum": "C",
      "A_labels": [
        "FVS-016"
      ],
      "B_labels": [
        "FVS-002",
        "FVS-009",
        "FVS-010",
        "FVS-012",
        "FVS-014",
        "FVS-016"
      ],
      "v1_labels": [
        "FVS-007",
        "FVS-008"
      ],
      "v3_labels": [],
      "f1_v1": 0.0,
      "f1_v3": 0.0
    },
    "c08_wikipedia_genai": {
      "stratum": "C",
      "A_labels": [
        "FVS-014",
        "FVS-016"
      ],
      "B_labels": [
        "FVS-008",
        "FVS-009",
        "FVS-010",
        "FVS-014"
      ],
      "v1_labels": [],
      "v3_labels": [
        "FVS-012",
        "FVS-014",
        "FVS-016"
      ],
      "f1_v1": 0.0,
      "f1_v3": 0.5
    },
    "c09_wikipedia_gerrymandering": {
      "stratum": "C",
      "A_labels": [
        "FVS-014",
        "FVS-016"
      ],
      "B_labels": [
        "FVS-011",
        "FVS-012",
        "FVS-014"
      ],
      "v1_labels": [
        "FVS-007"
      ],
      "v3_labels": [
        "FVS-014"
      ],
      "f1_v1": 0.0,
      "f1_v3": 0.4
    },
    "c10_wikipedia_microplastic": {
      "stratum": "C",
      "A_labels": [
        "FVS-009",
        "FVS-016"
      ],
      "B_labels": [
        "FVS-002",
        "FVS-009",
        "FVS-010",
        "FVS-012",
        "FVS-014",
        "FVS-016"
      ],
      "v1_labels": [
        "FVS-008"
      ],
      "v3_labels": [
        "FVS-012"
      ],
      "f1_v1": 0.0,
      "f1_v3": 0.286
    },
    "a06_wolfram_chatgpt": {
      "stratum": "A",
      "A_labels": [
        "FVS-012",
        "FVS-016"
      ],
      "B_labels": [
        "FVS-010",
        "FVS-014"
      ],
      "v1_labels": [
        "FVS-002",
        "FVS-010"
      ],
      "v3_labels": [
        "FVS-002",
        "FVS-010"
      ],
      "f1_v1": 0.333,
      "f1_v3": 0.333
    },
    "a08_arxiv_gpt3": {
      "stratum": "A",
      "A_labels": [
        "FVS-008"
      ],
      "B_labels": [
        "FVS-002",
        "FVS-008",
        "FVS-010",
        "FVS-014"
      ],
      "v1_labels": [
        "FVS-002",
        "FVS-007"
      ],
      "v3_labels": [
        "FVS-002",
        "FVS-007"
      ],
      "f1_v1": 0.333,
      "f1_v3": 0.333
    },
    "a09_arxiv_foundation_models": {
      "stratum": "A",
      "A_labels": [
        "FVS-009",
        "FVS-012"
      ],
      "B_labels": [
        "FVS-002",
        "FVS-008",
        "FVS-009",
        "FVS-010",
        "FVS-011",
        "FVS-012",
        "FVS-014"
      ],
      "v1_labels": [
        "FVS-001",
        "FVS-002",
        "FVS-007",
        "FVS-008"
      ],
      "v3_labels": [
        "FVS-002",
        "FVS-007",
        "FVS-008"
      ],
      "f1_v1": 0.364,
      "f1_v3": 0.4
    },
    "a04_pg_cities_and_ambition": {
      "stratum": "A",
      "A_labels": [],
      "B_labels": [
        "FVS-002",
        "FVS-008",
        "FVS-010",
        "FVS-012",
        "FVS-014"
      ],
      "v1_labels": [
        "FVS-007",
        "FVS-008"
      ],
      "v3_labels": [],
      "f1_v1": 0.286,
      "f1_v3": 0.0
    },
    "a05_pg_how_you_know": {
      "stratum": "A",
      "A_labels": [
        "FVS-016"
      ],
      "B_labels": [
        "FVS-002",
        "FVS-012",
        "FVS-014"
      ],
      "v1_labels": [
        "FVS-007",
        "FVS-008"
      ],
      "v3_labels": [],
      "f1_v1": 0.0,
      "f1_v3": 0.0
    }
  },
  "hypothesis_tests": {
    "H-A1_FVS-012_F1_>=_0.35": {
      "observed": 0.538,
      "threshold": 0.35,
      "verdict": "PASS"
    },
    "H-A2_FVS-016_F1_>=_0.35": {
      "observed": 0.4,
      "threshold": 0.35,
      "verdict": "PASS"
    },
    "H-A3_FVS-008_F1_>=_0.35": {
      "observed": 0.4,
      "threshold": 0.35,
      "verdict": "PASS"
    },
    "H-A4_FVS-015_F1_>=_0.35": {
      "observed": 0.5,
      "threshold": 0.35,
      "verdict": "PASS"
    },
    "H-A5_macro-F1_>=_0.4": {
      "observed": 0.36,
      "threshold": 0.4,
      "verdict": "FAIL"
    },
    "H-A7_macro-recall_>=_0.5": {
      "observed": 0.271,
      "threshold": 0.5,
      "verdict": "FAIL"
    }
  }
}