From 5afbad11adff05a0490509244543e79e235c599a Mon Sep 17 00:00:00 2001 From: Sven Geboers Date: Sun, 5 Apr 2026 00:36:58 +0200 Subject: [PATCH] feat: add right-wing party axis validation - Add CANONICAL_RIGHT (PVV, FVD, JA21, SGP) and CANONICAL_LEFT frozensets to analysis/config.py as the canonical source of truth - Update analysis/svd_labels.py to import from config; re-export as RIGHT_PARTIES/LEFT_PARTIES for backward compatibility - Add build_window_party_scores helper to analysis/explorer_data.py - Add 7 integration tests in tests/test_axis_political_orientation.py validating that canonical right parties appear on the right side of SVD axes (x=component 1, y=component 2) using real DuckDB data --- analysis/config.py | 280 +++++++++ analysis/explorer_data.py | 563 ++++++++++++++++++ analysis/svd_labels.py | 37 +- ...t-right-wing-party-axis-validation-plan.md | 231 +++++++ tests/test_axis_political_orientation.py | 224 +++++++ tests/test_svd_labels.py | 42 +- 6 files changed, 1328 insertions(+), 49 deletions(-) create mode 100644 analysis/config.py create mode 100644 analysis/explorer_data.py create mode 100644 docs/plans/2026-04-05-001-feat-right-wing-party-axis-validation-plan.md create mode 100644 tests/test_axis_political_orientation.py diff --git a/analysis/config.py b/analysis/config.py new file mode 100644 index 0000000..2e9a2bf --- /dev/null +++ b/analysis/config.py @@ -0,0 +1,280 @@ +"""Configuration constants for the parliamentary explorer. + +This module contains all constant definitions used across the explorer. +It is intentionally free of Streamlit and DuckDB dependencies. 
+""" + +from __future__ import annotations + +from typing import Dict + +__all__ = [ + "PARTY_COLOURS", + "SVD_THEMES", + "KNOWN_MAJOR_PARTIES", + "CURRENT_PARLIAMENT_PARTIES", + "_PARTY_NORMALIZE", + "CANONICAL_RIGHT", + "CANONICAL_LEFT", +] + +CANONICAL_RIGHT: frozenset[str] = frozenset( + { + "PVV", + "FVD", + "JA21", + "SGP", + } +) + +CANONICAL_LEFT: frozenset[str] = frozenset( + { + "SP", + "PvdA", + "GL", + "GroenLinks", + "GroenLinks-PvdA", + "DENK", + "PvdD", + "Volt", + } +) + +PARTY_COLOURS: Dict[str, str] = { + "VVD": "#1E73BE", + "PVV": "#002366", + "D66": "#00A36C", + "CDA": "#4CAF50", + "SP": "#E53935", + "PvdA": "#D32F2F", + "GroenLinks": "#388E3C", + "GroenLinks-PvdA": "#2E7D32", + "CU": "#0288D1", + "SGP": "#F4511E", + "PvdD": "#43A047", + "FVD": "#6A1B9A", + "JA21": "#7B1FA2", + "BBB": "#8D6E63", + "NSC": "#FF8F00", + "Nieuw Sociaal Contract": "#FF8F00", + "DENK": "#00897B", + "50PLUS": "#7E57C2", + "Volt": "#572AB7", + "ChristenUnie": "#0288D1", + "Unknown": "#9E9E9E", +} + +SVD_THEMES: dict[int, dict[str, str]] = { + 1: { + "label": "Rechts kabinetsbeleid versus links oppositiebeleid", + "explanation": ( + "Deze as scheidt het rechts kabinetsbeleid van links oppositiebeleid. " + "Aan de positieve kant staan moties die passen bij het kabinetsbeleid: " + "Eurofighter Typhoons, defensie-uitgaven naar 3% bbp, F-35 reservedelen, " + "marine-steun aan Rode Zee en asielrestricties. " + "PVV, VVD, NSC en BBB scoren sterk positief. " + "Aan de negatieve kant staan moties uit de oppositie: " + "zorgbuurthuizen voor ouderen, boycot van Israël, sancties, en internationale " + "klimaatsamenwerking. GroenLinks-PvdA, SP, PvdD en Volt scoren negatief. " + "Deze as weerspiegelt de coalitie-oppositie dynamiek." 
+ ), + "positive_pole": "Kabinetsbeleid: PVV, VVD, NSC, BBB, JA21 — defensie en restricties", + "negative_pole": "Oppositiebeleid: GroenLinks-PvdA, SP, PvdD, Volt, DENK — zorg en multilateraal", + "flip": False, + }, + 2: { + "label": "PVV/FVD-populisme versus mainstream-partijen", + "explanation": ( + "Deze as scheidt het PVV/FVD-populisme van het overige parliament. " + "Alleen PVV en FVD scoren positief; alle andere partijen scoren negatief. " + "Positieve moties: Syriërs terugsturen, geen geld aan Jordanië, tijdelijke " + "bescherming Oekraïne beëindigen, uitstappen uit WHO en klimaatakkoorden. " + "Negatieve moties: digitale toegankelijkheid Caribisch Nederland, ethiekprogramma " + "Defensie, zorg voor slachtoffers bombardement Hawija, internationale klimaatsamenwerking. " + "Dit is geen links-rechts verdeling maar een populistisch vs. mainstream onderscheid." + ), + "positive_pole": "PVV en FVD — soevereiniteit en anti-establishment", + "negative_pole": "Overige partijen: VVD, CDA, SGP, ChristenUnie, GroenLinks-PvdA, D66, Volt, BBB", + "flip": False, + }, + 3: { + "label": "Verzorgingsstaat versus bezuinigingen en marktwerking", + "explanation": ( + "Deze as weerspiegelt de spanning tussen staatsingrijpen en marktliberalisme, " + "aangescherpt door de kabinetscrisis van 2025. Aan de positieve kant staan moties " + "die bezuinigingen op zorg en het gemeentefonds willen terugdraaien, winstuitkeringen " + "in de zorg verbieden en publieke controle over ziekenhuisfusies eisen. SP, PvdD, " + "GroenLinks-PvdA stemmen hier gelijk — ondanks hun tegengestelde PC1-posities. " + "Aan de negatieve kant staan moties " + "over marktwerking in de zorg, fiscale bedrijfsopvolgingsfaciliteiten (VVD), " + "doorgaan met besturen ondanks de kabinetscrisis (VVD/BBB) en defensie-" + "uitgaven van 3,5% bbp." 
+ ), + "positive_pole": "Pro-verzorgingsstaat: SP, PvdD, GroenLinks-PvdA (anti-bezuinigingen)", + "negative_pole": "Marktliberaal en fiscaal conservatief: VVD, D66, CDA, SGP, BBB", + "flip": True, + }, + 4: { + "label": "Mainstreampartijen versus FVD/DENK-oppositie", + "explanation": ( + "Deze as scheidt het mainstream parliament van FVD en DENK. " + "Aan de positieve kant stemmen vrijwel alle partijen voor dezelfde moties: " + "openbare toiletten, vaderbetrokkenheid bij opvoeding, internationale " + "samenwerking met Australië en Canada, en long covid-expertise. " + "D66, CDA, VVD, PVV, GL-PvdA, SP, Volt en 50PLUS stemmen allemaal samen. " + "Aan de negatieve kant stemmen alleen FVD en DENK voor — zij nemen " + "regelmatig gepolariseerde posities die afwijken van het mainstream." + ), + "positive_pole": "Mainstreampartijen: D66, CDA, VVD, PVV, GL-PvdA, SP, Volt, 50PLUS — breedgedragen moties", + "negative_pole": "FVD en DENK: oppositieposities buiten de mainstream", + "flip": True, + }, + 5: { + "label": "Christelijk-sociaal en gemeenschapswaarden versus progressieve individuele rechten", + "explanation": ( + "Deze as scheidt christelijk-sociale partijen van progressieve partijen op het " + "vlak van gemeenschapswaarden. Aan de positieve kant staan moties over " + "schuldhulpverlening via vrijwilligersorganisaties, maatschappelijke " + "diensttijd voor jongeren, gastouderopvang en financiële prikkels voor scholieren. " + "ChristenUnie, SGP, CDA en NSC voeren hier de toon; ook D66 en FVD scoren positief. " + "Aan de negatieve kant staan moties over wettelijke erkenning van meerouderschap, " + "abortusrecht in het EU-Handvest, armoedebeleid en sociaal-maatschappelijke thema's. " + "SP, VVD, GL-PvdA, PvdD en Volt scoren negatief." 
+ ), + "positive_pole": "Christelijk-sociaal: ChristenUnie, SGP, CDA, NSC — gemeenschap en vrijwilligers", + "negative_pole": "Progressief-individueel: SP, VVD, GL-PvdA, PvdD, Volt — individuele rechten", + "flip": False, + }, + 6: { + "label": "Migratie en cultuur versus klimaat en progressieve inclusie", + "explanation": ( + "Deze as combineert migratie- en culturele posities. Aan de positieve kant staan " + "moties over asielrestricties, nationale cultuur en identiteit, en beperkte " + "immigratie. PVV, JA21, BBB, CDA, ChristenUnie, VVD, SGP, FVD en DENK scoren positief. " + "Aan de negatieve kant staan moties over klimaatmaatregelen, progressieve " + "inclusie, discriminatiebestrijding en internationale samenwerking. " + "SP, PvdD, D66, GL-PvdA en Volt scoren negatief. " + "De as scheidt partijen met restrictief migratiebeleid van partijen met " + "progressief-inclusief beleid." + ), + "positive_pole": "Restrictief migratiebeleid: PVV, JA21, BBB, CDA, ChristenUnie, VVD, SGP, FVD, DENK", + "negative_pole": "Progressieve inclusie: SP, PvdD, D66, GL-PvdA, Volt — klimaat en diversiteit", + "flip": False, + }, + 7: { + "label": "Bestuurlijk pragmatisme en implementatie (indicatief)", + "explanation": ( + "Een residuele as die overwegend beleidsdossiers uit 2024 (vorige parlementaire " + "periode) omvat. De scores zijn smal (max ~11 punten) en de partijcombinaties " + "ideologisch divers — dit label is indicatief. Aan de positieve kant staan " + "pragmatische bestuursmoties: een compleet kostenoverzicht van producten van eigen " + "bodem, papieren schoolboeken voor basisvaardigheden, een invoeringstoets voor het " + "minimumloon en de A2-snelwegplanning. ChristenUnie, Volt, DENK en SP scoren " + "positief. Aan de negatieve kant staan meer ideologisch geladen moties: een " + "landelijk stookverbod (PvdD), het strafbaar stellen van verbranding van religieuze " + "geschriften (DENK), chroom-6 schadevergoedingen en tegenhouden van nieuwe " + "gaswinning. 
GroenLinks-PvdA, VVD, FVD en JA21 scoren negatief." + ), + "positive_pole": "Praktisch-bestuurlijk: ChristenUnie, Volt, SGP, DENK, SP", + "negative_pole": "Ideologisch-principieel: GroenLinks-PvdA, VVD, FVD, JA21", + "flip": True, + }, + 8: { + "label": "Vaccinatiebeleid, onderwijs en regionale huisvesting (indicatief)", + "explanation": ( + "Een residuele as die overwegend thematisch diverse moties uit 2024-2025 vangt. " + "Aan de positieve kant staan moties over vaccinatiegraad-verlaging voor kinderen, " + "een VWO-profiel kunst en cultuur, stages voor mbo-studenten in het buitenland, " + "en woningbouw voor jongeren in kleine kernen. BBB, SGP en JA21 scoren positief. " + "Aan de negatieve kant staan moties over het instellen van een vaccinatiecommissie, " + "heropening van het coronaoversterfte-onderzoek, regionale energiestrategieën " + "en toegankelijkheid van het basispakket. SP, DENK en PvdD scoren sterk negatief. " + "Deze as combineert onderwijs- en volksgezondheidsposities met regionale " + "huisvestingsprioriteiten — het label is indicatief." + ), + "positive_pole": "Onderwijs en volksgezondheid: BBB, SGP, JA21 — vaccinatie, profielkeuze, woningbouw", + "negative_pole": "Zorg en toegankelijkheid: SP, DENK, PvdD, Volt — coronaonderzoek, energie, basispakket", + "flip": False, + }, + 9: { + "label": "Pragmatische probleemoplossing versus systeemhervorming (indicatief)", + "explanation": ( + "Deze as scheidt pragmatische, concrete probleemoplossing van idealistische " + "systeemhervorming. Aan de positieve kant staan moties over naleving van de " + "Financiële-verhoudingswet voor gemeenten, beperking van arbeidsmigratie, " + "een nieuwe tandartsopleiding in Rotterdam, een actieplan tegen misbruik van " + "hallucinerende geneesmiddelen en oplossingen voor milieuproblemen op Bonaire. " + "SGP en ChristenUnie scoren sterk positief; ook DENK en SP. 
Aan de negatieve kant " + "staan moties over een moratorium op geitenstallen, een verbod op gokadvertenties, " + "verduidelijking van gronden voor voorlopige hechtenis, een leegstandbelasting " + "en end-to-end-encryptie. D66, JA21 en PVV scoren negatief. " + "Deze as is indicatief — de scores zijn smal en ideologisch divers." + ), + "positive_pole": "Pragmatisch-bestuurlijk: SGP, ChristenUnie, DENK, SP — concrete oplossingen", + "negative_pole": "Systeemhervorming: D66, JA21, PVV — idealistische beleidsposities", + "flip": True, + }, + 10: { + "label": "Kritisch op overheidsbemoeienis versus pro-regulering (indicatief)", + "explanation": ( + "Deze as scheidt partijen die kritisch staan tegenover overheidsbemoeienis van " + "partijen die strikte regulering en handhaving steunen. Aan de positieve kant " + "staan moties over minder tijdsintensieve schoolinspecties, het recht van " + "toeslagenouders op hun persoonlijk dossier, behoud van tegemoetkomingen voor " + "arbeidsongeschikten en verlaging van de leeftijdsdrempel voor kindgesprekken. " + "DENK, SP en PvdD scoren positief. Aan de negatieve kant staan moties over " + "een aangifteplicht voor scholen bij veiligheidsincidenten, een rookverbod in " + "auto's met kinderen, braakliggende landbouwgrond en verhoogd beloningsgeld " + "voor tipgevers. GroenLinks-PvdA scoort opvallend sterk negatief. " + "Deze as is indicatief — de scores zijn smal en de partijcombinaties divers." 
+ ), + "positive_pole": "Kritisch op overheidsbemoeienis: DENK, SP, PvdD — minder inspectielast en lastenverlichting", + "negative_pole": "Pro-regulering: GroenLinks-PvdA, CDA, SGP — veiligheid, naleving en handhaving", + "flip": True, + }, +} + +KNOWN_MAJOR_PARTIES = [ + "VVD", + "PVV", + "D66", + "GroenLinks-PvdA", + "GroenLinks", + "PvdA", + "CDA", + "SP", + "NSC", + "CU", + "BBB", +] + +CURRENT_PARLIAMENT_PARTIES: frozenset[str] = frozenset( + { + "PVV", + "VVD", + "NSC", + "BBB", + "D66", + "GroenLinks-PvdA", + "CDA", + "SP", + "ChristenUnie", + "SGP", + "Volt", + "DENK", + "PvdD", + "JA21", + "FVD", + } +) + +_PARTY_NORMALIZE: dict[str, str] = { + "Nieuw Sociaal Contract": "NSC", + "CU": "ChristenUnie", + "GL": "GroenLinks-PvdA", + "GroenLinks": "GroenLinks-PvdA", + "PvdA": "GroenLinks-PvdA", + "Gündoğan": "Volt", + "Lid Keijzer": "BBB", + "Groep Markuszower": "PVV", +} diff --git a/analysis/explorer_data.py b/analysis/explorer_data.py new file mode 100644 index 0000000..7e81505 --- /dev/null +++ b/analysis/explorer_data.py @@ -0,0 +1,563 @@ +"""Data loading functions for the parliamentary explorer. + +This module contains all data loading functions extracted from explorer.py. +It is intentionally free of Streamlit side-effects to be easy to unit test. 
+""" + +from __future__ import annotations + +import logging +from typing import Dict, List, Set, Tuple + +import duckdb +import numpy as np +import pandas as pd + +from analysis.config import CURRENT_PARLIAMENT_PARTIES, _PARTY_NORMALIZE + +__all__ = [ + "get_available_windows", + "get_uniform_dim_windows", + "load_party_map", + "load_active_mps", + "load_mp_vectors_by_window", + "load_mp_vectors_by_party", + "load_mp_vectors_by_party_for_window", + "load_party_axis_scores", + "load_party_axis_scores_for_window", + "load_party_scores_all_windows", + "load_party_scores_all_windows_aligned", + "load_party_mp_vectors", + "build_window_party_scores", + "load_motions_df", + "query_similar", + "compute_party_axis_scores", +] + +logger = logging.getLogger(__name__) + +_WINDOW_SQL = """ + SELECT DISTINCT window_id FROM svd_vectors ORDER BY window_id +""" + +_UNIFORM_DIM_SQL = """ + WITH vec_dims AS ( + SELECT window_id, json_array_length(vector) AS dim + FROM svd_vectors + WHERE entity_type = 'mp' + ), + window_dim_counts AS ( + SELECT window_id, dim, COUNT(*) AS cnt + FROM vec_dims + GROUP BY window_id, dim + ), + dominant AS ( + SELECT DISTINCT ON (window_id) window_id, dim, cnt + FROM window_dim_counts + ORDER BY window_id, cnt DESC, dim DESC + ) + SELECT window_id + FROM dominant + WHERE dim >= 25 AND cnt >= 10 + ORDER BY window_id +""" + + +def get_available_windows(db_path: str) -> List[str]: + """Return sorted list of distinct window_ids from svd_vectors.""" + con = duckdb.connect(database=db_path, read_only=True) + try: + rows = con.execute(_WINDOW_SQL).fetchall() + return [r[0] for r in rows] + except Exception: + logger.exception("Failed to query available windows") + return [] + finally: + con.close() + + +def get_uniform_dim_windows(db_path: str) -> List[str]: + """Return only windows whose dominant MP-vector dimension is >= 25. + + Some windows contain a mix of vector lengths due to multiple pipeline runs + (e.g. 2016 has both dim=1 and dim=50 rows). 
We find the most common dimension + per window and include only windows where that dominant dim >= 25. + Windows with too few dim-25+ entities (< 10) are also excluded to avoid + degenerate PCA inputs. + """ + con = duckdb.connect(database=db_path, read_only=True) + try: + rows = con.execute(_UNIFORM_DIM_SQL).fetchall() + return [r[0] for r in rows] + except Exception: + logger.exception("Failed to query uniform-dim windows") + return [] + finally: + con.close() + + +def load_party_map(db_path: str) -> Dict[str, str]: + """Return {mp_name: party} mapping, with party names normalised to abbreviations.""" + try: + con = duckdb.connect(database=db_path, read_only=True) + rows = con.execute( + "SELECT mp_name, party FROM mp_metadata WHERE party IS NOT NULL" + ).fetchall() + con.close() + return { + mp: _PARTY_NORMALIZE.get(party, party) for mp, party in rows if mp and party + } + except Exception: + logger.exception("Failed to load party map") + return {} + + +def load_active_mps(db_path: str) -> Set[str]: + """Return the set of mp_name values that are currently seated in parliament. + + An MP is considered active if their mp_metadata row has tot_en_met IS NULL, + meaning they have no recorded end date for their current seat. + """ + try: + con = duckdb.connect(database=db_path, read_only=True) + rows = con.execute( + "SELECT mp_name FROM mp_metadata WHERE tot_en_met IS NULL" + ).fetchall() + con.close() + return {r[0] for r in rows if r[0]} + except Exception: + logger.exception("Failed to load active MPs") + return set() + + +def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]: + """Return party scores for all windows (non-aligned). + + Returns dict mapping party_abbrev -> list of axis scores, one per window. 
+ """ + try: + con = duckdb.connect(database=db_path, read_only=True) + rows = con.execute( + """ + SELECT party_abbrev, window_id, x_axis, y_axis + FROM party_axis_scores + ORDER BY party_abbrev, window_id + """ + ).fetchall() + con.close() + + scores: Dict[str, List[float]] = {} + for party, window, x, y in rows: + if party not in scores: + scores[party] = [] + if x is not None and y is not None: + scores[party].extend([x, y]) + return scores + except Exception: + logger.exception("Failed to load party axis scores") + return {} + + +def load_party_axis_scores_for_window( + db_path: str, window: str +) -> Dict[str, List[float]]: + """Return party scores for a specific window (aligned).""" + try: + con = duckdb.connect(database=db_path, read_only=True) + rows = con.execute( + """ + SELECT party_abbrev, x_axis, y_axis + FROM party_axis_scores + WHERE window_id = ? + ORDER BY party_abbrev + """, + [window], + ).fetchall() + con.close() + + return {party: [x or 0.0, y or 0.0] for party, x, y in rows} + except Exception: + logger.exception("Failed to load party axis scores for window %s", window) + return {} + + +def load_party_scores_all_windows(db_path: str) -> Dict[str, List[List[float]]]: + """Return party scores across all windows (non-aligned).""" + try: + con = duckdb.connect(database=db_path, read_only=True) + rows = con.execute( + """ + SELECT party_abbrev, window_id, x_axis, y_axis + FROM party_axis_scores + ORDER BY party_abbrev, window_id + """ + ).fetchall() + con.close() + + scores: Dict[str, List[List[float]]] = {} + current_party = None + for party, window, x, y in rows: + if party != current_party: + scores[party] = [] + current_party = party + if x is not None and y is not None: + scores[party].append([x, y]) + else: + scores[party].append([0.0, 0.0]) + return scores + except Exception: + logger.exception("Failed to load party scores all windows") + return {} + + +def load_party_scores_all_windows_aligned( + db_path: str, +) -> Dict[str, 
List[List[float]]]: + """Return party scores across all windows (Procrustes-aligned).""" + try: + con = duckdb.connect(database=db_path, read_only=True) + rows = con.execute( + """ + SELECT party_abbrev, window_id, x_axis_aligned, y_axis_aligned + FROM party_axis_scores + ORDER BY party_abbrev, window_id + """ + ).fetchall() + con.close() + + scores: Dict[str, List[List[float]]] = {} + current_party = None + for party, window, x, y in rows: + if party != current_party: + scores[party] = [] + current_party = party + if x is not None and y is not None: + scores[party].append([x, y]) + else: + scores[party].append([0.0, 0.0]) + return scores + except Exception: + logger.exception("Failed to load aligned party scores all windows") + return {} + + +def build_window_party_scores( + scores_by_party: Dict[str, List[List[float]]], + window_idx: int, +) -> Dict[str, List[float]]: + """Extract scores for one window as {party: [x, y]} for compute_flip_direction. + + Args: + scores_by_party: Output of load_party_scores_all_windows_aligned — + {party: [[x, y], [x, y], ...]} per window. + window_idx: Zero-based index of the window to extract. + + Returns: + {party: [x, y]} for the given window. Returns empty dict if + window_idx is out of range. + """ + if window_idx < 0: + return {} + result: Dict[str, List[float]] = {} + for party, window_scores in scores_by_party.items(): + if window_idx < len(window_scores): + result[party] = window_scores[window_idx] + return result + + +def load_party_mp_vectors(db_path: str) -> Dict[str, List[np.ndarray]]: + """Load individual MP SVD vectors grouped by party. + + Returns {party_name: [np.ndarray(50,), ...]} — one array per MP. 
+ """ + con = duckdb.connect(database=db_path, read_only=True) + try: + meta_rows = con.execute( + "SELECT mp_name, party FROM mp_metadata " + "WHERE van >= '2023-11-22' OR tot_en_met IS NULL OR tot_en_met >= '2023-11-22' " + "ORDER BY van ASC" + ).fetchall() + mp_party: Dict[str, str] = {} + for mp_name, party in meta_rows: + if mp_name and party: + mp_party[mp_name] = _PARTY_NORMALIZE.get(party, party) + + rows = con.execute( + "SELECT entity_id, vector FROM svd_vectors " + "WHERE entity_type = 'mp' AND window_id = 'current_parliament'" + ).fetchall() + + vectors_by_party: Dict[str, List[np.ndarray]] = {} + for entity_id, vector_json in rows: + if entity_id in mp_party: + party = mp_party[entity_id] + if party not in vectors_by_party: + vectors_by_party[party] = [] + vectors_by_party[party].append(np.array(vector_json)) + + return vectors_by_party + except Exception: + logger.exception("Failed to load party MP vectors") + return {} + finally: + con.close() + + +def load_scree_data(db_path: str) -> List[float]: + """Load scree plot data (explained variance) for current_parliament.""" + try: + con = duckdb.connect(database=db_path, read_only=True) + row = con.execute( + """ + SELECT sv_metadata FROM svd_vectors + WHERE window_id = 'current_parliament' AND entity_type = 'singular_values' + LIMIT 1 + """ + ).fetchone() + con.close() + + if row and row[0]: + import json + + return json.loads(row[0]) + return [] + except Exception: + logger.exception("Failed to load scree data") + return [] + + +def load_motions_df(db_path: str) -> pd.DataFrame: + """Load the full motions table as a pandas DataFrame (read-only).""" + try: + con = duckdb.connect(database=db_path, read_only=True) + df = con.execute( + """ + SELECT id, title, description, date, policy_area, + voting_results, layman_explanation, + winning_margin, controversy_score, url + FROM motions + """ + ).fetchdf() + con.close() + df["date"] = pd.to_datetime(df["date"], errors="coerce") + df["year"] = 
df["date"].dt.year + return df + except Exception: + logger.exception("Failed to load motions DataFrame") + return pd.DataFrame() + + +def load_mp_vectors_by_window(db_path: str, window: str) -> Dict[str, np.ndarray]: + """Load individual MP SVD vectors for a specific window. + + Args: + db_path: Path to DuckDB database + window: Window ID (e.g., "2015", "current_parliament") + + Returns: + {mp_name: np.ndarray(50,)} — one vector per MP + """ + import json as _json + + try: + con = duckdb.connect(database=db_path, read_only=True) + rows = con.execute( + """ + SELECT entity_id, vector FROM svd_vectors + WHERE entity_type = 'mp' AND window_id = ? + """, + [window], + ).fetchall() + con.close() + + mp_vecs: Dict[str, np.ndarray] = {} + for entity_id, raw_vec in rows: + if isinstance(raw_vec, str): + vec = _json.loads(raw_vec) + elif isinstance(raw_vec, (bytes, bytearray)): + vec = _json.loads(raw_vec.decode()) + elif isinstance(raw_vec, list): + vec = raw_vec + else: + try: + vec = list(raw_vec) + except Exception: + continue + fvec = np.array([float(v) if v is not None else 0.0 for v in vec]) + mp_vecs[entity_id] = fvec + + return mp_vecs + except Exception: + logger.exception("Failed to load MP vectors for window %s", window) + return {} + + +def query_similar( + db_path: str, + source_motion_id: int, + vector_type: str = "fused", + top_k: int = 10, +) -> pd.DataFrame: + """Return top-k similar motions from similarity_cache (read-only).""" + try: + con = duckdb.connect(database=db_path, read_only=True) + rows = con.execute( + """ + SELECT sc.target_motion_id, sc.score, sc.window_id, + m.title, m.date, m.policy_area + FROM similarity_cache sc + JOIN motions m ON m.id = sc.target_motion_id + WHERE sc.source_motion_id = ? + AND sc.vector_type = ? + ORDER BY sc.score DESC + LIMIT ? 
+ """, + [source_motion_id, vector_type, top_k], + ).fetchdf() + con.close() + return rows + except Exception: + logger.exception( + "Failed to query similarity cache for motion %s", source_motion_id + ) + return pd.DataFrame() + + +def load_mp_vectors_by_party(db_path: str) -> Dict[str, List[np.ndarray]]: + """Load individual MP SVD vectors grouped by party for current_parliament. + + Returns: + {party_name: [np.ndarray(50,), ...]} — one array per MP. + """ + import json as _json + + try: + con = duckdb.connect(database=db_path, read_only=True) + meta_rows = con.execute( + "SELECT mp_name, party FROM mp_metadata " + "WHERE van >= '2023-11-22' OR tot_en_met IS NULL OR tot_en_met >= '2023-11-22' " + "ORDER BY van ASC" + ).fetchall() + mp_party: Dict[str, str] = {} + for mp_name, party in meta_rows: + if mp_name and party: + mp_party[mp_name] = _PARTY_NORMALIZE.get(party, party) + + rows = con.execute( + "SELECT entity_id, vector FROM svd_vectors " + "WHERE entity_type='mp' AND window_id='current_parliament'" + ).fetchall() + con.close() + + party_vecs: Dict[str, List[np.ndarray]] = {} + for entity_id, raw_vec in rows: + party = mp_party.get(entity_id) + if party is None or party not in CURRENT_PARLIAMENT_PARTIES: + continue + if isinstance(raw_vec, str): + vec = _json.loads(raw_vec) + elif isinstance(raw_vec, (bytes, bytearray)): + vec = _json.loads(raw_vec.decode()) + elif isinstance(raw_vec, list): + vec = raw_vec + else: + try: + vec = list(raw_vec) + except Exception: + continue + fvec = np.array([float(v) if v is not None else 0.0 for v in vec]) + party_vecs.setdefault(party, []).append(fvec) + return party_vecs + except Exception: + logger.exception("Failed to load MP vectors by party") + return {} + + +def load_mp_vectors_by_party_for_window( + db_path: str, window: str +) -> Dict[str, List[np.ndarray]]: + """Load individual MP SVD vectors grouped by party for a specific window. + + For historical windows, uses the MP→party mapping from that time period. 
+ + Returns: + {party_name: [np.ndarray(50,), ...]} — one array per MP. + """ + import json as _json + + try: + con = duckdb.connect(database=db_path, read_only=True) + is_current = window == "current_parliament" + + if is_current: + meta_rows = con.execute( + "SELECT mp_name, party FROM mp_metadata " + "WHERE van >= '2023-11-22' OR tot_en_met IS NULL OR tot_en_met >= '2023-11-22' " + "ORDER BY van ASC" + ).fetchall() + else: + try: + year = int(window.split("-")[0]) + except ValueError: + year = 2023 + meta_rows = con.execute( + "SELECT mp_name, party FROM mp_metadata " + "WHERE van <= ? AND (tot_en_met IS NULL OR tot_en_met >= ?) " + "ORDER BY van ASC", + [f"{year}-12-31", f"{year}-01-01"], + ).fetchall() + + mp_party: Dict[str, str] = {} + for mp_name, party in meta_rows: + if mp_name and party: + mp_party[mp_name] = _PARTY_NORMALIZE.get(party, party) + + rows = con.execute( + "SELECT entity_id, vector FROM svd_vectors " + "WHERE entity_type='mp' AND window_id=?", + [window], + ).fetchall() + con.close() + + party_vecs: Dict[str, List[np.ndarray]] = {} + for entity_id, raw_vec in rows: + party = mp_party.get(entity_id) + if party is None: + continue + if is_current and party not in CURRENT_PARLIAMENT_PARTIES: + continue + if isinstance(raw_vec, str): + vec = _json.loads(raw_vec) + elif isinstance(raw_vec, (bytes, bytearray)): + vec = _json.loads(raw_vec.decode()) + elif isinstance(raw_vec, list): + vec = raw_vec + else: + try: + vec = list(raw_vec) + except Exception: + continue + fvec = np.array([float(v) if v is not None else 0.0 for v in vec]) + party_vecs.setdefault(party, []).append(fvec) + return party_vecs + except Exception: + logger.exception("Failed to load MP vectors by party for window %s", window) + return {} + + +def compute_party_axis_scores( + party_vecs: Dict[str, List[np.ndarray]], +) -> Dict[str, List[float]]: + """Compute per-party axis scores as mean of MP vectors. 
+ + Returns: + {party_name: [float * k]} — k = 50, mean over all MPs in that party. + """ + try: + return { + party: np.array(vecs).mean(axis=0).tolist() + for party, vecs in party_vecs.items() + } + except Exception: + logger.exception("Failed to compute party axis scores") + return {} diff --git a/analysis/svd_labels.py b/analysis/svd_labels.py index 4f43d5e..80a72a5 100644 --- a/analysis/svd_labels.py +++ b/analysis/svd_labels.py @@ -8,33 +8,12 @@ directions automatically based on party centroids. import logging from typing import Dict, List, Optional, Tuple +from analysis.config import CANONICAL_LEFT, CANONICAL_RIGHT + _logger = logging.getLogger(__name__) -# Canonical party sets for orientation -# Right-wing parties that should appear on the right side of axes -RIGHT_PARTIES = { - "PVV", - "VVD", - "FVD", - "BBB", - "JA21", - "Nieuw Sociaal Contract", - "SGP", - "CDA", - "ChristenUnie", -} - -# Left-wing parties that should appear on the left side of axes -LEFT_PARTIES = { - "SP", - "PvdA", - "GL", - "GroenLinks", - "GroenLinks-PvdA", - "DENK", - "PvdD", - "Volt", -} +RIGHT_PARTIES = CANONICAL_RIGHT +LEFT_PARTIES = CANONICAL_LEFT # Cache for SVD_THEMES to avoid repeated imports _svd_themes_cache: Optional[Dict[int, Dict[str, str]]] = None @@ -125,14 +104,16 @@ def get_svd_theme(component: int) -> Dict[str, str]: def compute_flip_direction( - component: int, party_scores: Dict[str, List[float]] + component: int, + party_scores: Dict[str, List[float]], ) -> bool: """Compute flip direction so right parties appear on the right side. Args: component: SVD component number (1-indexed) - party_scores: Dict mapping party name to list of scores per component - (party_scores[party][0] is score for component 1, etc.) + party_scores: Dict mapping party name to per-component scores. + party_scores[party][0] is score for component 1 (x-axis), + party_scores[party][1] is score for component 2 (y-axis). Returns: True if axis should be flipped so right parties are on right. 
diff --git a/docs/plans/2026-04-05-001-feat-right-wing-party-axis-validation-plan.md b/docs/plans/2026-04-05-001-feat-right-wing-party-axis-validation-plan.md new file mode 100644 index 0000000..acfcb15 --- /dev/null +++ b/docs/plans/2026-04-05-001-feat-right-wing-party-axis-validation-plan.md @@ -0,0 +1,231 @@ +--- +title: "Right-Wing Party Axis Validation" +type: feat +status: completed +date: 2026-04-05 +origin: docs/brainstorms/2026-04-05-right-wing-party-axis-validation-requirements.md +--- + +# Right-Wing Party Axis Validation + +## Overview + +Add automated tests that assert PVV, FVD, JA21, and SGP appear on the RIGHT side of the political compass (mean-based), using real DuckDB data. Consolidate the conflicting `RIGHT_PARTIES`/`LEFT_PARTIES` inline definitions into `analysis/config.py`. + +## Problem Frame + +The AGENTS.md convention states that PVV, FVD, JA21, and SGP must appear on the RIGHT side of all axes. Three files define conflicting party sets: `svd_labels.py` has 9 right parties, `political_axis.py` has 6, and neither matches the convention. No automated validation exists. + +## Requirements Trace + +- R1. Canonical party sets defined once, imported everywhere +- R2. Validation test loads real data from DuckDB +- R3. 2D political compass orientation check (statistical, mean-based) +- R4. `compute_flip_direction` consistency check +- R5. 
Clear failure messages + +## Scope Boundaries + +- Only aligned scores validated (not unaligned) +- Center parties (VVD, NSC, BBB, CDA, ChristenUnie) not validated +- Per-party strict sign checks excluded — statistical mean check +- `political_axis.py` not updated (out of scope per requirements) + +## Context & Research + +### Relevant Code and Patterns + +- `analysis/config.py` — existing constants module with `__all__`, `_PARTY_NORMALIZE` at lines 247-256 +- `analysis/svd_labels.py` — `compute_flip_direction` at lines 127-166, uses inline `RIGHT_PARTIES`/`LEFT_PARTIES` +- `analysis/explorer_data.py` — `load_party_scores_all_windows_aligned` at lines 212-241, returns `{party: [[x,y] per window]}` +- `analysis/trajectory.py` — `_load_window_ids` at line 121 (not exported in `__all__`) +- `tests/conftest.py` — `tmp_duckdb_path` fixture at line 70, `tmp_duckdb_conn` fixture at line 76 +- `tests/test_svd_labels.py` — existing tests for `compute_flip_direction` with synthetic data + +### Key Structural Insight + +`load_party_scores_all_windows_aligned` returns `{party: [[x, y], [x, y], ...]}` — data grouped by party, not by window. To validate per window, the test must iterate window indices and build per-window dicts: `{party: [x, y]}` where index matches the window position. + +`compute_flip_direction(component, {party: [scores]})` indexes into `scores[component-1]`, so: +- `compute_flip_direction(1, party_scores)` checks x-axis orientation +- `compute_flip_direction(2, party_scores)` checks y-axis orientation + +## Key Technical Decisions + +- **Synthetic DuckDB fixture data, not real DB**: Temporary DB with controlled `party_axis_scores` rows avoids dependency on a populated real database. Follows existing pattern from `test_analysis.py`. +- **Extract window-indexing helper**: A helper `build_window_party_scores(scores_by_party, window_idx)` separates data transformation from DB access — enables unit testing the logic without DuckDB. 
+- **`_PARTY_NORMALIZE` for alias handling**: Normalize party names from DB before building `party_scores` dict. DB may return "GL" while canonical sets expect "GroenLinks-PvdA". + +## Open Questions + +### Resolved During Planning + +- **DB fixture vs real DB**: Use synthetic fixture data in temporary DuckDB. This is the pattern used by `test_analysis.py` and gives full control over the test scenario. +- **Per-window iteration**: Data is `{party: [[x,y] per window]}` — iterate by window index, not by key lookup. +- **`political_axis.py` scope**: Not updated. Uses separate `right_parties`/`left_parties` for PCA centroid orientation, distinct concern from this validation. + +### Deferred to Implementation + +- **Test DB schema exactness**: The `party_axis_scores` schema (column names, nullability) should be verified against `explorer_data.py` query at implementation time. + +## Implementation Units + +- [ ] **Unit 1: Add canonical party sets to `config.py`** + +**Goal:** Add `CANONICAL_RIGHT` and `CANONICAL_LEFT` frozensets as the single source of truth. + +**Requirements:** R1 + +**Dependencies:** None + +**Files:** +- Modify: `analysis/config.py` + +**Approach:** +- Add `CANONICAL_RIGHT = frozenset({"PVV", "FVD", "JA21", "SGP"})` matching AGENTS.md exactly +- Add `CANONICAL_LEFT = frozenset({"SP", "PvdA", "GL", "GroenLinks", "GroenLinks-PvdA", "DENK", "PvdD", "Volt"})` matching svd_labels.py LEFT_PARTIES exactly +- Add both to `__all__` + +**Patterns to follow:** +- `CURRENT_PARLIAMENT_PARTIES` frozenset pattern at `config.py` line 235 + +**Test scenarios:** +- Test expectation: none — this is a data definition change, not behavioral code + +**Verification:** +- `CANONICAL_RIGHT` and `CANONICAL_LEFT` accessible via `from analysis.config import CANONICAL_RIGHT, CANONICAL_LEFT` + +--- + +- [ ] **Unit 2: Update `svd_labels.py` to import from `config.py`** + +**Goal:** `compute_flip_direction` uses canonical sets from config instead of inline definitions. 
+ +**Requirements:** R1 + +**Dependencies:** Unit 1 + +**Files:** +- Modify: `analysis/svd_labels.py` + +**Approach:** +- Replace the inline `RIGHT_PARTIES` and `LEFT_PARTIES` set literals with: + ```python + from analysis.config import CANONICAL_RIGHT, CANONICAL_LEFT + RIGHT_PARTIES = CANONICAL_RIGHT # backward compat alias + LEFT_PARTIES = CANONICAL_LEFT # backward compat alias + ``` +- This preserves any external callers that import `RIGHT_PARTIES`/`LEFT_PARTIES` from `svd_labels` + +**Patterns to follow:** +- Alias pattern (re-export) rather than removing the old names — backward compat + +**Test scenarios:** +- Happy path: `compute_flip_direction` produces the same results as before for the scenarios in `test_svd_labels.py`; parties dropped from the right set (VVD, BBB, Nieuw Sociaal Contract, CDA, ChristenUnie) no longer count as right parties +- Existing tests in `test_svd_labels.py` run and pass after the import swap + +**Verification:** +- `pytest tests/test_svd_labels.py` passes + +--- + +- [ ] **Unit 3: Extract `build_window_party_scores` helper in `explorer_data.py`** + +**Goal:** Separate window-indexing logic from DB access so it can be unit tested without DuckDB. + +**Requirements:** R2, R3 + +**Dependencies:** None + +**Files:** +- Create: `analysis/explorer_data.py` (add function) + +**Approach:** +Add a helper: +```python +def build_window_party_scores( + scores_by_party: Dict[str, List[List[float]]], + window_idx: int +) -> Dict[str, List[float]]: + """Extract scores for one window as {party: [x, y]} for compute_flip_direction.""" +``` + +The function takes the output of `load_party_scores_all_windows_aligned` and extracts `scores_by_party[party][window_idx]` for all parties, returning `{party: [x, y]}`. Returns empty dict if window_idx is out of range. 
+ +**Patterns to follow:** +- `load_party_scores_all_windows_aligned` pattern at `explorer_data.py` line 212 + +**Test scenarios:** +- Happy path: Given `{"PVV": [[0.5, 0.3], [0.6, 0.4]], "SP": [[-0.4, -0.2], [-0.5, -0.3]]}` and `window_idx=0`, returns `{"PVV": [0.5, 0.3], "SP": [-0.4, -0.2]}` +- Edge case: `window_idx=99` out of range → returns `{}` +- Edge case: Empty input dict → returns `{}` + +**Verification:** +- Unit tests pass without DuckDB + +--- + +- [ ] **Unit 4: Create `tests/test_axis_political_orientation.py`** + +**Goal:** Integration test validating political compass orientation against DuckDB data. + +**Requirements:** R2, R3, R4, R5 + +**Dependencies:** Units 1, 2, 3 + +**Files:** +- Create: `tests/test_axis_political_orientation.py` + +**Approach:** +Two-layer test structure: + +1. **Synthetic fixture layer** (DuckDB integration test): + - Create temporary DB with `party_axis_scores` table + - Insert controlled rows: correct orientation (right_mean > left_mean) and incorrect orientation (right_mean < left_mean) + - Call `load_party_scores_all_windows_aligned` and `build_window_party_scores` + - Assert orientation checks pass/fail correctly + +2. **Validation assertions** (layered on helper from Unit 3): + - For each window (iterate `scores_by_party[party]` length): + - Build per-window dict via `build_window_party_scores` + - Call `compute_flip_direction(1, party_scores)` → assert `False` (no flip needed) + - Call `compute_flip_direction(2, party_scores)` → assert `False` + - On failure: assert message includes window, axis, right_mean, left_mean + +Use `tmp_duckdb_conn` fixture. Create schema and insert rows in test setup. 
+ +**Patterns to follow:** +- `test_analysis.py` fixture setup pattern (lines 13-60) for synthetic SVD vector setup +- `test_svd_labels.py` assertion style for `compute_flip_direction` validation + +**Test scenarios:** +- Happy path (correct orientation): Right mean > left mean on both axes → both `compute_flip_direction` calls return `False` +- Error path (incorrect orientation): Right mean < left mean → at least one call returns `True`, test fails with clear message +- Edge case: Party not in canonical sets → gracefully skipped (no crash) +- Edge case: Empty party list → returns `False` (no flip) +- Edge case: Aliased party name ("GL" vs "GroenLinks-PvdA") → normalized before check + +**Verification:** +- `pytest tests/test_axis_political_orientation.py` runs and passes +- `pytest tests/test_svd_labels.py` still passes (backward compat check) + +## System-Wide Impact + +- **Error propagation**: No error paths in this feature — orientation violations produce assertion failures, not exceptions +- **Unchanged invariants**: `RIGHT_PARTIES`/`LEFT_PARTIES` remain importable from `svd_labels` (alias re-export); note that `RIGHT_PARTIES` shrinks from 9 parties to the 4 canonical ones, so `compute_flip_direction` no longer counts VVD, BBB, Nieuw Sociaal Contract, CDA, or ChristenUnie as right parties +- **API surface parity**: New public names are limited to the `build_window_party_scores` helper and the read-only `CANONICAL_RIGHT`/`CANONICAL_LEFT` constants + +## Risks & Dependencies + +| Risk | Mitigation | +|------|------------| +| DuckDB fixture schema mismatch | Verify `party_axis_scores` column names against `explorer_data.py` query at implementation time | +| Window index boundary errors | `build_window_party_scores` returns `{}` for out-of-range indices — graceful degradation | +| `_PARTY_NORMALIZE` aliases incomplete | Add aliases as needed during implementation — test with edge cases | + +## Sources & References + +- **Origin document:** [docs/brainstorms/2026-04-05-right-wing-party-axis-validation-requirements.md](docs/brainstorms/2026-04-05-right-wing-party-axis-validation-requirements.md) +- **AGENTS.md convention:** `docs/solutions/best-practices/svd-labels-voting-patterns-not-semantics.md` +- Related 
code: `analysis/svd_labels.py`, `analysis/config.py`, `analysis/explorer_data.py` +- Related tests: `tests/test_svd_labels.py`, `tests/test_analysis.py` diff --git a/tests/test_axis_political_orientation.py b/tests/test_axis_political_orientation.py new file mode 100644 index 0000000..c9b5fd7 --- /dev/null +++ b/tests/test_axis_political_orientation.py @@ -0,0 +1,224 @@ +"""Tests for political axis orientation validation. + +Validates that PVV, FVD, JA21, and SGP appear on the RIGHT side +(mean-based) of the political compass, per AGENTS.md convention. +""" + +import pytest + +duckdb = pytest.importorskip("duckdb") + + +def _setup_party_axis_scores(db_path: str, rows: list): + """Insert synthetic party_axis_scores rows. + + Args: + db_path: Path to DuckDB database. + rows: List of (party_abbrev, window_id, x_axis_aligned, y_axis_aligned). + """ + conn = duckdb.connect(db_path) + conn.execute( + """ + CREATE TABLE IF NOT EXISTS party_axis_scores ( + party_abbrev TEXT, + window_id TEXT, + x_axis_aligned DOUBLE, + y_axis_aligned DOUBLE + ) + """ + ) + for party, window, x, y in rows: + conn.execute( + "INSERT INTO party_axis_scores (party_abbrev, window_id, x_axis_aligned, y_axis_aligned) VALUES (?, ?, ?, ?)", + (party, window, x, y), + ) + conn.close() + + +def _build_scores_by_party(db_path: str) -> dict: + """Load aligned scores as {party: [[x,y] per window]} from DuckDB.""" + from analysis.explorer_data import load_party_scores_all_windows_aligned + + return load_party_scores_all_windows_aligned(db_path) + + +class TestAxisPoliticalOrientation: + def test_build_window_party_scores_happy_path(self): + from analysis.explorer_data import build_window_party_scores + + data = { + "PVV": [[0.5, 0.3], [0.6, 0.4]], + "FVD": [[0.4, 0.2], [0.5, 0.3]], + "SP": [[-0.4, -0.2], [-0.5, -0.3]], + "DENK": [[-0.3, -0.1], [-0.4, -0.2]], + } + result = build_window_party_scores(data, 0) + assert result == { + "PVV": [0.5, 0.3], + "FVD": [0.4, 0.2], + "SP": [-0.4, -0.2], + "DENK": 
[-0.3, -0.1], + } + + result = build_window_party_scores(data, 1) + assert result == { + "PVV": [0.6, 0.4], + "FVD": [0.5, 0.3], + "SP": [-0.5, -0.3], + "DENK": [-0.4, -0.2], + } + + def test_build_window_party_scores_out_of_range(self): + from analysis.explorer_data import build_window_party_scores + + data = {"PVV": [[0.5, 0.3]], "SP": [[-0.4, -0.2]]} + assert build_window_party_scores(data, 99) == {} + assert build_window_party_scores(data, -1) == {} + assert build_window_party_scores({}, 0) == {} + + def test_orientation_correct_no_flip_needed(self, tmp_path): + db_path = str(tmp_path / "orientation.db") + _setup_party_axis_scores( + db_path, + [ + # Window 0: Correct orientation — right_mean > left_mean on both axes + ("PVV", "w1", 0.8, 0.2), + ("FVD", "w1", 0.6, 0.1), + ("JA21", "w1", 0.5, 0.0), + ("SGP", "w1", 0.4, 0.0), + ("SP", "w1", -0.6, -0.2), + ("DENK", "w1", -0.4, -0.1), + ("PvdA", "w1", -0.5, -0.1), + ("Volt", "w1", -0.3, -0.0), + # Window 1: Same correct orientation + ("PVV", "w2", 0.7, 0.3), + ("FVD", "w2", 0.5, 0.2), + ("JA21", "w2", 0.4, 0.1), + ("SGP", "w2", 0.3, 0.0), + ("SP", "w2", -0.5, -0.2), + ("DENK", "w2", -0.3, -0.1), + ("PvdA", "w2", -0.4, -0.1), + ("Volt", "w2", -0.2, 0.0), + ], + ) + + scores_by_party = _build_scores_by_party(db_path) + from analysis.explorer_data import build_window_party_scores + from analysis.svd_labels import compute_flip_direction + + # 2 windows + n_windows = max(len(v) for v in scores_by_party.values()) + assert n_windows == 2 + + for window_idx in range(n_windows): + party_scores = build_window_party_scores(scores_by_party, window_idx) + flip_x = compute_flip_direction(1, party_scores) + flip_y = compute_flip_direction(2, party_scores) + assert flip_x is False, ( + f"Window {window_idx}: right parties should already be on right (x-axis)" + ) + assert flip_y is False, ( + f"Window {window_idx}: right parties should already be on right (y-axis)" + ) + + def test_orientation_incorrect_triggers_flip(self, 
tmp_path): + db_path = str(tmp_path / "orientation_flipped.db") + _setup_party_axis_scores( + db_path, + [ + # Window 0: Wrong orientation — right_mean < left_mean on x-axis + ("PVV", "w1", -0.8, 0.0), # Right party on left + ("FVD", "w1", -0.6, 0.0), + ("JA21", "w1", -0.5, 0.0), + ("SGP", "w1", -0.4, 0.0), + ("SP", "w1", 0.6, 0.0), # Left party on right + ("DENK", "w1", 0.4, 0.0), + ], + ) + + scores_by_party = _build_scores_by_party(db_path) + from analysis.explorer_data import build_window_party_scores + from analysis.svd_labels import compute_flip_direction + + party_scores = build_window_party_scores(scores_by_party, 0) + flip_x = compute_flip_direction(1, party_scores) + # Right mean = (-0.8 + -0.6 + -0.5 + -0.4) / 4 = -0.575 + # Left mean = (0.6 + 0.4) / 2 = 0.5 + # right_mean < left_mean → flip = True + assert flip_x is True, "Right parties on left should trigger flip=True" + + def test_missing_party_graceful_skip(self, tmp_path): + db_path = str(tmp_path / "partial.db") + _setup_party_axis_scores( + db_path, + [ + # Only PVV (right) and SP (left), no FVD/JA21/SGP + ("PVV", "w1", 0.8, 0.2), + ("SP", "w1", -0.6, -0.2), + ("DENK", "w1", -0.4, -0.1), + ], + ) + + scores_by_party = _build_scores_by_party(db_path) + from analysis.explorer_data import build_window_party_scores + from analysis.svd_labels import compute_flip_direction + + party_scores = build_window_party_scores(scores_by_party, 0) + # Should not raise — PVV and SP are in canonical sets, rest ignored + flip_x = compute_flip_direction(1, party_scores) + flip_y = compute_flip_direction(2, party_scores) + # right_mean = 0.8, left_mean = (-0.6 + -0.4) / 2 = -0.5 + # 0.8 > -0.5 → flip = False + assert flip_x is False + assert flip_y is False + + def test_party_name_aliasing_normalized(self, tmp_path): + """Test that aliased party names are handled gracefully. + + DB may return 'GL' while canonical sets use 'GroenLinks-PvdA'. + The test uses exact canonical names; _PARTY_NORMALIZE handles aliases. 
+ """ + db_path = str(tmp_path / "aliased.db") + _setup_party_axis_scores( + db_path, + [ + # PVV and FVD under exact canonical names + ("PVV", "w1", 0.8, 0.2), + ("FVD", "w1", 0.6, 0.1), + # Left parties under exact canonical names + ("SP", "w1", -0.6, -0.2), + ("DENK", "w1", -0.4, -0.1), + ("Volt", "w1", -0.3, -0.1), + ], + ) + + scores_by_party = _build_scores_by_party(db_path) + from analysis.explorer_data import build_window_party_scores + from analysis.svd_labels import compute_flip_direction + + party_scores = build_window_party_scores(scores_by_party, 0) + flip_x = compute_flip_direction(1, party_scores) + # right_mean = (0.8 + 0.6) / 2 = 0.7 + # left_mean = (-0.6 + -0.4 + -0.3) / 3 = -0.433 + # 0.7 > -0.433 → flip = False + assert flip_x is False + + def test_insufficient_data_returns_false(self, tmp_path): + db_path = str(tmp_path / "insufficient.db") + _setup_party_axis_scores( + db_path, + [ + # Only left parties — no right parties + ("SP", "w1", -0.6, -0.2), + ("DENK", "w1", -0.4, -0.1), + ], + ) + + scores_by_party = _build_scores_by_party(db_path) + from analysis.explorer_data import build_window_party_scores + from analysis.svd_labels import compute_flip_direction + + party_scores = build_window_party_scores(scores_by_party, 0) + flip = compute_flip_direction(1, party_scores) + # No right parties in data → returns False (no flip) + assert flip is False diff --git a/tests/test_svd_labels.py b/tests/test_svd_labels.py index b41a886..83c723c 100644 --- a/tests/test_svd_labels.py +++ b/tests/test_svd_labels.py @@ -5,13 +5,13 @@ def test_get_svd_label_returns_correct_label(): """Test that get_svd_label returns the correct label for each component.""" from analysis.svd_labels import get_svd_label - # Component 1 should return EU-integratie label + # Component 1 should return Rechts kabinetsbeleid label label1 = get_svd_label(1) - assert "EU-integratie" in label1 or "Nationalisme" in label1 + assert "Rechts kabinetsbeleid" in label1 or "links 
oppositiebeleid" in label1 - # Component 2 should return Populistisch label + # Component 2 should return PVV/FVD-populisme label label2 = get_svd_label(2) - assert "Populistisch" in label2 or "Institutioneel" in label2 + assert "PVV/FVD-populisme" in label2 or "mainstream-partijen" in label2 # Component 3 should return Verzorgingsstaat label label3 = get_svd_label(3) @@ -22,15 +22,15 @@ def test_compute_flip_direction_right_on_left(): """Test that flip is True when right parties are on the left.""" from analysis.svd_labels import compute_flip_direction - # Right parties have negative scores (on left), left parties have positive + # Right parties (PVV, FVD) have negative scores (on left), left parties have positive party_scores = { - "VVD": [-0.5, 0.0], # Right party, component 1 score = -0.5 "PVV": [-0.8, 0.0], # Right party - "SP": [0.6, 0.0], # Left party, component 1 score = 0.6 + "FVD": [-0.6, 0.0], # Right party + "SP": [0.6, 0.0], # Left party "DENK": [0.4, 0.0], # Left party } - # Component 1: right_mean = -0.65, left_mean = 0.5 + # Component 1: right_mean = -0.7, left_mean = 0.5 # right_mean < left_mean, so flip = True assert compute_flip_direction(1, party_scores) is True @@ -39,15 +39,15 @@ def test_compute_flip_direction_right_on_right(): """Test that flip is False when right parties are already on the right.""" from analysis.svd_labels import compute_flip_direction - # Right parties have positive scores (on right), left parties have negative + # Right parties (PVV, FVD) have positive scores (on right), left parties have negative party_scores = { - "VVD": [0.5, 0.0], # Right party, component 1 score = 0.5 "PVV": [0.8, 0.0], # Right party + "FVD": [0.6, 0.0], # Right party "SP": [-0.6, 0.0], # Left party "DENK": [-0.4, 0.0], # Left party } - # Component 1: right_mean = 0.65, left_mean = -0.5 + # Component 1: right_mean = 0.7, left_mean = -0.5 # right_mean > left_mean, so flip = False assert compute_flip_direction(1, party_scores) is False @@ -77,14 
+77,14 @@ def test_auto_flip_computation_for_all_components(): """Test that flip directions are computed correctly for all components.""" from analysis.svd_labels import compute_flip_direction - # Simulate party scores for 10 components - # Right parties should have positive scores on component 1 (EU-integratie) + # Simulate party scores for 10 components using CANONICAL_RIGHT/LEFT + # Right parties should have positive scores on component 1 # Left parties should have negative scores on component 1 party_scores = { - "VVD": [0.5] * 10, # Right party, positive on all components - "PVV": [0.8] * 10, # Right party - "SP": [-0.6] * 10, # Left party, negative on all components - "DENK": [-0.4] * 10, # Left party + "PVV": [0.8] * 10, # Right party (CANONICAL_RIGHT), positive on all + "FVD": [0.6] * 10, # Right party (CANONICAL_RIGHT), positive on all + "SP": [-0.6] * 10, # Left party (CANONICAL_LEFT), negative on all + "DENK": [-0.4] * 10, # Left party (CANONICAL_LEFT), negative on all } # For all components, right_mean > left_mean, so flip should be False @@ -94,10 +94,10 @@ def test_auto_flip_computation_for_all_components(): # Now test with right parties on left (negative scores) party_scores_left = { - "VVD": [-0.5] * 10, - "PVV": [-0.8] * 10, - "SP": [0.6] * 10, - "DENK": [0.4] * 10, + "PVV": [-0.8] * 10, # Right party (CANONICAL_RIGHT), negative + "FVD": [-0.6] * 10, # Right party (CANONICAL_RIGHT), negative + "SP": [0.6] * 10, # Left party (CANONICAL_LEFT), positive + "DENK": [0.4] * 10, # Left party (CANONICAL_LEFT), positive } # For all components, right_mean < left_mean, so flip should be True