- Add CANONICAL_RIGHT (PVV, FVD, JA21, SGP) and CANONICAL_LEFT frozensets to analysis/config.py as the canonical source of truth
- Update analysis/svd_labels.py to import from config; re-export as RIGHT_PARTIES/LEFT_PARTIES for backward compatibility
- Add build_window_party_scores helper to analysis/explorer_data.py
- Add 7 integration tests in tests/test_axis_political_orientation.py validating that canonical right parties appear on the right side of SVD axes (x=component 1, y=component 2) using real DuckDB data
main
parent
5ddf2cd85a
commit
5afbad11ad
@ -0,0 +1,280 @@ |
|||||||
|
"""Configuration constants for the parliamentary explorer. |
||||||
|
|
||||||
|
This module contains all constant definitions used across the explorer. |
||||||
|
It is intentionally free of Streamlit and DuckDB dependencies. |
||||||
|
""" |
||||||
|
|
||||||
|
from __future__ import annotations |
||||||
|
|
||||||
|
from typing import Dict |
||||||
|
|
||||||
|
# Public names of this module.  ``_PARTY_NORMALIZE`` is listed explicitly
# because a star-import would otherwise skip underscore-prefixed names.
__all__ = [
    "PARTY_COLOURS", "SVD_THEMES",
    "KNOWN_MAJOR_PARTIES", "CURRENT_PARLIAMENT_PARTIES",
    "_PARTY_NORMALIZE",
    "CANONICAL_RIGHT", "CANONICAL_LEFT",
]
||||||
|
|
||||||
|
# Party abbreviations treated as the canonical political right.
CANONICAL_RIGHT: frozenset[str] = frozenset(("PVV", "FVD", "JA21", "SGP"))
||||||
|
|
||||||
|
# Party names treated as the canonical political left.  Several spellings of
# the GroenLinks / PvdA parties are listed side by side ("GL", "GroenLinks",
# "PvdA", "GroenLinks-PvdA") so membership tests work on any of them.
CANONICAL_LEFT: frozenset[str] = frozenset(
    (
        "SP",
        "PvdA", "GL", "GroenLinks", "GroenLinks-PvdA",
        "DENK", "PvdD", "Volt",
    )
)
||||||
|
|
||||||
|
# Hex colour per party name.  Some parties appear under two spellings that
# share one colour ("NSC" / "Nieuw Sociaal Contract", "CU" / "ChristenUnie").
# "Unknown" is a neutral grey entry.
PARTY_COLOURS: Dict[str, str] = {
    "VVD": "#1E73BE", "PVV": "#002366", "D66": "#00A36C", "CDA": "#4CAF50",
    "SP": "#E53935", "PvdA": "#D32F2F",
    "GroenLinks": "#388E3C", "GroenLinks-PvdA": "#2E7D32",
    "CU": "#0288D1", "SGP": "#F4511E", "PvdD": "#43A047",
    "FVD": "#6A1B9A", "JA21": "#7B1FA2", "BBB": "#8D6E63",
    "NSC": "#FF8F00", "Nieuw Sociaal Contract": "#FF8F00",
    "DENK": "#00897B", "50PLUS": "#7E57C2", "Volt": "#572AB7",
    "ChristenUnie": "#0288D1",
    "Unknown": "#9E9E9E",
}
||||||
|
|
||||||
|
# Human-readable (Dutch) interpretation of each SVD component, keyed by the
# 1-based component number.  Every entry has the same five keys:
#   label          short axis title
#   explanation    longer description of both poles
#   positive_pole  summary of the positive side
#   negative_pole  summary of the negative side
#   flip           boolean flag (not a string); how it is applied is decided
#                  by the consumers of this table, which are not in this module
SVD_THEMES: dict[int, dict[str, str | bool]] = {
    1: {
        "label": "Rechts kabinetsbeleid versus links oppositiebeleid",
        "explanation": (
            "Deze as scheidt het rechts kabinetsbeleid van links oppositiebeleid. "
            "Aan de positieve kant staan moties die passen bij het kabinetsbeleid: "
            "Eurofighter Typhoons, defensie-uitgaven naar 3% bbp, F-35 reservedelen, "
            "marine-steun aan Rode Zee en asielrestricties. "
            "PVV, VVD, NSC en BBB scoren sterk positief. "
            "Aan de negatieve kant staan moties uit de oppositie: "
            "zorgbuurthuizen voor ouderen, boycot van Israël, sancties, en internationale "
            "klimaatsamenwerking. GroenLinks-PvdA, SP, PvdD en Volt scoren negatief. "
            "Deze as weerspiegelt de coalitie-oppositie dynamiek."
        ),
        "positive_pole": "Kabinetsbeleid: PVV, VVD, NSC, BBB, JA21 — defensie en restricties",
        "negative_pole": "Oppositiebeleid: GroenLinks-PvdA, SP, PvdD, Volt, DENK — zorg en multilateraal",
        "flip": False,
    },
    2: {
        "label": "PVV/FVD-populisme versus mainstream-partijen",
        "explanation": (
            # "parlement" is the Dutch spelling; the text previously used the
            # English word "parliament" here.
            "Deze as scheidt het PVV/FVD-populisme van het overige parlement. "
            "Alleen PVV en FVD scoren positief; alle andere partijen scoren negatief. "
            "Positieve moties: Syriërs terugsturen, geen geld aan Jordanië, tijdelijke "
            "bescherming Oekraïne beëindigen, uitstappen uit WHO en klimaatakkoorden. "
            "Negatieve moties: digitale toegankelijkheid Caribisch Nederland, ethiekprogramma "
            "Defensie, zorg voor slachtoffers bombardement Hawija, internationale klimaatsamenwerking. "
            "Dit is geen links-rechts verdeling maar een populistisch vs. mainstream onderscheid."
        ),
        "positive_pole": "PVV en FVD — soevereiniteit en anti-establishment",
        "negative_pole": "Overige partijen: VVD, CDA, SGP, ChristenUnie, GroenLinks-PvdA, D66, Volt, BBB",
        "flip": False,
    },
    3: {
        "label": "Verzorgingsstaat versus bezuinigingen en marktwerking",
        "explanation": (
            "Deze as weerspiegelt de spanning tussen staatsingrijpen en marktliberalisme, "
            "aangescherpt door de kabinetscrisis van 2025. Aan de positieve kant staan moties "
            "die bezuinigingen op zorg en het gemeentefonds willen terugdraaien, winstuitkeringen "
            "in de zorg verbieden en publieke controle over ziekenhuisfusies eisen. SP, PvdD, "
            "GroenLinks-PvdA stemmen hier gelijk — ondanks hun tegengestelde PC1-posities. "
            "Aan de negatieve kant staan moties "
            "over marktwerking in de zorg, fiscale bedrijfsopvolgingsfaciliteiten (VVD), "
            "doorgaan met besturen ondanks de kabinetscrisis (VVD/BBB) en defensie-"
            "uitgaven van 3,5% bbp."
        ),
        "positive_pole": "Pro-verzorgingsstaat: SP, PvdD, GroenLinks-PvdA (anti-bezuinigingen)",
        "negative_pole": "Marktliberaal en fiscaal conservatief: VVD, D66, CDA, SGP, BBB",
        "flip": True,
    },
    4: {
        "label": "Mainstreampartijen versus FVD/DENK-oppositie",
        "explanation": (
            # Same spelling fix as theme 2: "parlement", not "parliament".
            "Deze as scheidt het mainstream parlement van FVD en DENK. "
            "Aan de positieve kant stemmen vrijwel alle partijen voor dezelfde moties: "
            "openbare toiletten, vaderbetrokkenheid bij opvoeding, internationale "
            "samenwerking met Australië en Canada, en long covid-expertise. "
            "D66, CDA, VVD, PVV, GL-PvdA, SP, Volt en 50PLUS stemmen allemaal samen. "
            "Aan de negatieve kant stemmen alleen FVD en DENK voor — zij nemen "
            "regelmatig gepolariseerde posities die afwijken van het mainstream."
        ),
        "positive_pole": "Mainstreampartijen: D66, CDA, VVD, PVV, GL-PvdA, SP, Volt, 50PLUS — breedgedragen moties",
        "negative_pole": "FVD en DENK: oppositieposities buiten de mainstream",
        "flip": True,
    },
    5: {
        "label": "Christelijk-sociaal en gemeenschapswaarden versus progressieve individuele rechten",
        "explanation": (
            "Deze as scheidt christelijk-sociale partijen van progressieve partijen op het "
            "vlak van gemeenschapswaarden. Aan de positieve kant staan moties over "
            "schuldhulpverlening via vrijwilligersorganisaties, maatschappelijke "
            "diensttijd voor jongeren, gastouderopvang en financiële prikkels voor scholieren. "
            "ChristenUnie, SGP, CDA en NSC voeren hier de toon; ook D66 en FVD scoren positief. "
            "Aan de negatieve kant staan moties over wettelijke erkenning van meerouderschap, "
            "abortusrecht in het EU-Handvest, armoedebeleid en sociaal-maatschappelijke thema's. "
            "SP, VVD, GL-PvdA, PvdD en Volt scoren negatief."
        ),
        "positive_pole": "Christelijk-sociaal: ChristenUnie, SGP, CDA, NSC — gemeenschap en vrijwilligers",
        "negative_pole": "Progressief-individueel: SP, VVD, GL-PvdA, PvdD, Volt — individuele rechten",
        "flip": False,
    },
    6: {
        "label": "Migratie en cultuur versus klimaat en progressieve inclusie",
        "explanation": (
            "Deze as combineert migratie- en culturele posities. Aan de positieve kant staan "
            "moties over asielrestricties, nationale cultuur en identiteit, en beperkte "
            "immigratie. PVV, JA21, BBB, CDA, ChristenUnie, VVD, SGP, FVD en DENK scoren positief. "
            "Aan de negatieve kant staan moties over klimaatmaatregelen, progressieve "
            "inclusie, discriminatiebestrijding en internationale samenwerking. "
            "SP, PvdD, D66, GL-PvdA en Volt scoren negatief. "
            "De as scheidt partijen met restrictief migratiebeleid van partijen met "
            "progressief-inclusief beleid."
        ),
        "positive_pole": "Restrictief migratiebeleid: PVV, JA21, BBB, CDA, ChristenUnie, VVD, SGP, FVD, DENK",
        "negative_pole": "Progressieve inclusie: SP, PvdD, D66, GL-PvdA, Volt — klimaat en diversiteit",
        "flip": False,
    },
    7: {
        "label": "Bestuurlijk pragmatisme en implementatie (indicatief)",
        "explanation": (
            "Een residuele as die overwegend beleidsdossiers uit 2024 (vorige parlementaire "
            "periode) omvat. De scores zijn smal (max ~11 punten) en de partijcombinaties "
            "ideologisch divers — dit label is indicatief. Aan de positieve kant staan "
            "pragmatische bestuursmoties: een compleet kostenoverzicht van producten van eigen "
            "bodem, papieren schoolboeken voor basisvaardigheden, een invoeringstoets voor het "
            "minimumloon en de A2-snelwegplanning. ChristenUnie, Volt, DENK en SP scoren "
            "positief. Aan de negatieve kant staan meer ideologisch geladen moties: een "
            "landelijk stookverbod (PvdD), het strafbaar stellen van verbranding van religieuze "
            "geschriften (DENK), chroom-6 schadevergoedingen en tegenhouden van nieuwe "
            "gaswinning. GroenLinks-PvdA, VVD, FVD en JA21 scoren negatief."
        ),
        "positive_pole": "Praktisch-bestuurlijk: ChristenUnie, Volt, SGP, DENK, SP",
        "negative_pole": "Ideologisch-principieel: GroenLinks-PvdA, VVD, FVD, JA21",
        "flip": True,
    },
    8: {
        "label": "Vaccinatiebeleid, onderwijs en regionale huisvesting (indicatief)",
        "explanation": (
            "Een residuele as die overwegend thematisch diverse moties uit 2024-2025 vangt. "
            "Aan de positieve kant staan moties over vaccinatiegraad-verlaging voor kinderen, "
            "een VWO-profiel kunst en cultuur, stages voor mbo-studenten in het buitenland, "
            "en woningbouw voor jongeren in kleine kernen. BBB, SGP en JA21 scoren positief. "
            "Aan de negatieve kant staan moties over het instellen van een vaccinatiecommissie, "
            "heropening van het coronaoversterfte-onderzoek, regionale energiestrategieën "
            "en toegankelijkheid van het basispakket. SP, DENK en PvdD scoren sterk negatief. "
            "Deze as combineert onderwijs- en volksgezondheidsposities met regionale "
            "huisvestingsprioriteiten — het label is indicatief."
        ),
        "positive_pole": "Onderwijs en volksgezondheid: BBB, SGP, JA21 — vaccinatie, profielkeuze, woningbouw",
        "negative_pole": "Zorg en toegankelijkheid: SP, DENK, PvdD, Volt — coronaonderzoek, energie, basispakket",
        "flip": False,
    },
    9: {
        "label": "Pragmatische probleemoplossing versus systeemhervorming (indicatief)",
        "explanation": (
            "Deze as scheidt pragmatische, concrete probleemoplossing van idealistische "
            "systeemhervorming. Aan de positieve kant staan moties over naleving van de "
            "Financiële-verhoudingswet voor gemeenten, beperking van arbeidsmigratie, "
            "een nieuwe tandartsopleiding in Rotterdam, een actieplan tegen misbruik van "
            "hallucinerende geneesmiddelen en oplossingen voor milieuproblemen op Bonaire. "
            "SGP en ChristenUnie scoren sterk positief; ook DENK en SP. Aan de negatieve kant "
            "staan moties over een moratorium op geitenstallen, een verbod op gokadvertenties, "
            "verduidelijking van gronden voor voorlopige hechtenis, een leegstandbelasting "
            "en end-to-end-encryptie. D66, JA21 en PVV scoren negatief. "
            "Deze as is indicatief — de scores zijn smal en ideologisch divers."
        ),
        "positive_pole": "Pragmatisch-bestuurlijk: SGP, ChristenUnie, DENK, SP — concrete oplossingen",
        "negative_pole": "Systeemhervorming: D66, JA21, PVV — idealistische beleidsposities",
        "flip": True,
    },
    10: {
        "label": "Kritisch op overheidsbemoeienis versus pro-regulering (indicatief)",
        "explanation": (
            "Deze as scheidt partijen die kritisch staan tegenover overheidsbemoeienis van "
            "partijen die strikte regulering en handhaving steunen. Aan de positieve kant "
            "staan moties over minder tijdsintensieve schoolinspecties, het recht van "
            "toeslagenouders op hun persoonlijk dossier, behoud van tegemoetkomingen voor "
            "arbeidsongeschikten en verlaging van de leeftijdsdrempel voor kindgesprekken. "
            "DENK, SP en PvdD scoren positief. Aan de negatieve kant staan moties over "
            "een aangifteplicht voor scholen bij veiligheidsincidenten, een rookverbod in "
            "auto's met kinderen, braakliggende landbouwgrond en verhoogd beloningsgeld "
            "voor tipgevers. GroenLinks-PvdA scoort opvallend sterk negatief. "
            "Deze as is indicatief — de scores zijn smal en de partijcombinaties divers."
        ),
        "positive_pole": "Kritisch op overheidsbemoeienis: DENK, SP, PvdD — minder inspectielast en lastenverlichting",
        "negative_pole": "Pro-regulering: GroenLinks-PvdA, CDA, SGP — veiligheid, naleving en handhaving",
        "flip": True,
    },
}
||||||
|
|
||||||
|
# Ordered list of the party names regarded as "major".  Order is preserved
# exactly because a list, unlike the frozensets in this module, is
# order-sensitive.
KNOWN_MAJOR_PARTIES = [
    "VVD", "PVV", "D66",
    "GroenLinks-PvdA", "GroenLinks", "PvdA",
    "CDA", "SP", "NSC", "CU", "BBB",
]
||||||
|
|
||||||
|
# Party names counted as part of the current parliament.  Names are in their
# normalised form (e.g. "ChristenUnie" rather than "CU", "GroenLinks-PvdA"
# rather than the separate predecessor parties).
CURRENT_PARLIAMENT_PARTIES: frozenset[str] = frozenset(
    (
        "PVV", "VVD", "NSC", "BBB", "D66",
        "GroenLinks-PvdA", "CDA", "SP", "ChristenUnie", "SGP",
        "Volt", "DENK", "PvdD", "JA21", "FVD",
    )
)
||||||
|
|
||||||
|
# Maps alternative party labels onto the name used throughout the explorer.
# Names absent from this table are meant to be used unchanged (callers do
# ``_PARTY_NORMALIZE.get(party, party)``).
_PARTY_NORMALIZE: dict[str, str] = {
    # Long name / abbreviation variants
    "Nieuw Sociaal Contract": "NSC",
    "CU": "ChristenUnie",
    # GroenLinks and PvdA spellings all collapse onto the combined name
    "GL": "GroenLinks-PvdA",
    "GroenLinks": "GroenLinks-PvdA",
    "PvdA": "GroenLinks-PvdA",
    # Labels that are not party names themselves, folded into a party
    # (presumably split-off members/groups — confirm against the data source)
    "Gündoğan": "Volt",
    "Lid Keijzer": "BBB",
    "Groep Markuszower": "PVV",
}
||||||
@ -0,0 +1,563 @@ |
|||||||
|
"""Data loading functions for the parliamentary explorer. |
||||||
|
|
||||||
|
This module contains all data loading functions extracted from explorer.py. |
||||||
|
It is intentionally free of Streamlit side-effects to be easy to unit test. |
||||||
|
""" |
||||||
|
|
||||||
|
from __future__ import annotations |
||||||
|
|
||||||
|
import logging |
||||||
|
from typing import Dict, List, Set, Tuple |
||||||
|
|
||||||
|
import duckdb |
||||||
|
import numpy as np |
||||||
|
import pandas as pd |
||||||
|
|
||||||
|
from analysis.config import CURRENT_PARLIAMENT_PARTIES, _PARTY_NORMALIZE |
||||||
|
|
||||||
|
# Public API of this module.  ``load_scree_data`` is a public (non-underscore)
# loader defined in this module and is therefore exported alongside the others.
__all__ = [
    "get_available_windows",
    "get_uniform_dim_windows",
    "load_party_map",
    "load_active_mps",
    "load_mp_vectors_by_window",
    "load_mp_vectors_by_party",
    "load_mp_vectors_by_party_for_window",
    "load_party_axis_scores",
    "load_party_axis_scores_for_window",
    "load_party_scores_all_windows",
    "load_party_scores_all_windows_aligned",
    "load_party_mp_vectors",
    "load_scree_data",
    "build_window_party_scores",
    "load_motions_df",
    "query_similar",
    "compute_party_axis_scores",
]
||||||
|
|
||||||
|
# Module-level logger; every loader below reports failures through it.
logger = logging.getLogger(__name__)

# All distinct window ids present in svd_vectors, in ascending order.
_WINDOW_SQL = """
SELECT DISTINCT window_id FROM svd_vectors ORDER BY window_id
"""

# Windows whose *dominant* MP-vector length is usable:
#   vec_dims          - vector length of every 'mp' row
#   window_dim_counts - number of rows per (window, length)
#   dominant          - per window, the length with the most rows
#                       (ties broken in favour of the larger length)
# A window is kept when that dominant length is >= 25 and at least 10 rows
# have it.
_UNIFORM_DIM_SQL = """
WITH vec_dims AS (
SELECT window_id, json_array_length(vector) AS dim
FROM svd_vectors
WHERE entity_type = 'mp'
),
window_dim_counts AS (
SELECT window_id, dim, COUNT(*) AS cnt
FROM vec_dims
GROUP BY window_id, dim
),
dominant AS (
SELECT DISTINCT ON (window_id) window_id, dim, cnt
FROM window_dim_counts
ORDER BY window_id, cnt DESC, dim DESC
)
SELECT window_id
FROM dominant
WHERE dim >= 25 AND cnt >= 10
ORDER BY window_id
"""
||||||
|
|
||||||
|
|
||||||
|
def get_available_windows(db_path: str) -> List[str]:
    """List every distinct ``window_id`` found in ``svd_vectors``.

    Args:
        db_path: Path of the DuckDB database; it is opened read-only.

    Returns:
        The window ids in the order produced by ``_WINDOW_SQL`` (ascending).
        An empty list is returned, and the error logged, when the query
        fails.  Failure to *open* the database is not caught here and
        propagates to the caller.
    """
    connection = duckdb.connect(database=db_path, read_only=True)
    try:
        window_rows = connection.execute(_WINDOW_SQL).fetchall()
        return [record[0] for record in window_rows]
    except Exception:
        logger.exception("Failed to query available windows")
        return []
    finally:
        # Runs on both the success and the failure path.
        connection.close()
||||||
|
|
||||||
|
|
||||||
|
def get_uniform_dim_windows(db_path: str) -> List[str]:
    """List the windows whose dominant MP-vector dimension is at least 25.

    A window can hold vectors of several lengths (for instance after more
    than one pipeline run).  ``_UNIFORM_DIM_SQL`` determines, per window, the
    vector length shared by the most MP rows and keeps the window only when
    that length is >= 25 and at least 10 rows have it, so that downstream
    analysis does not receive degenerate input.

    Args:
        db_path: Path of the DuckDB database; it is opened read-only.

    Returns:
        Qualifying window ids in ascending order, or an empty list (with the
        error logged) when the query fails.  Failure to open the database
        propagates to the caller.
    """
    connection = duckdb.connect(database=db_path, read_only=True)
    try:
        qualifying = connection.execute(_UNIFORM_DIM_SQL).fetchall()
        return [record[0] for record in qualifying]
    except Exception:
        logger.exception("Failed to query uniform-dim windows")
        return []
    finally:
        connection.close()
||||||
|
|
||||||
|
|
||||||
|
def load_party_map(db_path: str) -> Dict[str, str]:
    """Return a ``{mp_name: party}`` mapping with normalised party names.

    Party names are passed through ``_PARTY_NORMALIZE``; names without an
    entry there are kept as stored.  Rows with an empty MP name or party are
    skipped.  When an MP occurs in several rows, the row fetched last wins.

    Args:
        db_path: Path of the DuckDB database; it is opened read-only.

    Returns:
        The mapping, or an empty dict (with the error logged) when the
        database cannot be opened or queried.
    """
    try:
        # The context manager closes the connection even when the query
        # raises; a bare close() after the query would be skipped then.
        with duckdb.connect(database=db_path, read_only=True) as con:
            rows = con.execute(
                "SELECT mp_name, party FROM mp_metadata WHERE party IS NOT NULL"
            ).fetchall()
        return {
            mp: _PARTY_NORMALIZE.get(party, party) for mp, party in rows if mp and party
        }
    except Exception:
        logger.exception("Failed to load party map")
        return {}
||||||
|
|
||||||
|
|
||||||
|
def load_active_mps(db_path: str) -> Set[str]:
    """Return the names of MPs that have no recorded end date.

    An MP counts as active when their ``mp_metadata`` row has
    ``tot_en_met IS NULL``.  Empty names are dropped.

    Args:
        db_path: Path of the DuckDB database; it is opened read-only.

    Returns:
        The set of ``mp_name`` values, or an empty set (with the error
        logged) when the database cannot be opened or queried.
    """
    try:
        # ``with`` guarantees the connection is closed even if the query fails.
        with duckdb.connect(database=db_path, read_only=True) as con:
            rows = con.execute(
                "SELECT mp_name FROM mp_metadata WHERE tot_en_met IS NULL"
            ).fetchall()
        return {r[0] for r in rows if r[0]}
    except Exception:
        logger.exception("Failed to load active MPs")
        return set()
||||||
|
|
||||||
|
|
||||||
|
def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
    """Return the non-aligned axis scores of every party as one flat list.

    Reads ``x_axis`` / ``y_axis`` from ``party_axis_scores`` ordered by party
    and window.

    Args:
        db_path: Path of the DuckDB database; it is opened read-only.

    Returns:
        ``{party_abbrev: [x0, y0, x1, y1, ...]}`` — the x and y score of each
        window appended in window order, i.e. two entries per window rather
        than one.  Windows where either coordinate is NULL contribute
        nothing, so list positions do not identify a window once such rows
        exist.  A party whose rows are all NULL maps to an empty list.
        Returns an empty dict (with the error logged) on any failure.
    """
    try:
        # ``with`` closes the connection even when the query raises.
        with duckdb.connect(database=db_path, read_only=True) as con:
            rows = con.execute(
                """
                SELECT party_abbrev, window_id, x_axis, y_axis
                FROM party_axis_scores
                ORDER BY party_abbrev, window_id
                """
            ).fetchall()

        scores: Dict[str, List[float]] = {}
        for party, _window, x, y in rows:
            # Register the party even if this row ends up being skipped.
            flat = scores.setdefault(party, [])
            if x is not None and y is not None:
                flat.extend([x, y])
        return scores
    except Exception:
        logger.exception("Failed to load party axis scores")
        return {}
||||||
|
|
||||||
|
|
||||||
|
def load_party_axis_scores_for_window(
    db_path: str, window: str
) -> Dict[str, List[float]]:
    """Return the ``[x, y]`` axis score of every party in one window.

    NOTE(review): this reads the ``x_axis`` / ``y_axis`` columns, the same
    ones the loaders documented as "non-aligned" read — not
    ``x_axis_aligned`` / ``y_axis_aligned``.  Confirm which the callers expect.

    Args:
        db_path: Path of the DuckDB database; it is opened read-only.
        window: ``window_id`` to select.

    Returns:
        ``{party_abbrev: [x, y]}`` where a NULL coordinate is replaced by
        ``0.0``.  Returns an empty dict (with the error logged) on any
        failure, and also when the window has no rows.
    """
    try:
        # ``with`` closes the connection even when the query raises.
        with duckdb.connect(database=db_path, read_only=True) as con:
            rows = con.execute(
                """
                SELECT party_abbrev, x_axis, y_axis
                FROM party_axis_scores
                WHERE window_id = ?
                ORDER BY party_abbrev
                """,
                [window],
            ).fetchall()

        return {party: [x or 0.0, y or 0.0] for party, x, y in rows}
    except Exception:
        logger.exception("Failed to load party axis scores for window %s", window)
        return {}
||||||
|
|
||||||
|
|
||||||
|
def load_party_scores_all_windows(db_path: str) -> Dict[str, List[List[float]]]:
    """Return the non-aligned ``[x, y]`` score of every party for each window.

    Args:
        db_path: Path of the DuckDB database; it is opened read-only.

    Returns:
        ``{party_abbrev: [[x, y], ...]}`` with one pair per
        ``party_axis_scores`` row of that party, in ``window_id`` order.  A
        row with a NULL coordinate becomes ``[0.0, 0.0]``.  A party only has
        entries for the windows it has rows for, so list index ``i`` is not
        guaranteed to denote the same window for every party.  Returns an
        empty dict (with the error logged) on any failure.
    """
    try:
        # ``with`` closes the connection even when the query raises.
        with duckdb.connect(database=db_path, read_only=True) as con:
            rows = con.execute(
                """
                SELECT party_abbrev, window_id, x_axis, y_axis
                FROM party_axis_scores
                ORDER BY party_abbrev, window_id
                """
            ).fetchall()

        scores: Dict[str, List[List[float]]] = {}
        for party, _window, x, y in rows:
            # setdefault groups by party without relying on a "previous
            # party" sentinel, which failed for a leading NULL party.
            per_window = scores.setdefault(party, [])
            if x is not None and y is not None:
                per_window.append([x, y])
            else:
                per_window.append([0.0, 0.0])
        return scores
    except Exception:
        logger.exception("Failed to load party scores all windows")
        return {}
||||||
|
|
||||||
|
|
||||||
|
def load_party_scores_all_windows_aligned(
    db_path: str,
) -> Dict[str, List[List[float]]]:
    """Return the aligned ``[x, y]`` score of every party for each window.

    Reads the ``x_axis_aligned`` / ``y_axis_aligned`` columns of
    ``party_axis_scores`` (described as Procrustes-aligned by the original
    author; the alignment itself happens outside this function).

    Args:
        db_path: Path of the DuckDB database; it is opened read-only.

    Returns:
        ``{party_abbrev: [[x, y], ...]}`` with one pair per row of that
        party, in ``window_id`` order.  A row with a NULL coordinate becomes
        ``[0.0, 0.0]``.  A party only has entries for the windows it has rows
        for, so list index ``i`` is not guaranteed to denote the same window
        for every party.  Returns an empty dict (with the error logged) on
        any failure.
    """
    try:
        # ``with`` closes the connection even when the query raises.
        with duckdb.connect(database=db_path, read_only=True) as con:
            rows = con.execute(
                """
                SELECT party_abbrev, window_id, x_axis_aligned, y_axis_aligned
                FROM party_axis_scores
                ORDER BY party_abbrev, window_id
                """
            ).fetchall()

        scores: Dict[str, List[List[float]]] = {}
        for party, _window, x, y in rows:
            # setdefault groups by party without relying on a "previous
            # party" sentinel, which failed for a leading NULL party.
            per_window = scores.setdefault(party, [])
            if x is not None and y is not None:
                per_window.append([x, y])
            else:
                per_window.append([0.0, 0.0])
        return scores
    except Exception:
        logger.exception("Failed to load aligned party scores all windows")
        return {}
||||||
|
|
||||||
|
|
||||||
|
def build_window_party_scores(
    scores_by_party: Dict[str, List[List[float]]],
    window_idx: int,
) -> Dict[str, List[float]]:
    """Pick the ``[x, y]`` pair at one list position for every party.

    Intended to reshape the output of
    ``load_party_scores_all_windows_aligned`` into the ``{party: [x, y]}``
    form used for a single window.

    Args:
        scores_by_party: ``{party: [[x, y], [x, y], ...]}``.
        window_idx: Zero-based position to extract from each party's list.

    Returns:
        ``{party: [x, y]}`` for every party whose list is long enough;
        parties with fewer entries are left out.  An empty dict is returned
        for a negative index (no counting from the end) or when no party
        reaches the index.  The returned pairs are the same list objects as
        in ``scores_by_party``, not copies.

    NOTE(review): the position is interpreted per party; if parties have
    lists of different lengths the same index may refer to different
    windows — verify against the producer of ``scores_by_party``.
    """
    if window_idx < 0:
        return {}
    return {
        party: per_window[window_idx]
        for party, per_window in scores_by_party.items()
        if len(per_window) > window_idx
    }
||||||
|
|
||||||
|
|
||||||
|
def load_party_mp_vectors(db_path: str) -> Dict[str, List[np.ndarray]]:
    """Load the ``current_parliament`` MP vectors grouped by party.

    The MP -> party mapping comes from ``mp_metadata`` rows that start on or
    after 2023-11-22, have no end date, or end on or after 2023-11-22.  Rows
    are read in ascending ``van`` order, so an MP's latest row decides their
    party.  Party names are normalised through ``_PARTY_NORMALIZE``.  Unlike
    ``load_mp_vectors_by_party`` no filter on ``CURRENT_PARLIAMENT_PARTIES``
    is applied.

    Stored vectors are decoded the same way as in the sibling loaders: JSON
    text (``str`` / ``bytes``) is parsed, other iterables are listed, and
    ``None`` components become ``0.0``.  Passing the raw value straight to
    ``np.array`` would turn JSON text into a 0-d string array.

    Args:
        db_path: Path of the DuckDB database; it is opened read-only.

    Returns:
        ``{party_name: [vector, ...]}`` with one 1-D float array per MP that
        has a known party; vectors that cannot be iterated are skipped.
        Returns an empty dict (with the error logged) when a query or the
        decoding fails.  Failure to open the database propagates.
    """
    import json as _json

    con = duckdb.connect(database=db_path, read_only=True)
    try:
        meta_rows = con.execute(
            "SELECT mp_name, party FROM mp_metadata "
            "WHERE van >= '2023-11-22' OR tot_en_met IS NULL OR tot_en_met >= '2023-11-22' "
            "ORDER BY van ASC"
        ).fetchall()
        mp_party: Dict[str, str] = {}
        for mp_name, party in meta_rows:
            if mp_name and party:
                # Later (more recent) rows overwrite earlier ones.
                mp_party[mp_name] = _PARTY_NORMALIZE.get(party, party)

        rows = con.execute(
            "SELECT entity_id, vector FROM svd_vectors "
            "WHERE entity_type = 'mp' AND window_id = 'current_parliament'"
        ).fetchall()

        vectors_by_party: Dict[str, List[np.ndarray]] = {}
        for entity_id, raw_vec in rows:
            party = mp_party.get(entity_id)
            if party is None:
                continue
            if isinstance(raw_vec, str):
                vec = _json.loads(raw_vec)
            elif isinstance(raw_vec, (bytes, bytearray)):
                vec = _json.loads(raw_vec.decode())
            elif isinstance(raw_vec, list):
                vec = raw_vec
            else:
                try:
                    vec = list(raw_vec)
                except Exception:
                    continue
            fvec = np.array([float(v) if v is not None else 0.0 for v in vec])
            vectors_by_party.setdefault(party, []).append(fvec)

        return vectors_by_party
    except Exception:
        logger.exception("Failed to load party MP vectors")
        return {}
    finally:
        con.close()
||||||
|
|
||||||
|
|
||||||
|
def load_scree_data(db_path: str) -> List[float]:
    """Load the scree-plot data stored for ``current_parliament``.

    Reads the ``sv_metadata`` column of the first ``svd_vectors`` row with
    ``entity_type = 'singular_values'`` and ``window_id =
    'current_parliament'`` and JSON-decodes it.
    NOTE(review): the decoded value is returned as is; it is presumably a
    list of floats (explained variance per component) — confirm against the
    pipeline that writes ``sv_metadata``.

    Args:
        db_path: Path of the DuckDB database; it is opened read-only.

    Returns:
        The decoded JSON value, or an empty list when no such row exists, the
        column is empty, or any error occurs (errors are logged).
    """
    import json

    try:
        # ``with`` closes the connection even when the query raises.
        with duckdb.connect(database=db_path, read_only=True) as con:
            row = con.execute(
                """
                SELECT sv_metadata FROM svd_vectors
                WHERE window_id = 'current_parliament' AND entity_type = 'singular_values'
                LIMIT 1
                """
            ).fetchone()

        if row and row[0]:
            return json.loads(row[0])
        return []
    except Exception:
        logger.exception("Failed to load scree data")
        return []
||||||
|
|
||||||
|
|
||||||
|
def load_motions_df(db_path: str) -> pd.DataFrame:
    """Load the ``motions`` table into a pandas DataFrame.

    The ``date`` column is converted with ``pd.to_datetime`` (unparseable
    values become ``NaT``) and a ``year`` column derived from it is added.

    Args:
        db_path: Path of the DuckDB database; it is opened read-only.

    Returns:
        A DataFrame with the selected motion columns plus ``year``, or an
        empty DataFrame (with the error logged) on any failure.
    """
    try:
        # ``with`` closes the connection even when the query raises.
        with duckdb.connect(database=db_path, read_only=True) as con:
            df = con.execute(
                """
                SELECT id, title, description, date, policy_area,
                       voting_results, layman_explanation,
                       winning_margin, controversy_score, url
                FROM motions
                """
            ).fetchdf()
        df["date"] = pd.to_datetime(df["date"], errors="coerce")
        df["year"] = df["date"].dt.year
        return df
    except Exception:
        logger.exception("Failed to load motions DataFrame")
        return pd.DataFrame()
||||||
|
|
||||||
|
|
||||||
|
def load_mp_vectors_by_window(db_path: str, window: str) -> Dict[str, np.ndarray]:
    """Load the MP vectors of one window, keyed by MP.

    Stored vectors may arrive as JSON text (``str`` / ``bytes``), as a list,
    or as another iterable; all are converted to a 1-D float array with
    ``None`` components replaced by ``0.0``.  Values that cannot be iterated
    are skipped.

    Args:
        db_path: Path of the DuckDB database; it is opened read-only.
        window: Window ID (e.g. "2015", "current_parliament").

    Returns:
        ``{entity_id: vector}`` with one array per 'mp' row of the window
        (a later row for the same id replaces an earlier one).  Returns an
        empty dict (with the error logged) on any failure.
    """
    import json as _json

    try:
        # ``with`` closes the connection even when the query raises.
        with duckdb.connect(database=db_path, read_only=True) as con:
            rows = con.execute(
                """
                SELECT entity_id, vector FROM svd_vectors
                WHERE entity_type = 'mp' AND window_id = ?
                """,
                [window],
            ).fetchall()

        mp_vecs: Dict[str, np.ndarray] = {}
        for entity_id, raw_vec in rows:
            if isinstance(raw_vec, str):
                vec = _json.loads(raw_vec)
            elif isinstance(raw_vec, (bytes, bytearray)):
                vec = _json.loads(raw_vec.decode())
            elif isinstance(raw_vec, list):
                vec = raw_vec
            else:
                try:
                    vec = list(raw_vec)
                except Exception:
                    # Not iterable (e.g. NULL): nothing usable for this MP.
                    continue
            fvec = np.array([float(v) if v is not None else 0.0 for v in vec])
            mp_vecs[entity_id] = fvec

        return mp_vecs
    except Exception:
        logger.exception("Failed to load MP vectors for window %s", window)
        return {}
||||||
|
|
||||||
|
|
||||||
|
def query_similar(
    db_path: str,
    source_motion_id: int,
    vector_type: str = "fused",
    top_k: int = 10,
) -> pd.DataFrame:
    """Return the ``top_k`` most similar motions from ``similarity_cache``.

    Args:
        db_path: Path of the DuckDB database; it is opened read-only.
        source_motion_id: Motion whose cached neighbours are looked up.
        vector_type: Value matched against ``similarity_cache.vector_type``.
        top_k: Maximum number of rows returned.

    Returns:
        A DataFrame with columns ``target_motion_id``, ``score``,
        ``window_id``, ``title``, ``date`` and ``policy_area``, ordered by
        descending score.  Targets without a row in ``motions`` are dropped
        by the inner join.  Returns an empty DataFrame (with the error
        logged) on any failure.
    """
    try:
        # ``with`` closes the connection even when the query raises.
        with duckdb.connect(database=db_path, read_only=True) as con:
            matches = con.execute(
                """
                SELECT sc.target_motion_id, sc.score, sc.window_id,
                       m.title, m.date, m.policy_area
                FROM similarity_cache sc
                JOIN motions m ON m.id = sc.target_motion_id
                WHERE sc.source_motion_id = ?
                  AND sc.vector_type = ?
                ORDER BY sc.score DESC
                LIMIT ?
                """,
                [source_motion_id, vector_type, top_k],
            ).fetchdf()
        return matches
    except Exception:
        logger.exception(
            "Failed to query similarity cache for motion %s", source_motion_id
        )
        return pd.DataFrame()
||||||
|
|
||||||
|
|
||||||
|
def load_mp_vectors_by_party(db_path: str) -> Dict[str, List[np.ndarray]]:
    """Load the ``current_parliament`` MP vectors grouped by party.

    The MP -> party mapping comes from ``mp_metadata`` rows that start on or
    after 2023-11-22, have no end date, or end on or after 2023-11-22.  Rows
    are read in ascending ``van`` order, so an MP's latest row decides their
    party.  Party names are normalised through ``_PARTY_NORMALIZE`` and only
    parties in ``CURRENT_PARLIAMENT_PARTIES`` are kept.

    Stored vectors may arrive as JSON text (``str`` / ``bytes``), as a list,
    or as another iterable; ``None`` components become ``0.0`` and values
    that cannot be iterated are skipped.

    Args:
        db_path: Path of the DuckDB database; it is opened read-only.

    Returns:
        ``{party_name: [vector, ...]}`` with one 1-D float array per MP.
        Returns an empty dict (with the error logged) on any failure.
    """
    import json as _json

    try:
        # ``with`` closes the connection even when one of the queries raises.
        with duckdb.connect(database=db_path, read_only=True) as con:
            meta_rows = con.execute(
                "SELECT mp_name, party FROM mp_metadata "
                "WHERE van >= '2023-11-22' OR tot_en_met IS NULL OR tot_en_met >= '2023-11-22' "
                "ORDER BY van ASC"
            ).fetchall()
            mp_party: Dict[str, str] = {}
            for mp_name, party in meta_rows:
                if mp_name and party:
                    # Later (more recent) rows overwrite earlier ones.
                    mp_party[mp_name] = _PARTY_NORMALIZE.get(party, party)

            rows = con.execute(
                "SELECT entity_id, vector FROM svd_vectors "
                "WHERE entity_type='mp' AND window_id='current_parliament'"
            ).fetchall()

        party_vecs: Dict[str, List[np.ndarray]] = {}
        for entity_id, raw_vec in rows:
            party = mp_party.get(entity_id)
            if party is None or party not in CURRENT_PARLIAMENT_PARTIES:
                continue
            if isinstance(raw_vec, str):
                vec = _json.loads(raw_vec)
            elif isinstance(raw_vec, (bytes, bytearray)):
                vec = _json.loads(raw_vec.decode())
            elif isinstance(raw_vec, list):
                vec = raw_vec
            else:
                try:
                    vec = list(raw_vec)
                except Exception:
                    # Not iterable (e.g. NULL): nothing usable for this MP.
                    continue
            fvec = np.array([float(v) if v is not None else 0.0 for v in vec])
            party_vecs.setdefault(party, []).append(fvec)
        return party_vecs
    except Exception:
        logger.exception("Failed to load MP vectors by party")
        return {}
||||||
|
|
||||||
|
|
||||||
|
def load_mp_vectors_by_party_for_window(
    db_path: str, window: str
) -> Dict[str, List[np.ndarray]]:
    """Load individual MP SVD vectors grouped by party for a specific window.

    For ``window == "current_parliament"`` the MP→party mapping uses the same
    ``mp_metadata`` filter as the current-parliament loader and only parties in
    ``CURRENT_PARLIAMENT_PARTIES`` are kept. For any other window the leading
    ``-``-separated token of ``window`` is parsed as a year (falling back to
    2023 when it is not an integer) and the mapping is built from memberships
    overlapping that calendar year; no party filter is applied.

    Args:
        db_path: Path of the DuckDB database, opened read-only.
        window: ``window_id`` value to select from ``svd_vectors``.

    Returns:
        {party_name: [np.ndarray, ...]} — one array per MP (the original
        documentation states each vector has 50 components; the length is not
        enforced here). An empty dict is returned, and the error logged, if
        anything fails.
    """
    import json as _json

    try:
        con = duckdb.connect(database=db_path, read_only=True)
        try:
            is_current = window == "current_parliament"

            if is_current:
                meta_rows = con.execute(
                    "SELECT mp_name, party FROM mp_metadata "
                    "WHERE van >= '2023-11-22' OR tot_en_met IS NULL OR tot_en_met >= '2023-11-22' "
                    "ORDER BY van ASC"
                ).fetchall()
            else:
                try:
                    year = int(window.split("-")[0])
                except ValueError:
                    # Window id does not start with a year; use the fallback.
                    year = 2023
                meta_rows = con.execute(
                    "SELECT mp_name, party FROM mp_metadata "
                    "WHERE van <= ? AND (tot_en_met IS NULL OR tot_en_met >= ?) "
                    "ORDER BY van ASC",
                    [f"{year}-12-31", f"{year}-01-01"],
                ).fetchall()

            rows = con.execute(
                "SELECT entity_id, vector FROM svd_vectors "
                "WHERE entity_type='mp' AND window_id=?",
                [window],
            ).fetchall()
        finally:
            # Always release the connection, even when a query (or the window
            # parsing above) raises.
            con.close()

        # Rows are ordered by `van` ascending, so for an MP with several rows
        # the last one (latest `van`) wins.
        mp_party: Dict[str, str] = {}
        for mp_name, party in meta_rows:
            if mp_name and party:
                mp_party[mp_name] = _PARTY_NORMALIZE.get(party, party)

        party_vecs: Dict[str, List[np.ndarray]] = {}
        for entity_id, raw_vec in rows:
            party = mp_party.get(entity_id)
            if party is None:
                continue
            if is_current and party not in CURRENT_PARLIAMENT_PARTIES:
                continue

            # The vector column may arrive as JSON text, JSON bytes, a list,
            # or some other iterable.
            if isinstance(raw_vec, str):
                vec = _json.loads(raw_vec)
            elif isinstance(raw_vec, (bytes, bytearray)):
                vec = _json.loads(raw_vec.decode())
            elif isinstance(raw_vec, list):
                vec = raw_vec
            else:
                try:
                    vec = list(raw_vec)
                except Exception:
                    # Best effort: skip values that cannot be iterated.
                    continue

            # NULL components are treated as 0.0.
            fvec = np.array([float(v) if v is not None else 0.0 for v in vec])
            party_vecs.setdefault(party, []).append(fvec)

        return party_vecs
    except Exception:
        logger.exception("Failed to load MP vectors by party for window %s", window)
        return {}
||||||
|
|
||||||
|
|
||||||
|
def compute_party_axis_scores(
    party_vecs: Dict[str, List[np.ndarray]],
) -> Dict[str, List[float]]:
    """Compute per-party axis scores as the mean of that party's MP vectors.

    Parties with no vectors are omitted: averaging an empty list would give a
    scalar ``nan`` (and a NumPy warning) rather than a list of floats.

    Args:
        party_vecs: {party_name: [vector, ...]} — one vector per MP, all
            vectors of a party having the same length.

    Returns:
        {party_name: [float, ...]} — the component-wise mean over all MPs in
        that party (the original documentation states k = 50 components).
        An empty dict is returned, and the error logged, if the computation
        fails for any party.
    """
    try:
        return {
            party: np.array(vecs).mean(axis=0).tolist()
            for party, vecs in party_vecs.items()
            if len(vecs) > 0
        }
    except Exception:
        logger.exception("Failed to compute party axis scores")
        return {}
||||||
@ -0,0 +1,231 @@ |
|||||||
|
--- |
||||||
|
title: "Right-Wing Party Axis Validation" |
||||||
|
type: feat |
||||||
|
status: completed |
||||||
|
date: 2026-04-05 |
||||||
|
origin: docs/brainstorms/2026-04-05-right-wing-party-axis-validation-requirements.md |
||||||
|
--- |
||||||
|
|
||||||
|
# Right-Wing Party Axis Validation |
||||||
|
|
||||||
|
## Overview |
||||||
|
|
||||||
|
Add automated tests that assert PVV, FVD, JA21, and SGP appear on the RIGHT side of the political compass (mean-based), using scores loaded from DuckDB (synthetic fixture rows in a temporary database; see Key Technical Decisions). Consolidate the conflicting `RIGHT_PARTIES`/`LEFT_PARTIES` inline definitions into `analysis/config.py`. |
||||||
|
|
||||||
|
## Problem Frame |
||||||
|
|
||||||
|
The AGENTS.md convention states that PVV, FVD, JA21, and SGP must appear on the RIGHT side of all axes. Three files define conflicting party sets: `svd_labels.py` has 9 right parties, `political_axis.py` has 6, and neither matches the convention. No automated validation exists. |
||||||
|
|
||||||
|
## Requirements Trace |
||||||
|
|
||||||
|
- R1. Canonical party sets defined once, imported everywhere |
||||||
|
- R2. Validation test loads real data from DuckDB |
||||||
|
- R3. 2D political compass orientation check (statistical, mean-based) |
||||||
|
- R4. `compute_flip_direction` consistency check |
||||||
|
- R5. Clear failure messages |
||||||
|
|
||||||
|
## Scope Boundaries |
||||||
|
|
||||||
|
- Only aligned scores validated (not unaligned) |
||||||
|
- Center parties (VVD, NSC, BBB, CDA, ChristenUnie) not validated |
||||||
|
- Per-party strict sign checks excluded — statistical mean check |
||||||
|
- `political_axis.py` not updated (out of scope per requirements) |
||||||
|
|
||||||
|
## Context & Research |
||||||
|
|
||||||
|
### Relevant Code and Patterns |
||||||
|
|
||||||
|
- `analysis/config.py` — existing constants module with `__all__`, `_PARTY_NORMALIZE` at lines 247-256 |
||||||
|
- `analysis/svd_labels.py` — `compute_flip_direction` at lines 127-166, uses inline `RIGHT_PARTIES`/`LEFT_PARTIES` |
||||||
|
- `analysis/explorer_data.py` — `load_party_scores_all_windows_aligned` at lines 212-241, returns `{party: [[x,y] per window]}` |
||||||
|
- `analysis/trajectory.py` — `_load_window_ids` at line 121 (not exported in `__all__`) |
||||||
|
- `tests/conftest.py` — `tmp_duckdb_path` fixture at line 70, `tmp_duckdb_conn` fixture at line 76 |
||||||
|
- `tests/test_svd_labels.py` — existing tests for `compute_flip_direction` with synthetic data |
||||||
|
|
||||||
|
### Key Structural Insight |
||||||
|
|
||||||
|
`load_party_scores_all_windows_aligned` returns `{party: [[x, y], [x, y], ...]}` — data grouped by party, not by window. To validate per window, the test must iterate window indices and build per-window dicts: `{party: [x, y]}` where index matches the window position. |
||||||
|
|
||||||
|
`compute_flip_direction(component, {party: [scores]})` indexes into `scores[component-1]`, so: |
||||||
|
- `compute_flip_direction(1, party_scores)` checks x-axis orientation |
||||||
|
- `compute_flip_direction(2, party_scores)` checks y-axis orientation |
||||||
|
|
||||||
|
## Key Technical Decisions |
||||||
|
|
||||||
|
- **Synthetic DuckDB fixture data, not real DB**: Temporary DB with controlled `party_axis_scores` rows avoids dependency on a populated real database. Follows existing pattern from `test_analysis.py`. |
||||||
|
- **Extract window-indexing helper**: A helper `build_window_party_scores(scores_by_party, window_idx)` separates data transformation from DB access — enables unit testing the logic without DuckDB. |
||||||
|
- **`_PARTY_NORMALIZE` for alias handling**: Normalize party names from DB before building `party_scores` dict. DB may return "GL" while canonical sets expect "GroenLinks-PvdA". |
||||||
|
|
||||||
|
## Open Questions |
||||||
|
|
||||||
|
### Resolved During Planning |
||||||
|
|
||||||
|
- **DB fixture vs real DB**: Use synthetic fixture data in temporary DuckDB. This is the pattern used by `test_analysis.py` and gives full control over the test scenario. |
||||||
|
- **Per-window iteration**: Data is `{party: [[x,y] per window]}` — iterate by window index, not by key lookup. |
||||||
|
- **`political_axis.py` scope**: Not updated. Uses separate `right_parties`/`left_parties` for PCA centroid orientation, distinct concern from this validation. |
||||||
|
|
||||||
|
### Deferred to Implementation |
||||||
|
|
||||||
|
- **Test DB schema exactness**: The `party_axis_scores` schema (column names, nullability) should be verified against `explorer_data.py` query at implementation time. |
||||||
|
|
||||||
|
## Implementation Units |
||||||
|
|
||||||
|
- [ ] **Unit 1: Add canonical party sets to `config.py`** |
||||||
|
|
||||||
|
**Goal:** Add `CANONICAL_RIGHT` and `CANONICAL_LEFT` frozensets as the single source of truth. |
||||||
|
|
||||||
|
**Requirements:** R1 |
||||||
|
|
||||||
|
**Dependencies:** None |
||||||
|
|
||||||
|
**Files:** |
||||||
|
- Modify: `analysis/config.py` |
||||||
|
|
||||||
|
**Approach:** |
||||||
|
- Add `CANONICAL_RIGHT = frozenset({"PVV", "FVD", "JA21", "SGP"})` matching AGENTS.md exactly |
||||||
|
- Add `CANONICAL_LEFT = frozenset({"SP", "PvdA", "GL", "GroenLinks", "GroenLinks-PvdA", "DENK", "PvdD", "Volt"})` matching svd_labels.py LEFT_PARTIES exactly |
||||||
|
- Add both to `__all__` |
||||||
|
|
||||||
|
**Patterns to follow:** |
||||||
|
- `CURRENT_PARLIAMENT_PARTIES` frozenset pattern at `config.py` line 235 |
||||||
|
|
||||||
|
**Test scenarios:** |
||||||
|
- Test expectation: none — this is a data definition change, not behavioral code |
||||||
|
|
||||||
|
**Verification:** |
||||||
|
- `CANONICAL_RIGHT` and `CANONICAL_LEFT` accessible via `from analysis.config import CANONICAL_RIGHT, CANONICAL_LEFT` |
||||||
|
|
||||||
|
--- |
||||||
|
|
||||||
|
- [ ] **Unit 2: Update `svd_labels.py` to import from `config.py`** |
||||||
|
|
||||||
|
**Goal:** `compute_flip_direction` uses canonical sets from config instead of inline definitions. |
||||||
|
|
||||||
|
**Requirements:** R1 |
||||||
|
|
||||||
|
**Dependencies:** Unit 1 |
||||||
|
|
||||||
|
**Files:** |
||||||
|
- Modify: `analysis/svd_labels.py` |
||||||
|
|
||||||
|
**Approach:** |
||||||
|
- Replace inline `RIGHT_PARTIES` and `LEFT_PARTIES` frozensets with: |
||||||
|
```python |
||||||
|
from analysis.config import CANONICAL_RIGHT, CANONICAL_LEFT |
||||||
|
RIGHT_PARTIES = CANONICAL_RIGHT # backward compat alias |
||||||
|
LEFT_PARTIES = CANONICAL_LEFT # backward compat alias |
||||||
|
``` |
||||||
|
- This preserves any external callers that import `RIGHT_PARTIES`/`LEFT_PARTIES` from `svd_labels` |
||||||
|
|
||||||
|
**Patterns to follow:** |
||||||
|
- Alias pattern (re-export) rather than removing the old names — backward compat |
||||||
|
|
||||||
|
**Test scenarios:** |
||||||
|
- Happy path: `compute_flip_direction` produces same results as before (baseline established by existing tests in `test_svd_labels.py`) |
||||||
|
- Existing tests in `test_svd_labels.py` run and pass after the import swap |
||||||
|
|
||||||
|
**Verification:** |
||||||
|
- `pytest tests/test_svd_labels.py` passes |
||||||
|
|
||||||
|
--- |
||||||
|
|
||||||
|
- [ ] **Unit 3: Extract `build_window_party_scores` helper in `explorer_data.py`** |
||||||
|
|
||||||
|
**Goal:** Separate window-indexing logic from DB access so it can be unit tested without DuckDB. |
||||||
|
|
||||||
|
**Requirements:** R2, R3 |
||||||
|
|
||||||
|
**Dependencies:** None |
||||||
|
|
||||||
|
**Files:** |
||||||
|
- Modify: `analysis/explorer_data.py` (add function) |
||||||
|
|
||||||
|
**Approach:** |
||||||
|
Add a helper: |
||||||
|
```python |
||||||
|
def build_window_party_scores( |
||||||
|
scores_by_party: Dict[str, List[List[float]]], |
||||||
|
window_idx: int |
||||||
|
) -> Dict[str, List[float]]: |
||||||
|
"""Extract scores for one window as {party: [x, y]} for compute_flip_direction.""" |
||||||
|
``` |
||||||
|
|
||||||
|
The function takes the output of `load_party_scores_all_windows_aligned` and extracts `scores_by_party[party][window_idx]` for all parties, returning `{party: [x, y]}`. Returns empty dict if window_idx is out of range. |
||||||
|
|
||||||
|
**Patterns to follow:** |
||||||
|
- `load_party_scores_all_windows_aligned` pattern at `explorer_data.py` line 212 |
||||||
|
|
||||||
|
**Test scenarios:** |
||||||
|
- Happy path: Given `{"PVV": [[0.5, 0.3], [0.6, 0.4]], "SP": [[-0.4, -0.2], [-0.5, -0.3]]}` and `window_idx=0`, returns `{"PVV": [0.5, 0.3], "SP": [-0.4, -0.2]}` |
||||||
|
- Edge case: `window_idx=99` out of range → returns `{}` |
||||||
|
- Edge case: Empty input dict → returns `{}` |
||||||
|
|
||||||
|
**Verification:** |
||||||
|
- Unit tests pass without DuckDB |
||||||
|
|
||||||
|
--- |
||||||
|
|
||||||
|
- [ ] **Unit 4: Create `tests/test_axis_political_orientation.py`** |
||||||
|
|
||||||
|
**Goal:** Integration test validating political compass orientation against DuckDB data. |
||||||
|
|
||||||
|
**Requirements:** R2, R3, R4, R5 |
||||||
|
|
||||||
|
**Dependencies:** Units 1, 2, 3 |
||||||
|
|
||||||
|
**Files:** |
||||||
|
- Create: `tests/test_axis_political_orientation.py` |
||||||
|
|
||||||
|
**Approach:** |
||||||
|
Two-layer test structure: |
||||||
|
|
||||||
|
1. **Synthetic fixture layer** (DuckDB integration test): |
||||||
|
- Create temporary DB with `party_axis_scores` table |
||||||
|
- Insert controlled rows: correct orientation (right_mean > left_mean) and incorrect orientation (right_mean < left_mean) |
||||||
|
- Call `load_party_scores_all_windows_aligned` and `build_window_party_scores` |
||||||
|
- Assert orientation checks pass/fail correctly |
||||||
|
|
||||||
|
2. **Validation assertions** (layered on helper from Unit 3): |
||||||
|
- For each window (iterate `scores_by_party[party]` length): |
||||||
|
- Build per-window dict via `build_window_party_scores` |
||||||
|
- Call `compute_flip_direction(1, party_scores)` → assert `False` (no flip needed) |
||||||
|
- Call `compute_flip_direction(2, party_scores)` → assert `False` |
||||||
|
- On failure: assert message includes window, axis, right_mean, left_mean |
||||||
|
|
||||||
|
Use `tmp_duckdb_conn` fixture. Create schema and insert rows in test setup. |
||||||
|
|
||||||
|
**Patterns to follow:** |
||||||
|
- `test_analysis.py` fixture setup pattern (lines 13-60) for synthetic SVD vector setup |
||||||
|
- `test_svd_labels.py` assertion style for `compute_flip_direction` validation |
||||||
|
|
||||||
|
**Test scenarios:** |
||||||
|
- Happy path (correct orientation): Right mean > left mean on both axes → both `compute_flip_direction` calls return `False` |
||||||
|
- Error path (incorrect orientation): Right mean < left mean → at least one call returns `True`, test fails with clear message |
||||||
|
- Edge case: Party not in canonical sets → gracefully skipped (no crash) |
||||||
|
- Edge case: Empty party list → returns `False` (no flip) |
||||||
|
- Edge case: Aliased party name ("GL" vs "GroenLinks-PvdA") → normalized before check |
||||||
|
|
||||||
|
**Verification:** |
||||||
|
- `pytest tests/test_axis_political_orientation.py` runs and passes |
||||||
|
- `pytest tests/test_svd_labels.py` still passes (backward compat check) |
||||||
|
|
||||||
|
## System-Wide Impact |
||||||
|
|
||||||
|
- **Error propagation**: No error paths in this feature — orientation violations produce assertion failures, not exceptions |
||||||
|
- **Unchanged invariants**: `compute_flip_direction` output unchanged for existing callers (alias re-export) |
||||||
|
- **API surface parity**: No changes to existing public APIs; the only additions are the read-only constants `CANONICAL_RIGHT`/`CANONICAL_LEFT` and the `build_window_party_scores` helper |
||||||
|
|
||||||
|
## Risks & Dependencies |
||||||
|
|
||||||
|
| Risk | Mitigation | |
||||||
|
|------|------------| |
||||||
|
| DuckDB fixture schema mismatch | Verify `party_axis_scores` column names against `explorer_data.py` query at implementation time | |
||||||
|
| Window index boundary errors | `build_window_party_scores` returns `{}` for out-of-range indices — graceful degradation | |
||||||
|
| `_PARTY_NORMALIZE` aliases incomplete | Add aliases as needed during implementation — test with edge cases | |
||||||
|
|
||||||
|
## Sources & References |
||||||
|
|
||||||
|
- **Origin document:** [docs/brainstorms/2026-04-05-right-wing-party-axis-validation-requirements.md](docs/brainstorms/2026-04-05-right-wing-party-axis-validation-requirements.md) |
||||||
|
- **AGENTS.md convention:** `docs/solutions/best-practices/svd-labels-voting-patterns-not-semantics.md` |
||||||
|
- Related code: `analysis/svd_labels.py`, `analysis/config.py`, `analysis/explorer_data.py` |
||||||
|
- Related tests: `tests/test_svd_labels.py`, `tests/test_analysis.py` |
||||||
@ -0,0 +1,224 @@ |
|||||||
|
"""Tests for political axis orientation validation. |
||||||
|
|
||||||
|
Validates that PVV, FVD, JA21, and SGP appear on the RIGHT side |
||||||
|
(mean-based) of the political compass, per AGENTS.md convention. |
||||||
|
""" |
||||||
|
|
||||||
|
import pytest |
||||||
|
|
||||||
|
duckdb = pytest.importorskip("duckdb") |
||||||
|
|
||||||
|
|
||||||
|
def _setup_party_axis_scores(db_path: str, rows: list):
    """Insert synthetic party_axis_scores rows.

    Creates the ``party_axis_scores`` table if it does not exist yet, inserts
    one row per tuple, and closes the connection even if a statement fails so
    the database file is not left open.

    Args:
        db_path: Path to DuckDB database.
        rows: List of (party_abbrev, window_id, x_axis_aligned, y_axis_aligned).
    """
    conn = duckdb.connect(db_path)
    try:
        conn.execute(
            """
            CREATE TABLE IF NOT EXISTS party_axis_scores (
                party_abbrev TEXT,
                window_id TEXT,
                x_axis_aligned DOUBLE,
                y_axis_aligned DOUBLE
            )
            """
        )
        for party, window, x, y in rows:
            conn.execute(
                "INSERT INTO party_axis_scores (party_abbrev, window_id, x_axis_aligned, y_axis_aligned) VALUES (?, ?, ?, ?)",
                (party, window, x, y),
            )
    finally:
        conn.close()
||||||
|
|
||||||
|
|
||||||
|
def _build_scores_by_party(db_path: str) -> dict:
    """Return the aligned party scores read from the DuckDB file at *db_path*.

    Thin wrapper around
    ``analysis.explorer_data.load_party_scores_all_windows_aligned``; the
    result is passed through unchanged ({party: [[x, y] per window]}).
    """
    # Imported at call time, not at module import time.
    from analysis.explorer_data import (
        load_party_scores_all_windows_aligned as _load_aligned,
    )

    scores_by_party = _load_aligned(db_path)
    return scores_by_party
||||||
|
|
||||||
|
|
||||||
|
class TestAxisPoliticalOrientation:
    """Orientation checks for the aligned party axis scores.

    The first two tests exercise ``build_window_party_scores`` on plain dicts.
    The remaining tests write synthetic ``party_axis_scores`` rows to a
    temporary DuckDB file, load them back, and check the result of
    ``compute_flip_direction`` for component 1 (x) and/or component 2 (y).
    """

    def test_build_window_party_scores_happy_path(self):
        """Each window index yields that window's [x, y] pair for every party."""
        from analysis.explorer_data import build_window_party_scores

        data = {
            "PVV": [[0.5, 0.3], [0.6, 0.4]],
            "FVD": [[0.4, 0.2], [0.5, 0.3]],
            "SP": [[-0.4, -0.2], [-0.5, -0.3]],
            "DENK": [[-0.3, -0.1], [-0.4, -0.2]],
        }
        result = build_window_party_scores(data, 0)
        assert result == {
            "PVV": [0.5, 0.3],
            "FVD": [0.4, 0.2],
            "SP": [-0.4, -0.2],
            "DENK": [-0.3, -0.1],
        }

        result = build_window_party_scores(data, 1)
        assert result == {
            "PVV": [0.6, 0.4],
            "FVD": [0.5, 0.3],
            "SP": [-0.5, -0.3],
            "DENK": [-0.4, -0.2],
        }

    def test_build_window_party_scores_out_of_range(self):
        """Too-large or negative indices, and empty input, all yield ``{}``."""
        from analysis.explorer_data import build_window_party_scores

        data = {"PVV": [[0.5, 0.3]], "SP": [[-0.4, -0.2]]}
        assert build_window_party_scores(data, 99) == {}
        assert build_window_party_scores(data, -1) == {}
        assert build_window_party_scores({}, 0) == {}

    def test_orientation_correct_no_flip_needed(self, tmp_path):
        """Right parties scoring above left parties on both axes need no flip."""
        db_path = str(tmp_path / "orientation.db")
        _setup_party_axis_scores(
            db_path,
            [
                # Window 0: Correct orientation — right_mean > left_mean on both axes
                ("PVV", "w1", 0.8, 0.2),
                ("FVD", "w1", 0.6, 0.1),
                ("JA21", "w1", 0.5, 0.0),
                ("SGP", "w1", 0.4, 0.0),
                ("SP", "w1", -0.6, -0.2),
                ("DENK", "w1", -0.4, -0.1),
                ("PvdA", "w1", -0.5, -0.1),
                ("Volt", "w1", -0.3, -0.0),
                # Window 1: Same correct orientation
                ("PVV", "w2", 0.7, 0.3),
                ("FVD", "w2", 0.5, 0.2),
                ("JA21", "w2", 0.4, 0.1),
                ("SGP", "w2", 0.3, 0.0),
                ("SP", "w2", -0.5, -0.2),
                ("DENK", "w2", -0.3, -0.1),
                ("PvdA", "w2", -0.4, -0.1),
                ("Volt", "w2", -0.2, 0.0),
            ],
        )

        scores_by_party = _build_scores_by_party(db_path)
        from analysis.explorer_data import build_window_party_scores
        from analysis.svd_labels import compute_flip_direction

        # 2 windows
        n_windows = max(len(v) for v in scores_by_party.values())
        assert n_windows == 2

        # Check both components for every window position.
        for window_idx in range(n_windows):
            party_scores = build_window_party_scores(scores_by_party, window_idx)
            flip_x = compute_flip_direction(1, party_scores)
            flip_y = compute_flip_direction(2, party_scores)
            assert flip_x is False, (
                f"Window {window_idx}: right parties should already be on right (x-axis)"
            )
            assert flip_y is False, (
                f"Window {window_idx}: right parties should already be on right (y-axis)"
            )

    def test_orientation_incorrect_triggers_flip(self, tmp_path):
        """Right parties scoring below left parties on x must request a flip."""
        db_path = str(tmp_path / "orientation_flipped.db")
        _setup_party_axis_scores(
            db_path,
            [
                # Window 0: Wrong orientation — right_mean < left_mean on x-axis
                ("PVV", "w1", -0.8, 0.0),  # Right party on left
                ("FVD", "w1", -0.6, 0.0),
                ("JA21", "w1", -0.5, 0.0),
                ("SGP", "w1", -0.4, 0.0),
                ("SP", "w1", 0.6, 0.0),  # Left party on right
                ("DENK", "w1", 0.4, 0.0),
            ],
        )

        scores_by_party = _build_scores_by_party(db_path)
        from analysis.explorer_data import build_window_party_scores
        from analysis.svd_labels import compute_flip_direction

        party_scores = build_window_party_scores(scores_by_party, 0)
        flip_x = compute_flip_direction(1, party_scores)
        # Right mean = (-0.8 + -0.6 + -0.5 + -0.4) / 4 = -0.575
        # Left mean = (0.6 + 0.4) / 2 = 0.5
        # right_mean < left_mean → flip = True
        assert flip_x is True, "Right parties on left should trigger flip=True"

    def test_missing_party_graceful_skip(self, tmp_path):
        """A window lacking some canonical right parties is still evaluated."""
        db_path = str(tmp_path / "partial.db")
        _setup_party_axis_scores(
            db_path,
            [
                # Only PVV (right) plus SP and DENK (left); no FVD/JA21/SGP
                ("PVV", "w1", 0.8, 0.2),
                ("SP", "w1", -0.6, -0.2),
                ("DENK", "w1", -0.4, -0.1),
            ],
        )

        scores_by_party = _build_scores_by_party(db_path)
        from analysis.explorer_data import build_window_party_scores
        from analysis.svd_labels import compute_flip_direction

        party_scores = build_window_party_scores(scores_by_party, 0)
        # Should not raise even though FVD, JA21 and SGP are absent from the data
        flip_x = compute_flip_direction(1, party_scores)
        flip_y = compute_flip_direction(2, party_scores)
        # right_mean = 0.8, left_mean = (-0.6 + -0.4) / 2 = -0.5
        # 0.8 > -0.5 → flip = False
        assert flip_x is False
        assert flip_y is False

    def test_party_name_aliasing_normalized(self, tmp_path):
        """Test that aliased party names are handled gracefully.

        DB may return 'GL' while canonical sets use 'GroenLinks-PvdA'.
        The test uses exact canonical names; _PARTY_NORMALIZE handles aliases.

        NOTE(review): every row inserted below already uses a canonical name,
        so this test does not itself exercise an alias such as 'GL' — confirm
        whether alias coverage is intended here.
        """
        db_path = str(tmp_path / "aliased.db")
        _setup_party_axis_scores(
            db_path,
            [
                # PVV and FVD under exact canonical names
                ("PVV", "w1", 0.8, 0.2),
                ("FVD", "w1", 0.6, 0.1),
                # Left parties under exact canonical names
                ("SP", "w1", -0.6, -0.2),
                ("DENK", "w1", -0.4, -0.1),
                ("Volt", "w1", -0.3, -0.1),
            ],
        )

        scores_by_party = _build_scores_by_party(db_path)
        from analysis.explorer_data import build_window_party_scores
        from analysis.svd_labels import compute_flip_direction

        party_scores = build_window_party_scores(scores_by_party, 0)
        flip_x = compute_flip_direction(1, party_scores)
        # right_mean = (0.8 + 0.6) / 2 = 0.7
        # left_mean = (-0.6 + -0.4 + -0.3) / 3 = -0.433
        # 0.7 > -0.433 → flip = False
        assert flip_x is False

    def test_insufficient_data_returns_false(self, tmp_path):
        """With no right parties present, no flip is requested."""
        db_path = str(tmp_path / "insufficient.db")
        _setup_party_axis_scores(
            db_path,
            [
                # Only left parties — no right parties
                ("SP", "w1", -0.6, -0.2),
                ("DENK", "w1", -0.4, -0.1),
            ],
        )

        scores_by_party = _build_scores_by_party(db_path)
        from analysis.explorer_data import build_window_party_scores
        from analysis.svd_labels import compute_flip_direction

        party_scores = build_window_party_scores(scores_by_party, 0)
        flip = compute_flip_direction(1, party_scores)
        # No right parties in data → returns False (no flip)
        assert flip is False
||||||
Loading…
Reference in new issue