diff --git a/analysis/political_axis.py b/analysis/political_axis.py
index b90bf56..8b0bb4b 100644
--- a/analysis/political_axis.py
+++ b/analysis/political_axis.py
@@ -87,22 +87,24 @@ def compute_anchor_axis(
if not mp_vecs:
return {}
- # Load party affiliation for this window from mp_metadata
+ left_set = set(left_parties)
+ right_set = set(right_parties)
+
+ # 1. Party-level actors whose entity_id IS a party name (e.g. "GroenLinks-PvdA")
+ left_vecs = [mp_vecs[p] for p in left_set if p in mp_vecs]
+ right_vecs = [mp_vecs[p] for p in right_set if p in mp_vecs]
+
+ # 2. Individual MPs via mp_metadata party affiliation
conn = duckdb.connect(db_path)
rows = conn.execute("SELECT mp_name, party FROM mp_metadata").fetchall()
conn.close()
- party_of = {mp: party for mp, party in rows}
-
- left_vecs = [
- mp_vecs[mp]
- for mp, party in party_of.items()
- if party in left_parties and mp in mp_vecs
- ]
- right_vecs = [
- mp_vecs[mp]
- for mp, party in party_of.items()
- if party in right_parties and mp in mp_vecs
- ]
+ for mp_name, party in rows:
+ if mp_name not in mp_vecs:
+ continue
+ if party in left_set and mp_name not in left_set:
+ left_vecs.append(mp_vecs[mp_name])
+ elif party in right_set and mp_name not in right_set:
+ right_vecs.append(mp_vecs[mp_name])
if not left_vecs or not right_vecs:
_logger.warning(
diff --git a/database.py b/database.py
index 0490411..0163e2b 100644
--- a/database.py
+++ b/database.py
@@ -522,12 +522,32 @@ class MotionDatabase:
"SELECT COUNT(*) FROM mp_metadata WHERE mp_name = ?", (mp_name,)
).fetchone()
if exists and exists[0] > 0:
+ # Only overwrite when the incoming record is active (TotEnMet IS NULL) and the stored one has ended,
+ # or when both have the same active/ended status and the incoming Van date is later than the stored one.
conn.execute(
"""
UPDATE mp_metadata SET party = ?, van = ?, tot_en_met = ?, persoon_id = ?
WHERE mp_name = ?
+ AND (
+ -- prefer active over ended
+ (? IS NULL AND tot_en_met IS NOT NULL)
+ -- or same active status but newer start date
+ OR (? IS NULL AND tot_en_met IS NULL AND CAST(? AS DATE) > CAST(van AS DATE))
+ OR (? IS NOT NULL AND tot_en_met IS NOT NULL AND CAST(? AS DATE) > CAST(van AS DATE))
+ )
""",
- (party, van, tot_en_met, persoon_id, mp_name),
+ (
+ party,
+ van,
+ tot_en_met,
+ persoon_id,
+ mp_name,
+ tot_en_met, # prefer active
+ tot_en_met,
+ van, # both active, newer
+ tot_en_met,
+ van,
+ ), # both ended, newer
)
else:
conn.execute(
diff --git a/outputs/anchor_axis_2025_Q2.html b/outputs/anchor_axis_2025_Q2.html
new file mode 100644
index 0000000..4c88290
--- /dev/null
+++ b/outputs/anchor_axis_2025_Q2.html
@@ -0,0 +1,7 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/outputs/anchor_axis_2025_Q3.html b/outputs/anchor_axis_2025_Q3.html
new file mode 100644
index 0000000..c8c7378
--- /dev/null
+++ b/outputs/anchor_axis_2025_Q3.html
@@ -0,0 +1,7 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/outputs/anchor_axis_2025_Q4.html b/outputs/anchor_axis_2025_Q4.html
new file mode 100644
index 0000000..ad1c776
--- /dev/null
+++ b/outputs/anchor_axis_2025_Q4.html
@@ -0,0 +1,7 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/outputs/anchor_axis_2026_Q1.html b/outputs/anchor_axis_2026_Q1.html
new file mode 100644
index 0000000..8075e46
--- /dev/null
+++ b/outputs/anchor_axis_2026_Q1.html
@@ -0,0 +1,7 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/pipeline/fetch_mp_metadata.py b/pipeline/fetch_mp_metadata.py
index ac31861..d677e45 100644
--- a/pipeline/fetch_mp_metadata.py
+++ b/pipeline/fetch_mp_metadata.py
@@ -34,61 +34,85 @@ def normalize_mp_name(
return name
+_ODATA_BASE = "https://gegevensmagazijn.tweedekamer.nl/OData/v4/2.0"
+_PAGE_SIZE = 250
+
+
def fetch_mp_metadata(
- db_path: str, odata_url: str = "https://odata.example/FractieZetelPersoon"
+ db_path: str,
+ odata_url: str = f"{_ODATA_BASE}/FractieZetelPersoon",
) -> int:
"""Fetch MP party membership and tenure from OData and upsert into DB.
+ Pages through all records with $top/$skip. Uses Fractie.Afkorting (falling
+ back to NaamNL) as the party name to match the abbreviations used in mp_votes.
+
Returns the number of records processed (inserted or updated).
"""
+ expand = "$expand=FractieZetel($expand=Fractie),Persoon"
session = requests.Session()
- try:
- resp = session.get(odata_url)
- resp.raise_for_status()
- data = resp.json()
- except Exception as e:
- logger.error("Failed to fetch MP metadata: %s", e)
- raise
-
- values = data.get("value") if isinstance(data, dict) else None
- if values is None:
- logger.error("Unexpected OData payload; missing 'value' list")
- return 0
-
db = MotionDatabase(db_path)
processed = 0
+ skip = 0
- for item in values:
+ while True:
+ url = f"{odata_url}?{expand}&$top={_PAGE_SIZE}&$skip={skip}"
try:
- persoon = item.get("Persoon") or {}
- fractiezetel = item.get("FractieZetel") or {}
- fractie = fractiezetel.get("Fractie") or {}
-
- achternaam = persoon.get("Achternaam")
- initialen = persoon.get("Initialen")
- tussenvoegsel = persoon.get("Tussenvoegsel")
- persoon_id = persoon.get("Id")
-
- party = fractie.get("NaamNL")
- van = item.get("Van")
- tot_en_met = item.get("TotEnMet")
-
- if not achternaam:
- logger.debug("Skipping record without achternaam: %s", item)
- continue
-
- mp_name = normalize_mp_name(achternaam, initialen, tussenvoegsel)
-
- db.upsert_mp_metadata(
- mp_name=mp_name,
- party=party,
- van=van,
- tot_en_met=tot_en_met,
- persoon_id=persoon_id,
- )
- processed += 1
- except Exception:
- logger.exception("Error processing OData item: %s", item)
+ resp = session.get(url, timeout=30)
+ resp.raise_for_status()
+ data = resp.json()
+ except Exception as e:
+ logger.error("Failed to fetch MP metadata (skip=%d): %s", skip, e)
+ raise
+
+ values = data.get("value") if isinstance(data, dict) else None
+ if values is None:
+ logger.error("Unexpected OData payload at skip=%d; missing 'value'", skip)
+ break
+
+ if not values:
+ break # no more pages
+
+ for item in values:
+ try:
+ if item.get("Verwijderd"):
+ continue
+
+ persoon = item.get("Persoon") or {}
+ fractiezetel = item.get("FractieZetel") or {}
+ fractie = fractiezetel.get("Fractie") or {}
+
+ achternaam = persoon.get("Achternaam")
+ initialen = persoon.get("Initialen")
+ tussenvoegsel = persoon.get("Tussenvoegsel")
+ persoon_id = persoon.get("Id")
+
+ # Prefer Afkorting (e.g. "VVD", "GroenLinks-PvdA") to match the mp_votes party column; fall back to NaamNL when it is missing or empty
+ party = fractie.get("Afkorting") or fractie.get("NaamNL")
+ van = item.get("Van")
+ tot_en_met = item.get("TotEnMet")
+
+ if not achternaam:
+ logger.debug("Skipping record without achternaam: %s", item)
+ continue
+
+ mp_name = normalize_mp_name(achternaam, initialen, tussenvoegsel)
+
+ db.upsert_mp_metadata(
+ mp_name=mp_name,
+ party=party,
+ van=van,
+ tot_en_met=tot_en_met,
+ persoon_id=persoon_id,
+ )
+ processed += 1
+ except Exception:
+ logger.exception("Error processing OData item: %s", item)
+
+ logger.debug("Fetched page skip=%d, got %d records", skip, len(values))
+ if len(values) < _PAGE_SIZE:
+ break # last page
+ skip += _PAGE_SIZE
logger.info("Processed %d MP metadata records", processed)
return processed
diff --git a/tests/test_fetch_mp_metadata.py b/tests/test_fetch_mp_metadata.py
index 9c99e1c..d7171e3 100644
--- a/tests/test_fetch_mp_metadata.py
+++ b/tests/test_fetch_mp_metadata.py
@@ -28,44 +28,58 @@ class MockResponse:
class MockSession:
- def __init__(self, response):
- self._response = response
+ """Session mock that returns the data page on the first call and an empty page on every later call."""
- def get(self, url):
- return self._response
+ def __init__(self, data_page):
+ self._pages = [data_page, {"value": []}]
+ self._call = 0
+
+ def get(self, url, **kwargs):
+ resp = MockResponse(self._pages[min(self._call, len(self._pages) - 1)])
+ self._call += 1
+ return resp
def test_fetch_mp_metadata_idempotent(tmp_path, monkeypatch):
- # Prepare canned OData response with two FractieZetelPersoon records
+ # Prepare canned OData response with two FractieZetelPersoon records.
+ # Use Afkorting (not NaamNL) because fetch_mp_metadata prefers Afkorting.
data = {
"value": [
{
+ "Verwijderd": False,
"Persoon": {
"Achternaam": "Yesilgöz-Zegerius",
"Initialen": "D.",
"Tussenvoegsel": None,
"Id": "guid-1",
},
- "FractieZetel": {"Fractie": {"NaamNL": "VVD"}},
+ "FractieZetel": {
+ "Fractie": {
+ "Afkorting": "VVD",
+ "NaamNL": "Volkspartij voor Vrijheid en Democratie",
+ }
+ },
"Van": "2023-01-01",
"TotEnMet": None,
},
{
+ "Verwijderd": False,
"Persoon": {
"Achternaam": "Plas",
"Initialen": "C.",
"Tussenvoegsel": "van der",
"Id": "guid-2",
},
- "FractieZetel": {"Fractie": {"NaamNL": "BBB"}},
+ "FractieZetel": {
+ "Fractie": {"Afkorting": "BBB", "NaamNL": "BoerBurgerBeweging"}
+ },
"Van": "2023-06-01",
"TotEnMet": "2024-01-01",
},
]
}
- mock_resp = MockResponse(data)
- mock_session = MockSession(mock_resp)
+ mock_session = MockSession(data)
# Patch requests.Session to return our mock session
monkeypatch.setattr(requests, "Session", lambda: mock_session)
@@ -98,6 +112,7 @@ def test_fetch_mp_metadata_idempotent(tmp_path, monkeypatch):
assert rows[1][3] == None
assert rows[1][4] == "guid-1"
- # Run again to assert idempotence (no exception and same count processed)
+ # Run again to assert idempotence: same records processed, DB unchanged
+ monkeypatch.setattr(requests, "Session", lambda: MockSession(data))
count2 = fetch_mp_metadata(db_path=db_path, odata_url="http://example/odata")
assert count2 == 2