You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
motief/pipeline/fetch_mp_metadata.py

118 lines
3.8 KiB

import logging
from typing import Optional
import requests
from database import MotionDatabase
logger = logging.getLogger(__name__)
def normalize_mp_name(
    achternaam: str, initialen: Optional[str], tussenvoegsel: Optional[str]
) -> str:
    """Build the MP name string in the ActorNaam layout used by voting_results keys.

    The result looks like "{Tussenvoegsel} {Achternaam}, {Initialen}". A falsy
    tussenvoegsel is left out of the surname part, and a falsy initialen drops
    the ", {Initialen}" suffix.
    """
    surname_pieces = [tussenvoegsel, achternaam] if tussenvoegsel else [achternaam]
    surname = " ".join(surname_pieces).strip()

    # Upper-case only the very first character so that ORDER BY mp_name is
    # predictable on databases that sort uppercase before lowercase. The rest
    # of the string is left untouched to keep hyphenated or already
    # capitalised parts exactly as they were supplied.
    if surname[:1].islower():
        surname = surname[:1].upper() + surname[1:]

    return f"{surname}, {initialen}" if initialen else surname
# Root URL of the tweedekamer.nl OData v4 service; the default ``odata_url``
# of fetch_mp_metadata appends the "FractieZetelPersoon" entity set to it.
_ODATA_BASE = "https://gegevensmagazijn.tweedekamer.nl/OData/v4/2.0"
# Number of records requested per page ($top) and the amount $skip advances
# by between pages in fetch_mp_metadata.
_PAGE_SIZE = 250
def fetch_mp_metadata(
    db_path: str,
    odata_url: str = f"{_ODATA_BASE}/FractieZetelPersoon",
) -> int:
    """Fetch MP party membership and tenure from OData and upsert into the DB.

    Pages through ``odata_url`` with ``$top``/``$skip`` until an empty or short
    page is returned. For each record the party is taken from
    ``Fractie.Afkorting`` (falling back to ``Fractie.NaamNL``) so it matches
    the abbreviations used in mp_votes.

    Records flagged ``Verwijderd`` and records without an ``Achternaam`` are
    skipped. A record whose processing raises is logged and skipped; it does
    not abort the run and is not counted.

    Args:
        db_path: Path handed to ``MotionDatabase``.
        odata_url: Entity-set URL to page through.

    Returns:
        The number of records passed to ``upsert_mp_metadata`` successfully.
        If a page lacks the ``value`` key, an error is logged and the count
        gathered so far is returned.

    Raises:
        Exception: Any error from the HTTP request, status check or JSON
            decoding is logged and re-raised unchanged.
    """
    expand = "$expand=FractieZetel($expand=Fractie),Persoon"
    db = MotionDatabase(db_path)
    processed = 0
    skip = 0

    # The context manager closes the session's pooled connections on every
    # exit path, including when a failed request is re-raised below.
    with requests.Session() as session:
        while True:
            url = f"{odata_url}?{expand}&$top={_PAGE_SIZE}&$skip={skip}"
            try:
                resp = session.get(url, timeout=30)
                resp.raise_for_status()
                data = resp.json()
            except Exception as e:
                logger.error("Failed to fetch MP metadata (skip=%d): %s", skip, e)
                raise

            values = data.get("value") if isinstance(data, dict) else None
            if values is None:
                logger.error(
                    "Unexpected OData payload at skip=%d; missing 'value'", skip
                )
                break
            if not values:
                break  # no more pages

            for item in values:
                # Best-effort per record: one malformed item must not stop
                # the remaining records from being stored.
                try:
                    if item.get("Verwijderd"):
                        continue

                    persoon = item.get("Persoon") or {}
                    fractiezetel = item.get("FractieZetel") or {}
                    fractie = fractiezetel.get("Fractie") or {}

                    achternaam = persoon.get("Achternaam")
                    if not achternaam:
                        logger.debug("Skipping record without achternaam: %s", item)
                        continue

                    mp_name = normalize_mp_name(
                        achternaam,
                        persoon.get("Initialen"),
                        persoon.get("Tussenvoegsel"),
                    )
                    db.upsert_mp_metadata(
                        mp_name=mp_name,
                        # Use Afkorting (e.g. "VVD", "GroenLinks-PvdA") to
                        # match the mp_votes party column.
                        party=fractie.get("Afkorting") or fractie.get("NaamNL"),
                        van=item.get("Van"),
                        tot_en_met=item.get("TotEnMet"),
                        persoon_id=persoon.get("Id"),
                    )
                    processed += 1
                except Exception:
                    logger.exception("Error processing OData item: %s", item)

            logger.debug("Fetched page skip=%d, got %d records", skip, len(values))
            if len(values) < _PAGE_SIZE:
                break  # last page
            skip += _PAGE_SIZE

    logger.info("Processed %d MP metadata records", processed)
    return processed