import logging
from typing import Optional

import requests

from database import MotionDatabase

logger = logging.getLogger(__name__)


def normalize_mp_name(
    achternaam: str, initialen: Optional[str], tussenvoegsel: Optional[str]
) -> str:
    """Reconstruct the ActorNaam format used in voting_results keys.

    Format: "{Tussenvoegsel} {Achternaam}, {Initialen}". Missing or
    whitespace-only components are left out, and surrounding whitespace on
    each component is stripped so the result never contains doubled or
    trailing spaces.

    Args:
        achternaam: Family name.
        initialen: Initials; omitted (together with the comma) when empty.
        tussenvoegsel: Name prefix such as "van der"; omitted when empty.

    Returns:
        The normalized display name, with its first character upper-cased.
    """
    parts = [
        part.strip()
        for part in (tussenvoegsel, achternaam)
        if part and part.strip()
    ]
    name = " ".join(parts)

    # Ensure the displayed name starts with an uppercase letter so
    # ORDER BY mp_name behaves predictably across databases that may
    # sort uppercase before lowercase. Only the first character is
    # changed, so other letters (e.g. hyphenated or already capitalized
    # parts) are never lowercased.
    if name and name[0].islower():
        name = name[0].upper() + name[1:]

    initials = initialen.strip() if initialen else ""
    if initials:
        name = f"{name}, {initials}"
    return name


_ODATA_BASE = "https://gegevensmagazijn.tweedekamer.nl/OData/v4/2.0"
_PAGE_SIZE = 250


def _upsert_item(db: MotionDatabase, item: dict) -> bool:
    """Upsert one OData record; return True if it was written to the DB.

    Records flagged "Verwijderd" and records without an "Achternaam" are
    skipped and yield False.
    """
    if item.get("Verwijderd"):
        return False

    # Expanded navigation properties may be null; fall back to empty dicts
    # so the lookups below simply yield None.
    persoon = item.get("Persoon") or {}
    fractiezetel = item.get("FractieZetel") or {}
    fractie = fractiezetel.get("Fractie") or {}

    achternaam = persoon.get("Achternaam")
    if not achternaam:
        logger.debug("Skipping record without achternaam: %s", item)
        return False

    mp_name = normalize_mp_name(
        achternaam,
        persoon.get("Initialen"),
        persoon.get("Tussenvoegsel"),
    )
    db.upsert_mp_metadata(
        mp_name=mp_name,
        # Use Afkorting (e.g. "VVD", "GroenLinks-PvdA") to match the
        # mp_votes party column; fall back to NaamNL when it is absent.
        party=fractie.get("Afkorting") or fractie.get("NaamNL"),
        van=item.get("Van"),
        tot_en_met=item.get("TotEnMet"),
        persoon_id=persoon.get("Id"),
    )
    return True


def fetch_mp_metadata(
    db_path: str,
    odata_url: str = f"{_ODATA_BASE}/FractieZetelPersoon",
) -> int:
    """Fetch MP party membership and tenure from OData and upsert into DB.

    Paginates through all records using $skip. Uses Fractie.Afkorting as the
    party name so it matches the abbreviations used in mp_votes.

    Args:
        db_path: Path handed to ``MotionDatabase``.
        odata_url: Entity-set URL to query; the $expand/$top/$skip query
            string is appended to it.

    Returns:
        The number of records processed (inserted or updated).

    Raises:
        Exception: Any error raised while requesting or decoding a page is
            logged and re-raised. A payload without a "value" list is logged
            and ends pagination without raising; errors on individual
            records are logged and that record is skipped.
    """
    expand = "$expand=FractieZetel($expand=Fractie),Persoon"
    processed = 0
    skip = 0

    session = requests.Session()
    try:
        db = MotionDatabase(db_path)
        while True:
            url = f"{odata_url}?{expand}&$top={_PAGE_SIZE}&$skip={skip}"
            try:
                resp = session.get(url, timeout=30)
                resp.raise_for_status()
                data = resp.json()
            except Exception as e:
                logger.error(
                    "Failed to fetch MP metadata (skip=%d): %s", skip, e
                )
                raise

            values = data.get("value") if isinstance(data, dict) else None
            if values is None:
                logger.error(
                    "Unexpected OData payload at skip=%d; missing 'value'",
                    skip,
                )
                break
            if not values:
                break  # no more pages

            for item in values:
                # One bad record must not abort the whole import.
                try:
                    if _upsert_item(db, item):
                        processed += 1
                except Exception:
                    logger.exception("Error processing OData item: %s", item)

            logger.debug(
                "Fetched page skip=%d, got %d records", skip, len(values)
            )
            # A short page is treated as the last one; this relies on the
            # server returning full pages of _PAGE_SIZE until the end.
            if len(values) < _PAGE_SIZE:
                break  # last page
            skip += _PAGE_SIZE
    finally:
        # Release pooled connections on both the success and error paths.
        session.close()

    logger.info("Processed %d MP metadata records", processed)
    return processed