You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
118 lines
3.8 KiB
118 lines
3.8 KiB
import logging
|
|
from typing import Optional
|
|
|
|
import requests
|
|
|
|
from database import MotionDatabase
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def normalize_mp_name(
    achternaam: str, initialen: Optional[str], tussenvoegsel: Optional[str]
) -> str:
    """Build the ActorNaam-style display name used in voting_results keys.

    The result has the shape "{Tussenvoegsel} {Achternaam}, {Initialen}".
    A missing/empty tussenvoegsel is left out, and the ", {Initialen}"
    suffix is only added when initialen is non-empty.
    """
    pieces = [tussenvoegsel, achternaam] if tussenvoegsel else [achternaam]
    full = " ".join(pieces).strip()

    # Upper-case only the leading character so ORDER BY mp_name sorts
    # predictably on databases that place uppercase before lowercase.
    # The rest of the name is left untouched (hyphenated or already
    # capitalized parts keep their casing).
    leading = full[:1]
    if leading.islower():
        full = leading.upper() + full[1:]

    return f"{full}, {initialen}" if initialen else full
|
|
|
|
|
|
# Root URL of the OData service; entity-set paths such as
# "/FractieZetelPersoon" are appended to it to form request URLs.
_ODATA_BASE = "https://gegevensmagazijn.tweedekamer.nl/OData/v4/2.0"
|
|
# Number of records requested per page ($top) and the step by which $skip
# advances; a page shorter than this is treated as the last one.
_PAGE_SIZE = 250
|
|
|
|
|
|
def _upsert_mp_item(db, item) -> bool:
    """Upsert one FractieZetelPersoon record; return True if it was stored.

    Records flagged ``Verwijderd`` or lacking ``Persoon.Achternaam`` are
    skipped and reported as False.
    """
    if item.get("Verwijderd"):
        return False

    persoon = item.get("Persoon") or {}
    fractiezetel = item.get("FractieZetel") or {}
    fractie = fractiezetel.get("Fractie") or {}

    achternaam = persoon.get("Achternaam")
    if not achternaam:
        logger.debug("Skipping record without achternaam: %s", item)
        return False

    db.upsert_mp_metadata(
        mp_name=normalize_mp_name(
            achternaam, persoon.get("Initialen"), persoon.get("Tussenvoegsel")
        ),
        # Use Afkorting (e.g. "VVD", "GroenLinks-PvdA") to match the
        # mp_votes party column; fall back to NaamNL when it is absent.
        party=fractie.get("Afkorting") or fractie.get("NaamNL"),
        van=item.get("Van"),
        tot_en_met=item.get("TotEnMet"),
        persoon_id=persoon.get("Id"),
    )
    return True


def fetch_mp_metadata(
    db_path: str,
    odata_url: str = f"{_ODATA_BASE}/FractieZetelPersoon",
) -> int:
    """Fetch MP party membership and tenure from OData and upsert into DB.

    Paginates through all records using ``$top``/``$skip``. Uses
    Fractie.Afkorting as the party name so it matches the abbreviations
    used in mp_votes.

    Args:
        db_path: Passed to ``MotionDatabase`` to open the target database.
        odata_url: Entity-set URL to query; the ``$expand``, ``$top`` and
            ``$skip`` query options are appended to it.

    Returns:
        The number of records upserted. Records that are skipped or whose
        processing raised are not counted.

    Raises:
        Exception: Any error raised while requesting a page, checking its
            HTTP status, or decoding its JSON is logged and re-raised.
            Errors on individual records are logged and do not abort the
            run.
    """
    expand = "$expand=FractieZetel($expand=Fractie),Persoon"
    db = MotionDatabase(db_path)
    processed = 0
    skip = 0

    # Use the session as a context manager so its pooled connections are
    # released on every exit path, including the re-raise below.
    with requests.Session() as session:
        while True:
            url = f"{odata_url}?{expand}&$top={_PAGE_SIZE}&$skip={skip}"
            try:
                resp = session.get(url, timeout=30)
                resp.raise_for_status()
                data = resp.json()
            except Exception as e:
                logger.error("Failed to fetch MP metadata (skip=%d): %s", skip, e)
                raise

            values = data.get("value") if isinstance(data, dict) else None
            if values is None:
                logger.error(
                    "Unexpected OData payload at skip=%d; missing 'value'", skip
                )
                break

            if not values:
                break  # no more pages

            for item in values:
                # Best-effort per record: one bad item must not abort the
                # whole import, so log it and carry on.
                try:
                    if _upsert_mp_item(db, item):
                        processed += 1
                except Exception:
                    logger.exception("Error processing OData item: %s", item)

            logger.debug("Fetched page skip=%d, got %d records", skip, len(values))
            if len(values) < _PAGE_SIZE:
                break  # last page
            skip += _PAGE_SIZE

    logger.info("Processed %d MP metadata records", processed)
    return processed
|
|
|