"""Tests for ``fetch_mp_metadata`` driven by a mocked ``requests.Session``."""

import json
import types

import pytest
import requests

try:
    import duckdb
except Exception:
    # Any failure while importing duckdb (not only ImportError) skips the
    # whole module instead of erroring during collection.
    pytest.skip(
        "duckdb not installed, skipping fetch_mp_metadata tests",
        allow_module_level=True,
    )

from pipeline.fetch_mp_metadata import fetch_mp_metadata, normalize_mp_name


class MockResponse:
    """Minimal stand-in for ``requests.Response`` holding a canned payload."""

    def __init__(self, data, status_code=200):
        self._data = data
        self.status_code = status_code

    def raise_for_status(self):
        """Raise ``requests.HTTPError`` unless the status code is 2xx."""
        if not (200 <= self.status_code < 300):
            raise requests.HTTPError(f"status {self.status_code}")

    def json(self):
        """Return the canned payload passed to the constructor."""
        return self._data


class MockSession:
    """Session mock that returns a data page on first call and empty page on second.

    Every call after the second keeps returning the empty page, because the
    page index is clamped to the last entry.
    """

    def __init__(self, data_page):
        self._pages = [data_page, {"value": []}]
        self._call = 0

    def get(self, url, **kwargs):
        """Return the next canned page; ``url`` and ``kwargs`` are ignored."""
        page_index = min(self._call, len(self._pages) - 1)
        resp = MockResponse(self._pages[page_index])
        self._call += 1
        return resp


def _read_mp_rows(db_path):
    """Return all ``mp_metadata`` rows ordered by ``mp_name``.

    The connection is closed even when the query raises, so a failing
    assertion or SQL error cannot leave the database file open.
    """
    conn = duckdb.connect(db_path)
    try:
        return conn.execute(
            "SELECT mp_name, party, van, tot_en_met, persoon_id FROM mp_metadata ORDER BY mp_name"
        ).fetchall()
    finally:
        conn.close()


def test_fetch_mp_metadata_idempotent(tmp_path, monkeypatch):
    """Running ``fetch_mp_metadata`` twice on the same data leaves the DB unchanged."""
    # Prepare canned OData response with two FractieZetelPersoon records.
    # Use Afkorting (not NaamNL) because fetch_mp_metadata prefers Afkorting.
    data = {
        "value": [
            {
                "Verwijderd": False,
                "Persoon": {
                    "Achternaam": "Yesilgöz-Zegerius",
                    "Initialen": "D.",
                    "Tussenvoegsel": None,
                    "Id": "guid-1",
                },
                "FractieZetel": {
                    "Fractie": {
                        "Afkorting": "VVD",
                        "NaamNL": "Volkspartij voor Vrijheid en Democratie",
                    }
                },
                "Van": "2023-01-01",
                "TotEnMet": None,
            },
            {
                "Verwijderd": False,
                "Persoon": {
                    "Achternaam": "Plas",
                    "Initialen": "C.",
                    "Tussenvoegsel": "van der",
                    "Id": "guid-2",
                },
                "FractieZetel": {
                    "Fractie": {"Afkorting": "BBB", "NaamNL": "BoerBurgerBeweging"}
                },
                "Van": "2023-06-01",
                "TotEnMet": "2024-01-01",
            },
        ]
    }

    mock_session = MockSession(data)

    # Patch requests.Session to return our mock session
    monkeypatch.setattr(requests, "Session", lambda: mock_session)

    db_path = str(tmp_path / "test.db")

    # First run
    count = fetch_mp_metadata(db_path=db_path, odata_url="http://example/odata")
    assert count == 2

    # Verify DB contents
    rows = _read_mp_rows(db_path)
    assert len(rows) == 2

    # Check normalized names
    assert rows[0][0] == normalize_mp_name("Plas", "C.", "van der")
    assert rows[0][1] == "BBB"
    assert str(rows[0][2]) == "2023-06-01"
    assert str(rows[0][3]) == "2024-01-01"
    assert rows[0][4] == "guid-2"

    assert rows[1][0] == normalize_mp_name("Yesilgöz-Zegerius", "D.", None)
    assert rows[1][1] == "VVD"
    assert str(rows[1][2]) == "2023-01-01"
    assert rows[1][3] is None
    assert rows[1][4] == "guid-1"

    # Run again to assert idempotence: same records processed, DB unchanged.
    # A fresh MockSession is needed because the first one is already past
    # its data page.
    monkeypatch.setattr(requests, "Session", lambda: MockSession(data))
    count2 = fetch_mp_metadata(db_path=db_path, odata_url="http://example/odata")
    assert count2 == 2

    # The return count alone cannot detect duplicated rows, so compare the
    # table contents with what the first run produced.
    assert _read_mp_rows(db_path) == rows