"""Tests for ``pipeline.fetch_mp_metadata`` using a mocked HTTP session."""

import json
import types

import pytest
import requests

# Skip the whole module when duckdb cannot be imported. The broad ``Exception``
# is kept so that any import-time failure (not only ``ImportError``) results in
# a skip rather than a collection error.
try:
    import duckdb
except Exception:
    pytest.skip(
        "duckdb not installed, skipping fetch_mp_metadata tests",
        allow_module_level=True,
    )

from pipeline.fetch_mp_metadata import fetch_mp_metadata, normalize_mp_name


class MockResponse:
    """Minimal stand-in for ``requests.Response`` that serves canned JSON."""

    def __init__(self, data, status_code=200):
        self._data = data
        self.status_code = status_code

    def raise_for_status(self):
        """Raise ``requests.HTTPError`` unless the status code is 2xx."""
        if not (200 <= self.status_code < 300):
            raise requests.HTTPError(f"status {self.status_code}")

    def json(self):
        """Return the canned payload supplied at construction time."""
        return self._data


class MockSession:
    """Minimal stand-in for ``requests.Session`` with a fixed response."""

    def __init__(self, response):
        self._response = response

    def get(self, url):
        """Return the preconfigured response regardless of ``url``."""
        return self._response


def test_fetch_mp_metadata_idempotent(tmp_path, monkeypatch):
    """Run ``fetch_mp_metadata`` twice against one database file.

    The first run must store both canned records with the expected values;
    the second run must complete without raising and report the same count.
    """
    # Prepare canned OData response with two FractieZetelPersoon records
    data = {
        "value": [
            {
                "Persoon": {
                    "Achternaam": "Yesilgöz-Zegerius",
                    "Initialen": "D.",
                    "Tussenvoegsel": None,
                    "Id": "guid-1",
                },
                "FractieZetel": {"Fractie": {"NaamNL": "VVD"}},
                "Van": "2023-01-01",
                "TotEnMet": None,
            },
            {
                "Persoon": {
                    "Achternaam": "Plas",
                    "Initialen": "C.",
                    "Tussenvoegsel": "van der",
                    "Id": "guid-2",
                },
                "FractieZetel": {"Fractie": {"NaamNL": "BBB"}},
                "Van": "2023-06-01",
                "TotEnMet": "2024-01-01",
            },
        ]
    }

    mock_resp = MockResponse(data)
    mock_session = MockSession(mock_resp)

    # Patch requests.Session to return our mock session
    monkeypatch.setattr(requests, "Session", lambda: mock_session)

    db_path = str(tmp_path / "test.db")

    # First run
    count = fetch_mp_metadata(db_path=db_path, odata_url="http://example/odata")
    assert count == 2

    # Verify DB contents. The connection is closed in ``finally`` so that a
    # failing query cannot leave the database file open for the second run.
    conn = duckdb.connect(db_path)
    try:
        rows = conn.execute(
            "SELECT mp_name, party, van, tot_en_met, persoon_id FROM mp_metadata ORDER BY mp_name"
        ).fetchall()
    finally:
        conn.close()

    assert len(rows) == 2

    # Check normalized names
    assert rows[0][0] == normalize_mp_name("Plas", "C.", "van der")
    assert rows[0][1] == "BBB"
    assert str(rows[0][2]) == "2023-06-01"
    assert str(rows[0][3]) == "2024-01-01"
    assert rows[0][4] == "guid-2"

    assert rows[1][0] == normalize_mp_name("Yesilgöz-Zegerius", "D.", None)
    assert rows[1][1] == "VVD"
    assert str(rows[1][2]) == "2023-01-01"
    assert rows[1][3] is None
    assert rows[1][4] == "guid-1"

    # Run again to assert idempotence (no exception and same count processed)
    count2 = fetch_mp_metadata(db_path=db_path, odata_url="http://example/odata")
    assert count2 == 2