"""Tests for scripts/download_past_year.py enhancements.

Tests extract_besluit_id helper, update_existing_motions function,
and --skip-details flag wiring.
"""

import sys
import argparse
from unittest.mock import MagicMock, patch, call

import pytest

# Make the repository root importable; this relies on pytest being started
# from the repository root.
sys.path.insert(0, ".")

from scripts.download_past_year import extract_besluit_id


# Schema used by the update_existing_motions tests. Kept in one place so both
# tests always run against the same table definition.
# NOTE(review): presumably mirrors the production motions table -- confirm.
_MOTIONS_DDL = """
    CREATE TABLE motions (
        id INTEGER DEFAULT nextval('motions_id_seq'),
        title TEXT NOT NULL,
        description TEXT,
        date DATE,
        policy_area TEXT,
        voting_results JSON,
        winning_margin FLOAT,
        controversy_score FLOAT,
        layman_explanation TEXT,
        externe_identifier TEXT,
        body_text TEXT,
        url TEXT UNIQUE,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        PRIMARY KEY (id)
    )
"""


def _create_motions_db(db_path, title, url):
    """Create a DuckDB file at *db_path* holding one motion without body_text.

    The single row is dated 2017-06-01, has description 'desc' and a NULL
    body_text. The connection is closed before returning (also when a
    statement fails), so the code under test can open the file itself.
    """
    # Imported lazily, matching the tests below, so collecting this module
    # does not require duckdb.
    import duckdb

    with duckdb.connect(db_path) as conn:
        conn.execute("CREATE SEQUENCE IF NOT EXISTS motions_id_seq START 1")
        conn.execute(_MOTIONS_DDL)
        conn.execute(
            """
            INSERT INTO motions (title, description, date, url, body_text)
            VALUES (?, 'desc', '2017-06-01', ?, NULL)
            """,
            [title, url],
        )


# --- extract_besluit_id tests ---


def test_extract_besluit_id_valid():
    """A stemmingsuitslagen URL yields its trailing path segment."""
    url = "https://www.tweedekamer.nl/kamerstukken/stemmingsuitslagen/abc123-def456-ghi789"
    assert extract_besluit_id(url) == "abc123-def456-ghi789"


def test_extract_besluit_id_trailing_slash():
    """A trailing slash does not end up in the extracted id."""
    url = "https://www.tweedekamer.nl/kamerstukken/stemmingsuitslagen/abc123-def456-ghi789/"
    assert extract_besluit_id(url) == "abc123-def456-ghi789"


def test_extract_besluit_id_invalid():
    """A URL that is not a stemmingsuitslagen URL yields None."""
    url = "https://example.com/not-a-motion-url"
    assert extract_besluit_id(url) is None


def test_extract_besluit_id_empty():
    """An empty string yields None."""
    assert extract_besluit_id("") is None


# --- update_existing_motions tests ---


def test_update_existing_motions_updates_body_text(tmp_path):
    """Real DuckDB file + mock API: the fetched body_text is written to the row."""
    import duckdb
    from scripts.download_past_year import update_existing_motions

    db_path = str(tmp_path / "test.db")
    expected_body = "constaterende dat de motie gaat over iets belangrijks"

    # One motion with a parseable URL and no body_text yet.
    _create_motions_db(
        db_path,
        title="Test Motion",
        url="https://www.tweedekamer.nl/kamerstukken/stemmingsuitslagen/besluit-id-123",
    )

    # Mock API that returns body_text
    mock_api = MagicMock()
    mock_api._get_motion_details.return_value = {
        "title": "Real Title",
        "description": "Real Description",
        "date": "2017-06-01",
        "externe_identifier": "kst-12345",
        "body_text": expected_body,
    }

    updated, skipped = update_existing_motions(
        db_path=db_path,
        api=mock_api,
        start_date="2017-01-01",
        end_date="2017-12-31",
        delay=0.0,
    )

    assert updated == 1
    assert skipped == 0

    # Verify the body_text was actually written to the DB. The sequence
    # starts at 1, so the only inserted row has id 1.
    with duckdb.connect(db_path, read_only=True) as conn:
        row = conn.execute("SELECT body_text FROM motions WHERE id = 1").fetchone()
    assert row is not None
    assert row[0] == expected_body

    # Verify the API was called with the correct besluit_id
    mock_api._get_motion_details.assert_called_once_with("besluit-id-123")


def test_update_existing_motions_skips_when_no_besluit_id(tmp_path):
    """URL without valid besluit_id is skipped."""
    from scripts.download_past_year import update_existing_motions

    db_path = str(tmp_path / "test.db")

    # One motion whose URL won't parse to a besluit_id.
    _create_motions_db(
        db_path,
        title="Bad URL Motion",
        url="https://example.com/not-a-valid-url",
    )

    mock_api = MagicMock()

    updated, skipped = update_existing_motions(
        db_path=db_path,
        api=mock_api,
        start_date="2017-01-01",
        end_date="2017-12-31",
        delay=0.0,
    )

    assert updated == 0
    assert skipped == 1

    # API should never have been called
    mock_api._get_motion_details.assert_not_called()


def test_skip_details_flag_passed_to_api():
    """Verify the argparse flags are wired correctly by parsing args."""
    from scripts.download_past_year import build_parser

    parser = build_parser()

    # Defaults: skip_details is on, update_existing is off.
    defaults = parser.parse_args([])
    assert defaults.skip_details is True
    assert defaults.update_existing is False

    # Explicitly set to false via --no-skip-details
    args = parser.parse_args(["--no-skip-details"])
    assert args.skip_details is False

    # Explicitly set --update-existing
    args = parser.parse_args(["--update-existing"])
    assert args.update_existing is True