You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
motief/tests/test_download_script.py

182 lines
5.5 KiB

"""Tests for scripts/download_past_year.py enhancements.
Tests extract_besluit_id helper, update_existing_motions function,
and --skip-details flag wiring.
"""
import sys
import argparse
from unittest.mock import MagicMock, patch, call
import pytest
sys.path.insert(0, ".")
from scripts.download_past_year import extract_besluit_id
# --- extract_besluit_id tests ---
def test_extract_besluit_id_valid():
    """A stemmingsuitslagen URL should yield its trailing path segment as the id."""
    motion_url = "https://www.tweedekamer.nl/kamerstukken/stemmingsuitslagen/abc123-def456-ghi789"
    extracted = extract_besluit_id(motion_url)
    assert extracted == "abc123-def456-ghi789"
def test_extract_besluit_id_trailing_slash():
    """A trailing slash on the URL must not end up in the extracted id."""
    motion_url = "https://www.tweedekamer.nl/kamerstukken/stemmingsuitslagen/abc123-def456-ghi789/"
    extracted = extract_besluit_id(motion_url)
    assert extracted == "abc123-def456-ghi789"
def test_extract_besluit_id_invalid():
    """A URL that is not a stemmingsuitslagen link should give None."""
    unrelated_url = "https://example.com/not-a-motion-url"
    extracted = extract_besluit_id(unrelated_url)
    assert extracted is None
def test_extract_besluit_id_empty():
    """An empty string should give None."""
    extracted = extract_besluit_id("")
    assert extracted is None
# --- update_existing_motions tests ---
def test_update_existing_motions_updates_body_text(tmp_path):
    """Verify update_existing_motions stores the body_text returned by the API.

    Builds a real DuckDB file under ``tmp_path`` containing one motion whose
    body_text is NULL, runs ``update_existing_motions`` with a mocked API,
    and then checks the returned ``(updated, skipped)`` counts, the value
    written to the database, and the besluit id handed to the API.

    Args:
        tmp_path: pytest fixture providing a per-test temporary directory.
    """
    import duckdb
    from scripts.download_past_year import update_existing_motions

    db_path = str(tmp_path / "test.db")

    # Set up a real DuckDB database with the motions table. The context
    # manager closes the connection even if one of the setup statements
    # raises, and guarantees it is closed before the code under test runs.
    with duckdb.connect(db_path) as conn:
        conn.execute("CREATE SEQUENCE IF NOT EXISTS motions_id_seq START 1")
        conn.execute("""
CREATE TABLE motions (
id INTEGER DEFAULT nextval('motions_id_seq'),
title TEXT NOT NULL,
description TEXT,
date DATE,
policy_area TEXT,
voting_results JSON,
winning_margin FLOAT,
controversy_score FLOAT,
layman_explanation TEXT,
externe_identifier TEXT,
body_text TEXT,
url TEXT UNIQUE,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (id)
)
""")
        # Insert a motion with missing body_text
        conn.execute("""
INSERT INTO motions (title, description, date, url, body_text)
VALUES ('Test Motion', 'desc', '2017-06-01',
'https://www.tweedekamer.nl/kamerstukken/stemmingsuitslagen/besluit-id-123',
NULL)
""")

    # Mock API that returns body_text
    mock_api = MagicMock()
    mock_api._get_motion_details.return_value = {
        "title": "Real Title",
        "description": "Real Description",
        "date": "2017-06-01",
        "externe_identifier": "kst-12345",
        "body_text": "constaterende dat de motie gaat over iets belangrijks",
    }

    updated, skipped = update_existing_motions(
        db_path=db_path,
        api=mock_api,
        start_date="2017-01-01",
        end_date="2017-12-31",
        delay=0.0,
    )
    assert updated == 1
    assert skipped == 0

    # Verify the body_text was actually written to the DB
    with duckdb.connect(db_path, read_only=True) as conn:
        row = conn.execute("SELECT body_text FROM motions WHERE id = 1").fetchone()
    # Fail with a clear message rather than a TypeError if the row is missing.
    assert row is not None, "expected the inserted motion to have id 1"
    assert row[0] == "constaterende dat de motie gaat over iets belangrijks"

    # Verify the API was called with the correct besluit_id
    mock_api._get_motion_details.assert_called_once_with("besluit-id-123")
def test_update_existing_motions_skips_when_no_besluit_id(tmp_path):
    """Verify a motion whose URL has no valid besluit id is skipped.

    Builds a real DuckDB file under ``tmp_path`` containing one motion with
    an unrelated URL, runs ``update_existing_motions`` with a mocked API,
    and checks that it reports zero updated, one skipped, and never calls
    the API.

    Args:
        tmp_path: pytest fixture providing a per-test temporary directory.
    """
    import duckdb
    from scripts.download_past_year import update_existing_motions

    db_path = str(tmp_path / "test.db")

    # The context manager closes the connection even if one of the setup
    # statements raises, and guarantees it is closed before the code under
    # test runs.
    with duckdb.connect(db_path) as conn:
        conn.execute("CREATE SEQUENCE IF NOT EXISTS motions_id_seq START 1")
        conn.execute("""
CREATE TABLE motions (
id INTEGER DEFAULT nextval('motions_id_seq'),
title TEXT NOT NULL,
description TEXT,
date DATE,
policy_area TEXT,
voting_results JSON,
winning_margin FLOAT,
controversy_score FLOAT,
layman_explanation TEXT,
externe_identifier TEXT,
body_text TEXT,
url TEXT UNIQUE,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (id)
)
""")
        # Insert a motion with a URL that won't parse to a besluit_id
        conn.execute("""
INSERT INTO motions (title, description, date, url, body_text)
VALUES ('Bad URL Motion', 'desc', '2017-06-01',
'https://example.com/not-a-valid-url',
NULL)
""")

    mock_api = MagicMock()
    updated, skipped = update_existing_motions(
        db_path=db_path,
        api=mock_api,
        start_date="2017-01-01",
        end_date="2017-12-31",
        delay=0.0,
    )
    assert updated == 0
    assert skipped == 1

    # API should never have been called
    mock_api._get_motion_details.assert_not_called()
def test_skip_details_flag_passed_to_api():
    """Check CLI flag defaults and overrides by parsing args with build_parser."""
    from scripts.download_past_year import build_parser

    cli = build_parser()

    # No arguments: skip_details is expected to default to True.
    parsed_defaults = cli.parse_args([])
    assert parsed_defaults.skip_details is True

    # --no-skip-details switches skip_details off.
    parsed_no_skip = cli.parse_args(["--no-skip-details"])
    assert parsed_no_skip.skip_details is False

    # --update-existing switches update_existing on.
    parsed_update = cli.parse_args(["--update-existing"])
    assert parsed_update.update_existing is True

    # No arguments: update_existing is expected to default to False.
    parsed_defaults_again = cli.parse_args([])
    assert parsed_defaults_again.update_existing is False