You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
182 lines
5.5 KiB
182 lines
5.5 KiB
"""Tests for scripts/download_past_year.py enhancements.
|
|
|
|
Tests the extract_besluit_id helper, the update_existing_motions function,
and the --skip-details flag wiring.
|
|
"""
|
|
|
|
import sys
|
|
import argparse
|
|
from unittest.mock import MagicMock, patch, call
|
|
|
|
import pytest
|
|
|
|
sys.path.insert(0, ".")
|
|
|
|
from scripts.download_past_year import extract_besluit_id
|
|
|
|
|
|
# --- extract_besluit_id tests ---
|
|
|
|
|
|
def test_extract_besluit_id_valid():
    """A stemmingsuitslagen URL yields the id found in its last path segment."""
    besluit_id = extract_besluit_id(
        "https://www.tweedekamer.nl/kamerstukken/stemmingsuitslagen/abc123-def456-ghi789"
    )
    assert besluit_id == "abc123-def456-ghi789"
|
|
|
|
|
|
def test_extract_besluit_id_trailing_slash():
    """A trailing slash after the id does not change the extracted value."""
    besluit_id = extract_besluit_id(
        "https://www.tweedekamer.nl/kamerstukken/stemmingsuitslagen/abc123-def456-ghi789/"
    )
    assert besluit_id == "abc123-def456-ghi789"
|
|
|
|
|
|
def test_extract_besluit_id_invalid():
    """A URL that is not a motion URL produces None."""
    besluit_id = extract_besluit_id("https://example.com/not-a-motion-url")
    assert besluit_id is None
|
|
|
|
|
|
def test_extract_besluit_id_empty():
    """An empty string produces None."""
    besluit_id = extract_besluit_id("")
    assert besluit_id is None
|
|
|
|
|
|
# --- update_existing_motions tests ---
|
|
|
|
|
|
def test_update_existing_motions_updates_body_text(tmp_path):
    """Mock API + real DuckDB file: verify body_text is written for a motion.

    Creates a DuckDB database under ``tmp_path`` holding a single motion whose
    ``body_text`` is NULL, runs ``update_existing_motions`` with a mocked API,
    and then checks the returned ``(updated, skipped)`` counters, the stored
    ``body_text`` and the besluit id the API was queried with.
    """
    import duckdb
    from scripts.download_past_year import update_existing_motions

    db_path = str(tmp_path / "test.db")

    # Set up a real DuckDB database with the motions table.  The ``with``
    # block closes the connection even when a setup statement raises, and
    # guarantees it is closed before update_existing_motions is called.
    with duckdb.connect(db_path) as conn:
        conn.execute("CREATE SEQUENCE IF NOT EXISTS motions_id_seq START 1")
        conn.execute("""
            CREATE TABLE motions (
                id INTEGER DEFAULT nextval('motions_id_seq'),
                title TEXT NOT NULL,
                description TEXT,
                date DATE,
                policy_area TEXT,
                voting_results JSON,
                winning_margin FLOAT,
                controversy_score FLOAT,
                layman_explanation TEXT,
                externe_identifier TEXT,
                body_text TEXT,
                url TEXT UNIQUE,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                PRIMARY KEY (id)
            )
        """)
        # Insert a motion with missing body_text
        conn.execute("""
            INSERT INTO motions (title, description, date, url, body_text)
            VALUES ('Test Motion', 'desc', '2017-06-01',
                    'https://www.tweedekamer.nl/kamerstukken/stemmingsuitslagen/besluit-id-123',
                    NULL)
        """)

    # Mock API that returns body_text
    mock_api = MagicMock()
    mock_api._get_motion_details.return_value = {
        "title": "Real Title",
        "description": "Real Description",
        "date": "2017-06-01",
        "externe_identifier": "kst-12345",
        "body_text": "constaterende dat de motie gaat over iets belangrijks",
    }

    updated, skipped = update_existing_motions(
        db_path=db_path,
        api=mock_api,
        start_date="2017-01-01",
        end_date="2017-12-31",
        delay=0.0,
    )

    assert updated == 1
    assert skipped == 0

    # Verify the body_text was actually written to the DB
    with duckdb.connect(db_path, read_only=True) as conn:
        row = conn.execute("SELECT body_text FROM motions WHERE id = 1").fetchone()
    # Fail with a clear message instead of a TypeError if the row is missing.
    assert row is not None
    assert row[0] == "constaterende dat de motie gaat over iets belangrijks"

    # Verify the API was called with the correct besluit_id
    mock_api._get_motion_details.assert_called_once_with("besluit-id-123")
|
|
|
|
|
|
def test_update_existing_motions_skips_when_no_besluit_id(tmp_path):
    """URL without valid besluit_id is skipped.

    Creates a DuckDB database under ``tmp_path`` holding a single motion whose
    URL is not a stemmingsuitslagen URL, runs ``update_existing_motions`` with
    a mocked API, and checks that the motion is counted as skipped and that
    the API is never queried.
    """
    import duckdb
    from scripts.download_past_year import update_existing_motions

    db_path = str(tmp_path / "test.db")

    # The ``with`` block closes the connection even when a setup statement
    # raises, and guarantees it is closed before update_existing_motions
    # is called.
    with duckdb.connect(db_path) as conn:
        conn.execute("CREATE SEQUENCE IF NOT EXISTS motions_id_seq START 1")
        conn.execute("""
            CREATE TABLE motions (
                id INTEGER DEFAULT nextval('motions_id_seq'),
                title TEXT NOT NULL,
                description TEXT,
                date DATE,
                policy_area TEXT,
                voting_results JSON,
                winning_margin FLOAT,
                controversy_score FLOAT,
                layman_explanation TEXT,
                externe_identifier TEXT,
                body_text TEXT,
                url TEXT UNIQUE,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                PRIMARY KEY (id)
            )
        """)
        # Insert a motion with a URL that won't parse to a besluit_id
        conn.execute("""
            INSERT INTO motions (title, description, date, url, body_text)
            VALUES ('Bad URL Motion', 'desc', '2017-06-01',
                    'https://example.com/not-a-valid-url',
                    NULL)
        """)

    mock_api = MagicMock()

    updated, skipped = update_existing_motions(
        db_path=db_path,
        api=mock_api,
        start_date="2017-01-01",
        end_date="2017-12-31",
        delay=0.0,
    )

    assert updated == 0
    assert skipped == 1
    # API should never have been called
    mock_api._get_motion_details.assert_not_called()
|
|
|
|
|
|
def test_skip_details_flag_passed_to_api():
    """Check the parsed values of the skip-details and update-existing flags.

    Parses four argument lists with the parser returned by ``build_parser``
    and asserts the resulting ``skip_details`` / ``update_existing`` values.
    """
    from scripts.download_past_year import build_parser

    cli = build_parser()

    # No arguments: skip_details is expected to default to True.
    no_args = cli.parse_args([])
    assert no_args.skip_details is True

    # --no-skip-details switches it off explicitly.
    with_details = cli.parse_args(["--no-skip-details"])
    assert with_details.skip_details is False

    # --update-existing switches update_existing on.
    updating = cli.parse_args(["--update-existing"])
    assert updating.update_existing is True

    # No arguments: update_existing is expected to default to False.
    no_args_again = cli.parse_args([])
    assert no_args_again.update_existing is False
|
|
|