@ -18,6 +18,8 @@ from datetime import datetime, timedelta
from typing import Optional , Tuple
from urllib . parse import urlparse
import duckdb
sys . path . insert ( 0 , " . " ) # run from project root
from api_client import TweedeKamerAPI
@ -71,8 +73,6 @@ def update_existing_motions(
Returns :
( updated_count , skipped_count ) tuple .
"""
import duckdb
# Read motions with missing body_text
conn_read = duckdb . connect ( db_path , read_only = True )
rows = conn_read . execute (
@ -89,56 +89,62 @@ def update_existing_motions(
updated = 0
skipped = 0
for row in rows :
motion_id , url , title , description = row
besluit_id = extract_besluit_id ( url or " " )
if not besluit_id :
print ( f " Skipping motion { motion_id } : cannot extract besluit_id from URL " )
skipped + = 1
continue
print ( f " Fetching details for motion { motion_id } (besluit_id= { besluit_id } )... " )
details = api . _get_motion_details ( besluit_id )
if not details or not details . get ( " body_text " ) :
print ( f " Skipping motion { motion_id } : no body_text returned " )
skipped + = 1
continue
# Build update: always set body_text; also update title/description if
# they were placeholder values (e.g. "Motion abc12345" or "No description available")
new_body = details [ " body_text " ]
new_title = title
new_desc = description
if title and ( title . startswith ( " Motion " ) or title . startswith ( " Besluit " ) ) :
new_title = details . get ( " title " ) or title
if description in (
None ,
" " ,
" No description available " ,
" Geen beschrijving beschikbaar " ,
) :
new_desc = details . get ( " description " ) or description
conn_write = duckdb . connect ( db_path , read_only = False )
conn_write . execute (
"""
UPDATE motions
SET body_text = ? , title = ? , description = ?
WHERE id = ?
""" ,
( new_body , new_title , new_desc , motion_id ) ,
)
conn_write . close ( )
conn_write = duckdb . connect ( db_path , read_only = False )
try :
for row in rows :
motion_id , url , title , description = row
besluit_id = extract_besluit_id ( url or " " )
if not besluit_id :
print (
f " Skipping motion { motion_id } : cannot extract besluit_id from URL "
)
skipped + = 1
continue
print (
f " Fetching details for motion { motion_id } (besluit_id= { besluit_id } )... "
)
details = api . _get_motion_details ( besluit_id )
if not details or not details . get ( " body_text " ) :
print ( f " Skipping motion { motion_id } : no body_text returned " )
skipped + = 1
continue
# Build update: always set body_text; also update title/description if
# they were placeholder values (e.g. "Motion abc12345" or "No description available")
new_body = details [ " body_text " ]
new_title = title
new_desc = description
if title and ( title . startswith ( " Motion " ) or title . startswith ( " Besluit " ) ) :
new_title = details . get ( " title " ) or title
if description in (
None ,
" " ,
" No description available " ,
" Geen beschrijving beschikbaar " ,
) :
new_desc = details . get ( " description " ) or description
conn_write . execute (
"""
UPDATE motions
SET body_text = ? , title = ? , description = ?
WHERE id = ?
""" ,
( new_body , new_title , new_desc , motion_id ) ,
)
updated + = 1
print ( f " Updated motion { motion_id } " )
updated + = 1
print ( f " Updated motion { motion_id } " )
if delay > 0 and updated + skipped < len ( rows ) :
time . sleep ( delay )
if delay > 0 and updated + skipped < len ( rows ) :
time . sleep ( delay )
finally :
conn_write . close ( )
return updated , skipped