@ -18,6 +18,8 @@ from datetime import datetime, timedelta
from typing import Optional , Tuple
from typing import Optional , Tuple
from urllib . parse import urlparse
from urllib . parse import urlparse
import duckdb
sys . path . insert ( 0 , " . " ) # run from project root
sys . path . insert ( 0 , " . " ) # run from project root
from api_client import TweedeKamerAPI
from api_client import TweedeKamerAPI
@ -71,8 +73,6 @@ def update_existing_motions(
Returns :
Returns :
( updated_count , skipped_count ) tuple .
( updated_count , skipped_count ) tuple .
"""
"""
import duckdb
# Read motions with missing body_text
# Read motions with missing body_text
conn_read = duckdb . connect ( db_path , read_only = True )
conn_read = duckdb . connect ( db_path , read_only = True )
rows = conn_read . execute (
rows = conn_read . execute (
@ -89,56 +89,62 @@ def update_existing_motions(
updated = 0
updated = 0
skipped = 0
skipped = 0
for row in rows :
conn_write = duckdb . connect ( db_path , read_only = False )
motion_id , url , title , description = row
try :
for row in rows :
besluit_id = extract_besluit_id ( url or " " )
motion_id , url , title , description = row
if not besluit_id :
print ( f " Skipping motion { motion_id } : cannot extract besluit_id from URL " )
besluit_id = extract_besluit_id ( url or " " )
skipped + = 1
if not besluit_id :
continue
print (
f " Skipping motion { motion_id } : cannot extract besluit_id from URL "
print ( f " Fetching details for motion { motion_id } (besluit_id= { besluit_id } )... " )
)
details = api . _get_motion_details ( besluit_id )
skipped + = 1
continue
if not details or not details . get ( " body_text " ) :
print ( f " Skipping motion { motion_id } : no body_text returned " )
print (
skipped + = 1
f " Fetching details for motion { motion_id } (besluit_id= { besluit_id } )... "
continue
)
details = api . _get_motion_details ( besluit_id )
# Build update: always set body_text; also update title/description if
# they were placeholder values (e.g. "Motion abc12345" or "No description available")
if not details or not details . get ( " body_text " ) :
new_body = details [ " body_text " ]
print ( f " Skipping motion { motion_id } : no body_text returned " )
new_title = title
skipped + = 1
new_desc = description
continue
if title and ( title . startswith ( " Motion " ) or title . startswith ( " Besluit " ) ) :
# Build update: always set body_text; also update title/description if
new_title = details . get ( " title " ) or title
# they were placeholder values (e.g. "Motion abc12345" or "No description available")
new_body = details [ " body_text " ]
if description in (
new_title = title
None ,
new_desc = description
" " ,
" No description available " ,
if title and ( title . startswith ( " Motion " ) or title . startswith ( " Besluit " ) ) :
" Geen beschrijving beschikbaar " ,
new_title = details . get ( " title " ) or title
) :
new_desc = details . get ( " description " ) or description
if description in (
None ,
conn_write = duckdb . connect ( db_path , read_only = False )
" " ,
conn_write . execute (
" No description available " ,
"""
" Geen beschrijving beschikbaar " ,
UPDATE motions
) :
SET body_text = ? , title = ? , description = ?
new_desc = details . get ( " description " ) or description
WHERE id = ?
""" ,
conn_write . execute (
( new_body , new_title , new_desc , motion_id ) ,
"""
)
UPDATE motions
conn_write . close ( )
SET body_text = ? , title = ? , description = ?
WHERE id = ?
""" ,
( new_body , new_title , new_desc , motion_id ) ,
)
updated + = 1
updated + = 1
print ( f " Updated motion { motion_id } " )
print ( f " Updated motion { motion_id } " )
if delay > 0 and updated + skipped < len ( rows ) :
if delay > 0 and updated + skipped < len ( rows ) :
time . sleep ( delay )
time . sleep ( delay )
finally :
conn_write . close ( )
return updated , skipped
return updated , skipped