@ -49,6 +49,7 @@ PARTY_COLOURS: Dict[str, str] = {
" DENK " : " #00897B " ,
" 50PLUS " : " #7E57C2 " ,
" Volt " : " #572AB7 " ,
" ChristenUnie " : " #0288D1 " ,
" Unknown " : " #9E9E9E " ,
}
@ -69,23 +70,27 @@ KNOWN_MAJOR_PARTIES = [
]
# Current parliament parties (used for party-level SVD lookups)
# Keep both common abbreviations and full names that may appear in the DB
CURRENT_PARLIAMENT_PARTIES = frozenset (
[
" VVD " ,
# Parties currently seated in the Tweede Kamer (2023 election cycle).
# These are the entity_ids as stored in svd_vectors for window='2025'.
CURRENT_PARLIAMENT_PARTIES : frozenset [ str ] = frozenset (
{
" PVV " ,
" VVD " ,
" NSC " ,
" BBB " ,
" D66 " ,
" GroenLinks-PvdA " ,
" GroenLinks " ,
" PvdA " ,
" CDA " ,
" SP " ,
" NSC " ,
" CU " ,
" ChristenUnie " ,
" BBB " ,
]
" CU " , # alias for ChristenUnie
" SGP " ,
" Volt " ,
" DENK " ,
" PvdD " ,
" JA21 " ,
" FVD " ,
}
)
@ -207,16 +212,12 @@ def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
AND entity_id is a known current - parliament party .
Returns :
{ party_name : [ float * k ] } — k = 50 for the canonical 2025 window
{ party_name : [ float * k ] } — k = 50 for the canonical 2025 window .
Duplicate rows for the same party are de - duplicated ( last row wins ) .
"""
con = None
try :
# Use a deterministic, ordered list for parameter binding
party_list = sorted ( CURRENT_PARLIAMENT_PARTIES )
if not party_list :
return { }
con = duckdb . connect ( database = db_path , read_only = True )
party_list = sorted ( CURRENT_PARLIAMENT_PARTIES )
placeholders = " , " . join ( " ? " for _ in party_list )
rows = con . execute (
f " SELECT entity_id, vector FROM svd_vectors "
@ -224,70 +225,29 @@ def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
f " AND entity_id IN ( { placeholders } ) " ,
party_list ,
) . fetchall ( )
out : Dict [ str , List [ float ] ] = { }
for row in rows :
party = row [ 0 ]
vec_field = row [ 1 ]
try :
if vec_field is None :
# skip missing vectors
result : Dict [ str , List [ float ] ] = { }
for entity_id , raw_vec in rows :
if isinstance ( raw_vec , str ) :
vec = json . loads ( raw_vec )
elif isinstance ( raw_vec , ( bytes , bytearray ) ) :
vec = json . loads ( raw_vec . decode ( ) )
elif isinstance ( raw_vec , list ) :
vec = raw_vec
else :
try :
vec = list ( raw_vec )
except Exception :
continue
# string-encoded JSON vector
if isinstance ( vec_field , str ) :
vec = json . loads ( vec_field )
# bytes (some DB drivers may return bytes)
elif isinstance ( vec_field , ( bytes , bytearray ) ) :
try :
vec = json . loads ( vec_field . decode ( " utf-8 " ) )
except Exception :
# fallback: attempt to eval as list-like
vec = list ( vec_field )
# already a list/tuple/np.ndarray-like
elif isinstance ( vec_field , ( list , tuple , np . ndarray ) ) :
vec = list ( vec_field )
else :
# unknown type: attempt best-effort conversion
vec = list ( vec_field )
# ensure all entries are floats
vec_floats = [ float ( x ) for x in vec ]
out [ party ] = vec_floats
except Exception :
# skip malformed rows but keep processing others
logger . debug ( " Skipping malformed vector for party %s " , party )
continue
return out
result [ entity_id ] = [ float ( v ) if v is not None else 0.0 for v in vec ]
return result
except Exception :
logger . exception ( " Failed to load party axis scores " )
return { }
finally :
if con is not None :
try :
con . close ( )
@st.cache_data(show_spinner=" Moties laden… ")
def load_motions_df(db_path: str) -> pd.DataFrame:
    """Read the ``motions`` table into a pandas DataFrame.

    A read-only DuckDB connection to ``db_path`` is opened, the selected
    columns are fetched, the date column is passed through
    ``pd.to_datetime`` and a year column is derived from it.

    Any exception raised while querying or post-processing is logged and an
    empty DataFrame is returned instead. A failure to open the connection is
    not caught here, because the connect call sits outside the ``try``. The
    connection is closed on every path once it has been opened.
    """
    # NOTE(review): the string literals below (column keys, the ``errors``
    # value, the log message) carry leading/trailing spaces exactly as found;
    # confirm that padding is intended.
    motions_query = """
SELECT id , title , description , date , policy_area ,
voting_results , layman_explanation ,
winning_margin , controversy_score , url
FROM motions
"""
    connection = duckdb.connect(database=db_path, read_only=True)
    try:
        frame = connection.execute(motions_query).fetchdf()
        frame[" date "] = pd.to_datetime(frame[" date "], errors=" coerce ")
        frame[" year "] = frame[" date "].dt.year
    except Exception:
        logger.exception(" Failed to load motions ")
        frame = pd.DataFrame()
    finally:
        connection.close()
    return frame
except Exception :
pass
def _render_party_axis_chart (
@ -295,114 +255,91 @@ def _render_party_axis_chart(
) - > None :
""" Render a 1D horizontal Plotly scatter of party positions on SVD axis `comp_sel`.
party_scores : mapping party - > list - like vector ( 50 - dim )
comp_sel : 1 - based component index
Each party is plotted at its score on a single horizontal axis ( y = 0 ) .
"""
# Validate component selection
if not isinstance ( comp_sel , int ) or comp_sel < 1 :
st . caption ( " Ongeldige SVD-as geselecteerd. " )
return
if not party_scores :
st . caption ( " Partijdata zijn niet beschikbaar. " )
st . caption ( " _Partijdata niet beschikbaar voor deze as._ " )
return
axis_idx = comp_sel - 1
# Determine maximum available vector dimension to validate selection
max_dim = 0
for v in party_scores . values ( ) :
try :
if isinstance ( v , ( list , tuple , np . ndarray ) ) :
max_dim = max ( max_dim , len ( v ) )
except Exception :
continue
if axis_idx > = max_dim :
st . caption (
f " Geselecteerde component ( { comp_sel } ) valt buiten het bereik van de beschikbare vectoren ( { max_dim } dimensies). "
)
return
parties : List [ str ] = [ ]
xs : List [ float ] = [ ]
axis_idx = comp_sel - 1 # 0-based index into the 50-dim vector
data : list [ dict ] = [ ]
for party , vec in party_scores . items ( ) :
# Ensure vec is indexable/sequence-like
if not isinstance ( vec , ( list , tuple , np . ndarray ) ) :
continue
# safe indexing
if axis_idx > = len ( vec ) :
continue
try :
raw = vec [ axis_idx ]
val = float ( raw )
# filter non-finite values
if not np . isfinite ( val ) :
continue
except Exception :
continue
parties . append ( party )
xs . append ( val )
if axis_idx < len ( vec ) :
data . append ( { " party " : party , " score " : vec [ axis_idx ] } )
if not xs :
st . caption ( " Geen bruikbare partijposities gevonden voor de gekozen SVD-as. " )
if not data :
st . caption ( " _Geen partijscores voor deze as._ " )
return
try :
x_min = float ( min ( xs ) )
x_max = float ( max ( xs ) )
except Exception :
st . caption ( " Onvoldoende gegevens om het asbereik te berekenen. " )
return
# Symmetric padding around the midpoint for balanced visualisation
if x_min == x_max :
padding = 0.5 if x_min == 0 else abs ( x_min ) * 0.1
if padding < = 0 :
padding = 0.5
center = x_min
half = padding
else :
center = ( x_min + x_max ) / 2.0
half = max ( abs ( x_max - center ) , abs ( center - x_min ) )
# add slight visual padding
half = half * 1.15
x_min = center - half
x_max = center + half
# Build horizontal scatter: y is constant (0) but offset for label placement
ys = [ 0 for _ in xs ]
scores = [ d [ " score " ] for d in data ]
parties = [ d [ " party " ] for d in data ]
colours = [ PARTY_COLOURS . get ( p , " #9E9E9E " ) for p in parties ]
hover = [ f " { p } : { s : .3f } " for p , s in zip ( parties , scores ) ]
fig = go . Figure ( )
# Baseline
x_min , x_max = min ( scores ) * 1.15 , max ( scores ) * 1.15
fig . add_trace (
go . Scatter (
x = [ x_min , x_max ] ,
y = [ 0 , 0 ] ,
mode = " lines " ,
line = { " color " : " #cccccc " , " width " : 1 } ,
hoverinfo = " skip " ,
showlegend = False ,
)
)
# Party markers
fig . add_trace (
go . Scatter (
x = xs ,
y = ys ,
x = scores ,
y = [ 0 ] * len ( scores ) ,
mode = " markers+text " ,
text = parties ,
textposition = " top center " ,
marker = dict (
size = 10 , color = [ PARTY_COLOURS . get ( p , " #9E9E9E " ) for p in parties ]
) ,
hovertemplate = " % {text} <br>x: % {x:.3f} <extra></extra> " ,
marker = { " size " : 12 , " color " : colours } ,
hovertext = hover ,
hoverinfo = " text " ,
showlegend = False ,
)
)
fig . update_layout (
title = f " Partijposities op SVD-as { comp_sel } " ,
xaxis_title = " Negatief ← — → Positief " ,
yaxis = dict ( visible = False ) ,
xaxis = dict ( range = [ x_min , x_max ] ) ,
height = 300 ,
margin = dict ( t = 40 , b = 40 , l = 40 , r = 40 ) ,
showlegend = False ,
height = 160 ,
margin = { " l " : 10 , " r " : 10 , " t " : 10 , " b " : 30 } ,
xaxis = {
" title " : " ← Negatieve pool | Positieve pool → " ,
" zeroline " : True ,
" zerolinecolor " : " #aaaaaa " ,
} ,
yaxis = { " visible " : False , " range " : [ - 1 , 2 ] } ,
plot_bgcolor = " white " ,
)
st . plotly_chart ( fig , use_container_width = True )
@st.cache_data(show_spinner=" Moties laden… ")
def load_motions_df(db_path: str) -> pd.DataFrame:
    """Return the contents of the ``motions`` table as a pandas DataFrame.

    The database at ``db_path`` is opened read-only through DuckDB. After the
    fetch, the date column is converted with ``pd.to_datetime`` and a year
    column is computed from the converted values.

    If the query or the post-processing raises, the error is logged via
    ``logger.exception`` and an empty DataFrame is returned. Errors from
    opening the connection propagate to the caller, since the connect call is
    outside the ``try``. The connection is always closed after it was opened.
    """
    # NOTE(review): literals below keep their surrounding spaces exactly as
    # found (column keys, ``errors`` value, log message); verify the padding.
    sql = """
SELECT id , title , description , date , policy_area ,
voting_results , layman_explanation ,
winning_margin , controversy_score , url
FROM motions
"""
    db = duckdb.connect(database=db_path, read_only=True)
    try:
        motions = db.execute(sql).fetchdf()
        motions[" date "] = pd.to_datetime(motions[" date "], errors=" coerce ")
        motions[" year "] = motions[" date "].dt.year
    except Exception:
        logger.exception(" Failed to load motions ")
        motions = pd.DataFrame()
    finally:
        db.close()
    return motions
def query_similar (
db_path : str ,
source_motion_id : int ,
@ -1039,62 +976,103 @@ def build_svd_components_tab(db_path: str) -> None:
)
comp_sel = comp_options [ comp_sel_idx ]
# Show theme explanation + poles
# Show theme explanation
theme = SVD_THEMES . get ( comp_sel , { } )
if theme :
st . info ( f " ** { theme [ ' label ' ] } ** — { theme [ ' explanation ' ] } " )
pos = theme . get ( " positive_pole " , " " )
neg = theme . get ( " negative_pole " , " " )
if pos or neg :
pcol , ncol = st . columns ( 2 )
with pcol :
st . success ( f " ▲ **Positieve pool:** { pos } " )
with ncol :
st . error ( f " ▼ **Negatieve pool:** { neg } " )
motions = comp_map . get ( comp_sel , [ ] )
col1 , col2 = st . columns ( [ 1 , 2 ] )
with col1 :
st . markdown ( " **Top-moties (titels)** " )
for m in motions :
mid = m . get ( " motion_id " )
score = m . get ( " score " , 0.0 )
title = m . get ( " title " ) or f " Motie # { mid } "
sign = " ▲ " if score > = 0 else " ▼ "
if st . button ( f " { sign } { mid } : { title [ : 72 ] } " , key = f " btn_ { comp_sel } _ { mid } " ) :
st . session_state [ " svd_selected_mid " ] = mid
with col2 :
sel_mid = st . session_state . get ( " svd_selected_mid " )
if not sel_mid and motions :
sel_mid = motions [ 0 ] . get ( " motion_id " )
if sel_mid :
# fetch motion metadata from DB for completeness
try :
con = duckdb . connect ( database = db_path , read_only = True )
row = con . execute (
" SELECT id, title, date, policy_area, url, body_text FROM motions WHERE id=? " ,
[ int ( sel_mid ) ] ,
) . fetchone ( )
con . close ( )
except Exception :
row = None
# Party axis chart
party_scores = load_party_axis_scores ( db_path )
_render_party_axis_chart ( party_scores , comp_sel )
# Batch-fetch motion details (title, date, policy_area, url, body_text, voting_results)
motion_ids = [ m . get ( " motion_id " ) for m in motions if m . get ( " motion_id " ) is not None ]
motion_details : Dict [ int , tuple ] = { }
if motion_ids :
# Defensively convert motion_ids to integers, skipping invalid values
ids_int : List [ int ] = [ ]
for mid in motion_ids :
try :
ids_int . append ( int ( mid ) )
except Exception :
logger . warning ( " Skipping invalid motion id in SVD batch fetch: %r " , mid )
if row :
st . markdown ( f " ### { row [ 1 ] or f ' Motie # { row [ 0 ] } ' } " )
# If no valid ids remain, skip the DB query
if ids_int :
con = None
try :
date_str = str ( row [ 2 ] ) [ : 10 ]
placeholders = " , " . join ( " ? " for _ in ids_int )
con = duckdb . connect ( database = db_path , read_only = True )
db_rows = con . execute (
f " SELECT id, title, date, policy_area, url, body_text, voting_results "
f " FROM motions WHERE id IN ( { placeholders } ) " ,
ids_int ,
) . fetchall ( )
motion_details = { r [ 0 ] : r for r in db_rows }
except Exception :
date_str = " ? "
st . caption ( f " 📅 { date_str } | { row [ 3 ] } " )
if row [ 4 ] and str ( row [ 4 ] ) . startswith ( " http " ) :
st . markdown ( f " [🔗 Bekijk op Tweede Kamer]( { row [ 4 ] } ) " )
if row [ 5 ] :
with st . expander ( " Toon volledige tekst " ) :
st . write ( row [ 5 ] )
else :
st . info ( f " Metadata not found in DB for motion { sel_mid } " )
logger . exception ( " Failed to batch-fetch motion details " )
finally :
if con :
con . close ( )
# Split motions by pole sign
pos_motions = [ m for m in motions if float ( m . get ( " score " , 0.0 ) ) > = 0 ]
neg_motions = [ m for m in motions if float ( m . get ( " score " , 0.0 ) ) < 0 ]
pos_pole = (
theme . get ( " positive_pole " , " Positieve pool " ) if theme else " Positieve pool "
)
neg_pole = (
theme . get ( " negative_pole " , " Negatieve pool " ) if theme else " Negatieve pool "
)
pcol , ncol = st . columns ( 2 )
with pcol :
st . success ( f " ▲ **Positieve pool:** { pos_pole } " )
for m in pos_motions :
mid = m . get ( " motion_id " )
raw_title = m . get ( " title " ) or f " Motie # { mid } "
with st . expander ( f " ▲ { raw_title [ : 80 ] } " ) :
row = motion_details . get ( int ( mid ) ) if mid is not None else None
if row :
try :
date_str = str ( row [ 2 ] ) [ : 10 ]
except Exception :
date_str = " ? "
st . caption ( f " 📅 { date_str } | { row [ 3 ] or ' — ' } " )
if row [ 4 ] and str ( row [ 4 ] ) . startswith ( " http " ) :
st . markdown ( f " [🔗 Bekijk op Tweede Kamer]( { row [ 4 ] } ) " )
if row [ 5 ] :
with st . expander ( " Toon volledige tekst " ) :
st . write ( row [ 5 ] )
_render_voting_results ( row [ 6 ] )
else :
st . caption ( " _Geen metadata beschikbaar_ " )
with ncol :
st . error ( f " ▼ **Negatieve pool:** { neg_pole } " )
for m in neg_motions :
mid = m . get ( " motion_id " )
raw_title = m . get ( " title " ) or f " Motie # { mid } "
with st . expander ( f " ▼ { raw_title [ : 80 ] } " ) :
row = motion_details . get ( int ( mid ) ) if mid is not None else None
if row :
try :
date_str = str ( row [ 2 ] ) [ : 10 ]
except Exception :
date_str = " ? "
st . caption ( f " 📅 { date_str } | { row [ 3 ] or ' — ' } " )
if row [ 4 ] and str ( row [ 4 ] ) . startswith ( " http " ) :
st . markdown ( f " [🔗 Bekijk op Tweede Kamer]( { row [ 4 ] } ) " )
if row [ 5 ] :
with st . expander ( " Toon volledige tekst " ) :
st . write ( row [ 5 ] )
_render_voting_results ( row [ 6 ] )
else :
st . caption ( " _Geen metadata beschikbaar_ " )
def build_mp_quiz_tab ( db_path : str ) - > None :