@ -262,27 +262,27 @@ def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
@st . cache_data ( show_spinner = " Scree-plot laden… " )
def load_scree_data ( db_path : str ) - > List [ float ] :
""" Return component importances (L2-norm per SVD dimension ), sorted descending.
""" Return party-level component importances (L2-norm per SVD dim), sorted descending.
Uses ALL individual MP vectors ( entity_type = ' mp ' , window = ' current_parliament ' ) ,
excluding party - aggregated rows . Since the stored vectors are U * s ( scaled by
singular values ) , the L2 - norm of all MP scores per dimension approximates the
singular value for that dimension . Sorting descending gives the proper scree shape .
The current_parliament window contains two separate SVD data spaces :
- Party vectors ( entity_id without comma ) : dims 0 – 15 have political signal
- Individual MP vectors ( entity_id with comma ) : signal in dim 3 + dims 16 – 49
( within - party variance , unrelated to between - party differences )
Note : Procrustes alignment across sub - windows may scramble the original dimension
ordering , so we sort by magnitude rather than relying on dimension index order .
Since the SVD tab axis chart uses party vectors exclusively , the scree plot
must also use party vectors . We filter to entries with L2 - norm > 1 ( excludes
near - empty / historical party entries ) , compute L2 - norm per dim , then sort
descending so the elbow shape is visible .
"""
try :
con = duckdb . connect ( database = db_path , read_only = True )
rows = con . execute (
" SELECT entity_id, vector FROM svd_vectors "
" WHERE entity_type= ' mp ' AND window_id= ' current_parliament ' "
" WHERE entity_type= ' mp ' AND window_id= ' current_parliament ' "
" AND entity_id NOT LIKE ' % , % ' "
) . fetchall ( )
# Individual MPs have "Lastname, F." format; party rows are short codes without commas
vectors : List [ List [ float ] ] = [ ]
for entity_id , raw_vec in rows :
if " , " not in entity_id :
continue # skip party-aggregated rows
if isinstance ( raw_vec , str ) :
vec = json . loads ( raw_vec )
elif isinstance ( raw_vec , ( bytes , bytearray ) ) :
@ -294,7 +294,10 @@ def load_scree_data(db_path: str) -> List[float]:
vec = list ( raw_vec )
except Exception :
continue
vectors . append ( [ float ( v ) if v is not None else 0.0 for v in vec ] )
fvec = [ float ( v ) if v is not None else 0.0 for v in vec ]
l2 = sum ( x * * 2 for x in fvec ) * * 0.5
if l2 > 1.0 : # skip near-empty / historical party entries
vectors . append ( fvec )
if not vectors :
return [ ]
n_dims = len ( vectors [ 0 ] )