|
|
|
|
@ -72,6 +72,46 @@ def get_available_windows(db_path: str) -> List[str]: |
|
|
|
|
con.close() |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner=False)
def get_uniform_dim_windows(db_path: str) -> List[str]:
    """List the window ids whose MP vectors have the dominant dimension.

    np.vstack needs every stacked vector to have the same shape, while early
    or small windows have a lower SVD rank (dim < 50). To keep
    compute_2d_axes from ever receiving mixed shapes, only windows at the
    most frequent dimension are returned.

    How the dominant dimension is chosen (all done in SQL):
      1. For each ``window_id`` one ``entity_type = 'mp'`` row is sampled and
         the JSON array length of its ``vector`` is taken as that window's
         dimension.
      2. Windows are counted per dimension; the dimension with the highest
         count wins, ties going to the larger dimension.

    Args:
        db_path: Path of the DuckDB database file; opened read-only.

    Returns:
        Window ids at the dominant dimension, ordered by window id. An empty
        list is returned when the query fails (the error is logged).
    """
    query = """
        WITH window_dims AS (
            SELECT DISTINCT ON (window_id)
                window_id,
                json_array_length(vector) AS dim
            FROM svd_vectors
            WHERE entity_type = 'mp'
            ORDER BY window_id
        ),
        dim_counts AS (
            SELECT dim, COUNT(*) AS cnt FROM window_dims GROUP BY dim
        ),
        dominant AS (
            SELECT dim FROM dim_counts ORDER BY cnt DESC, dim DESC LIMIT 1
        )
        SELECT wd.window_id
        FROM window_dims wd
        JOIN dominant d ON wd.dim = d.dim
        ORDER BY wd.window_id
        """

    connection = duckdb.connect(database=db_path, read_only=True)
    try:
        records = connection.execute(query).fetchall()
        # Each record is a single-column row holding the window id.
        return [record[0] for record in records]
    except Exception:
        # Best-effort helper: log the failure and report "no usable windows".
        logger.exception("Failed to query uniform-dim windows")
        return []
    finally:
        # Always release the database handle, on success or failure.
        connection.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner="2D posities berekenen (kan even duren)…") |
|
|
|
|
def load_positions( |
|
|
|
|
db_path: str, window_size: str = "quarterly" |
|
|
|
|
@ -84,7 +124,9 @@ def load_positions( |
|
|
|
|
""" |
|
|
|
|
from analysis.political_axis import compute_2d_axes |
|
|
|
|
|
|
|
|
|
available = get_available_windows(db_path) |
|
|
|
|
# Only use windows where all vectors share the same dimension (dim=50). |
|
|
|
|
# Mixed-dim windows cause np.vstack to fail in compute_2d_axes. |
|
|
|
|
available = get_uniform_dim_windows(db_path) |
|
|
|
|
if window_size == "annual": |
|
|
|
|
# Keep only Q4 windows (one representative window per year) |
|
|
|
|
available = [w for w in available if w.endswith("-Q4")] |
|
|
|
|
|