Compare commits

...

2 Commits

Author SHA1 Message Date
Sven Geboers f8a52ea9b7 fix: pass annual-only windows to compute_nd_axes in SVD components tab 2 weeks ago
Sven Geboers 62d8e15e03 fix: exclude quarterly windows from all PCA/SVD computation 2 weeks ago
  1. 1
      analysis/explorer_data.py
  2. 21
      explorer.py
  3. 35
      scripts/recompute_svd.py

@ -64,6 +64,7 @@ _UNIFORM_DIM_SQL = """
SELECT window_id
FROM dominant
WHERE dim >= 25 AND cnt >= 10
AND window_id NOT LIKE '%-Q%'
ORDER BY window_id
"""

@ -481,10 +481,7 @@ def load_positions(
"""
from analysis.political_axis import compute_2d_axes
# Always compute PCA on ALL uniform-dim windows (quarterly + annual) so that
# the principal components are determined by the full temporal spread of data.
# Using only annual windows (11) causes PC1 to capture cross-temporal drift
# instead of left-right ideology, resulting in a ~90° rotation.
# Use only annual windows (quarterly windows are excluded by get_uniform_dim_windows).
all_available = get_uniform_dim_windows(db_path)
if not all_available:
@ -636,8 +633,10 @@ def _get_aligned_trajectory_scores(
"""
from analysis.political_axis import compute_nd_axes
# Get aligned scores for all windows via PCA
scores_by_window, _ = compute_nd_axes(db_path, n_components=n_components)
# Get aligned scores for the requested windows via PCA (annual-only, no quarterly)
scores_by_window, _ = compute_nd_axes(
db_path, window_ids=windows, n_components=n_components
)
if not scores_by_window:
return {}
@ -2637,7 +2636,10 @@ def build_svd_components_tab(db_path: str) -> None:
"""Get party scores for all N components from aligned PCA positions."""
from analysis.political_axis import compute_nd_axes
scores_by_window, _ = compute_nd_axes(db_path, n_components=10)
annual_windows = get_uniform_dim_windows(db_path)
scores_by_window, _ = compute_nd_axes(
db_path, window_ids=annual_windows, n_components=10
)
window_scores = scores_by_window.get(window, {})
if not window_scores:
return {}
@ -2715,11 +2717,8 @@ def build_svd_components_tab(db_path: str) -> None:
# Render party axis chart (single window or time trajectory)
if view_mode == "Tijdtraject" and selected_parties_for_trajectory:
# Load party scores for all windows and render time trajectory
# Filter to annual windows only (exclude quarters)
available_windows = get_uniform_dim_windows(db_path)
year_windows = sorted(
w for w in available_windows if w != "current_parliament" and "-Q" not in w
)
year_windows = sorted(w for w in available_windows if w != "current_parliament")
has_current = "current_parliament" in available_windows
all_windows = year_windows + (["current_parliament"] if has_current else [])

@ -28,17 +28,20 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(mess
logger = logging.getLogger("recompute_svd")
def quarter_bounds(window_id: str) -> Tuple[str, str]:
# window_id like '2026-Q1'
year, q = window_id.split("-Q")
y = int(year)
qn = int(q)
starts = {1: (1, 1), 2: (4, 1), 3: (7, 1), 4: (10, 1)}
ends = {1: (3, 31), 2: (6, 30), 3: (9, 30), 4: (12, 31)}
s_m, s_d = starts[qn]
e_m, e_d = ends[qn]
start = date(y, s_m, s_d).isoformat()
end = date(y, e_m, e_d).isoformat()
def year_bounds(window_id: str) -> Tuple[str, str]:
    """Return (start_date, end_date) for an annual window_id like '2024'.

    Both values are ISO-8601 date strings: 1 January and 31 December of
    the given year.

    Quarterly window IDs (containing '-Q') are not supported; this script
    only processes annual windows.

    Raises:
        ValueError: If window_id is a quarterly ID, or if it is not a valid
            calendar year (for example a non-numeric ID).
    """
    if "-Q" in window_id:
        raise ValueError(
            f"Quarterly window '{window_id}' is not supported. "
            "Only annual windows should be recomputed."
        )
    try:
        y = int(window_id)
        # date() also rejects years outside the supported calendar range.
        start = date(y, 1, 1).isoformat()
        end = date(y, 12, 31).isoformat()
    except ValueError as err:
        # Replace the opaque int()/date() message with one that names the
        # offending window; the exception type stays ValueError for callers.
        raise ValueError(
            f"Window '{window_id}' is not a valid annual window ID; "
            "expected a four-digit year like '2024'."
        ) from err
    return start, end
@ -76,12 +79,14 @@ def main(argv: List[str] | None = None) -> int:
db = MotionDatabase(dst)
# find windows from original DB via trajectory helper
window_ids = traj._load_window_ids(src)
all_window_ids = traj._load_window_ids(src)
# Only process annual windows — quarterly windows are excluded from all PCA/SVD computation
window_ids = [w for w in all_window_ids if "-Q" not in w]
if not window_ids:
logger.error("No windows found in source DB %s", src)
logger.error("No annual windows found in source DB %s", src)
return 3
logger.info("Will recompute SVD for windows: %s", window_ids)
logger.info("Will recompute SVD for annual windows: %s", window_ids)
# clear existing svd_vectors rows for these windows in dst DB
import duckdb
@ -100,7 +105,7 @@ def main(argv: List[str] | None = None) -> int:
# Run SVD per window
for wid in window_ids:
start, end = quarter_bounds(wid)
start, end = year_bounds(wid)
logger.info("Running SVD for %s (%s -> %s) k=%d", wid, start, end, args.k)
res = run_svd_for_window(
db=db, window_id=wid, start_date=start, end_date=end, k=args.k

Loading…
Cancel
Save