diff --git a/analysis/explorer_data.py b/analysis/explorer_data.py
index 1ecdab4..8100604 100644
--- a/analysis/explorer_data.py
+++ b/analysis/explorer_data.py
@@ -64,6 +64,7 @@ _UNIFORM_DIM_SQL = """
 SELECT window_id
 FROM dominant
 WHERE dim >= 25
   AND cnt >= 10
+  AND window_id NOT LIKE '%-Q%'
 ORDER BY window_id
 """
diff --git a/explorer.py b/explorer.py
index a300826..3c83089 100644
--- a/explorer.py
+++ b/explorer.py
@@ -481,10 +481,9 @@ def load_positions(
     """
     from analysis.political_axis import compute_2d_axes
 
-    # Always compute PCA on ALL uniform-dim windows (quarterly + annual) so that
-    # the principal components are determined by the full temporal spread of data.
-    # Using only annual windows (11) causes PC1 to capture cross-temporal drift
-    # instead of left-right ideology, resulting in a ~90° rotation.
+    # Use only annual windows (quarterly windows are excluded by get_uniform_dim_windows).
+    # NOTE(review): the comment removed here said annual-only PCA made PC1 track temporal
+    # drift (~90° rotation) — confirm the axes are still oriented correctly.
     all_available = get_uniform_dim_windows(db_path)
 
     if not all_available:
@@ -2715,11 +2714,8 @@ def build_svd_components_tab(db_path: str) -> None:
     # Render party axis chart (single window or time trajectory)
     if view_mode == "Tijdtraject" and selected_parties_for_trajectory:
         # Load party scores for all windows and render time trajectory
-        # Filter to annual windows only (exclude quarters)
         available_windows = get_uniform_dim_windows(db_path)
-        year_windows = sorted(
-            w for w in available_windows if w != "current_parliament" and "-Q" not in w
-        )
+        year_windows = sorted(w for w in available_windows if w != "current_parliament")
         has_current = "current_parliament" in available_windows
         all_windows = year_windows + (["current_parliament"] if has_current else [])
 
diff --git a/scripts/recompute_svd.py b/scripts/recompute_svd.py
index 0c1edf6..84a3120 100644
--- a/scripts/recompute_svd.py
+++ b/scripts/recompute_svd.py
@@ -28,17 +28,20 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(mess
 logger = logging.getLogger("recompute_svd")
 
 
-def quarter_bounds(window_id: str) -> Tuple[str, str]:
-    # window_id like '2026-Q1'
-    year, q = window_id.split("-Q")
-    y = int(year)
-    qn = int(q)
-    starts = {1: (1, 1), 2: (4, 1), 3: (7, 1), 4: (10, 1)}
-    ends = {1: (3, 31), 2: (6, 30), 3: (9, 30), 4: (12, 31)}
-    s_m, s_d = starts[qn]
-    e_m, e_d = ends[qn]
-    start = date(y, s_m, s_d).isoformat()
-    end = date(y, e_m, e_d).isoformat()
+def year_bounds(window_id: str) -> Tuple[str, str]:
+    """Return (start_date, end_date) for an annual window_id like '2024'.
+
+    Any other ID (quarterly such as '2024-Q1', or a non-year label) raises
+    ValueError — this script only processes annual windows.
+    """
+    if not (len(window_id) == 4 and window_id.isdecimal()):
+        raise ValueError(
+            f"Window '{window_id}' is not an annual window. "
+            "Only annual windows should be recomputed."
+        )
+    y = int(window_id)
+    start = date(y, 1, 1).isoformat()
+    end = date(y, 12, 31).isoformat()
     return start, end
 
 
@@ -76,12 +79,14 @@ def main(argv: List[str] | None = None) -> int:
     db = MotionDatabase(dst)
 
     # find windows from original DB via trajectory helper
-    window_ids = traj._load_window_ids(src)
+    all_window_ids = traj._load_window_ids(src)
+    # Only process annual (YYYY) windows — quarterly and non-year IDs are rejected by year_bounds
+    window_ids = [w for w in all_window_ids if len(w) == 4 and w.isdecimal()]
     if not window_ids:
-        logger.error("No windows found in source DB %s", src)
+        logger.error("No annual windows found in source DB %s", src)
         return 3
 
-    logger.info("Will recompute SVD for windows: %s", window_ids)
+    logger.info("Will recompute SVD for annual windows: %s", window_ids)
 
     # clear existing svd_vectors rows for these windows in dst DB
     import duckdb
@@ -100,7 +105,7 @@ def main(argv: List[str] | None = None) -> int:
 
     # Run SVD per window
     for wid in window_ids:
-        start, end = quarter_bounds(wid)
+        start, end = year_bounds(wid)
         logger.info("Running SVD for %s (%s -> %s) k=%d", wid, start, end, args.k)
         res = run_svd_for_window(
             db=db, window_id=wid, start_date=start, end_date=end, k=args.k