Compare commits

..

No commits in common. 'f8a52ea9b76a07023c9aa4961bfbfbf061f2158e' and 'be4375b30357424a0aa0db3afa242fd74c9a0032' have entirely different histories.

  1. 1
      analysis/explorer_data.py
  2. 21
      explorer.py
  3. 35
      scripts/recompute_svd.py

@ -64,7 +64,6 @@ _UNIFORM_DIM_SQL = """
SELECT window_id SELECT window_id
FROM dominant FROM dominant
WHERE dim >= 25 AND cnt >= 10 WHERE dim >= 25 AND cnt >= 10
AND window_id NOT LIKE '%-Q%'
ORDER BY window_id ORDER BY window_id
""" """

@ -481,7 +481,10 @@ def load_positions(
""" """
from analysis.political_axis import compute_2d_axes from analysis.political_axis import compute_2d_axes
# Use only annual windows (quarterly windows are excluded by get_uniform_dim_windows). # Always compute PCA on ALL uniform-dim windows (quarterly + annual) so that
# the principal components are determined by the full temporal spread of data.
# Using only annual windows (11) causes PC1 to capture cross-temporal drift
# instead of left-right ideology, resulting in a ~90° rotation.
all_available = get_uniform_dim_windows(db_path) all_available = get_uniform_dim_windows(db_path)
if not all_available: if not all_available:
@ -633,10 +636,8 @@ def _get_aligned_trajectory_scores(
""" """
from analysis.political_axis import compute_nd_axes from analysis.political_axis import compute_nd_axes
# Get aligned scores for the requested windows via PCA (annual-only, no quarterly) # Get aligned scores for all windows via PCA
scores_by_window, _ = compute_nd_axes( scores_by_window, _ = compute_nd_axes(db_path, n_components=n_components)
db_path, window_ids=windows, n_components=n_components
)
if not scores_by_window: if not scores_by_window:
return {} return {}
@ -2636,10 +2637,7 @@ def build_svd_components_tab(db_path: str) -> None:
"""Get party scores for all N components from aligned PCA positions.""" """Get party scores for all N components from aligned PCA positions."""
from analysis.political_axis import compute_nd_axes from analysis.political_axis import compute_nd_axes
annual_windows = get_uniform_dim_windows(db_path) scores_by_window, _ = compute_nd_axes(db_path, n_components=10)
scores_by_window, _ = compute_nd_axes(
db_path, window_ids=annual_windows, n_components=10
)
window_scores = scores_by_window.get(window, {}) window_scores = scores_by_window.get(window, {})
if not window_scores: if not window_scores:
return {} return {}
@ -2717,8 +2715,11 @@ def build_svd_components_tab(db_path: str) -> None:
# Render party axis chart (single window or time trajectory) # Render party axis chart (single window or time trajectory)
if view_mode == "Tijdtraject" and selected_parties_for_trajectory: if view_mode == "Tijdtraject" and selected_parties_for_trajectory:
# Load party scores for all windows and render time trajectory # Load party scores for all windows and render time trajectory
# Filter to annual windows only (exclude quarters)
available_windows = get_uniform_dim_windows(db_path) available_windows = get_uniform_dim_windows(db_path)
year_windows = sorted(w for w in available_windows if w != "current_parliament") year_windows = sorted(
w for w in available_windows if w != "current_parliament" and "-Q" not in w
)
has_current = "current_parliament" in available_windows has_current = "current_parliament" in available_windows
all_windows = year_windows + (["current_parliament"] if has_current else []) all_windows = year_windows + (["current_parliament"] if has_current else [])

@ -28,20 +28,17 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(mess
logger = logging.getLogger("recompute_svd") logger = logging.getLogger("recompute_svd")
def year_bounds(window_id: str) -> Tuple[str, str]: def quarter_bounds(window_id: str) -> Tuple[str, str]:
"""Return (start_date, end_date) for an annual window_id like '2024'. # window_id like '2026-Q1'
year, q = window_id.split("-Q")
Quarterly window IDs (containing '-Q') are not supported this script y = int(year)
only processes annual windows. qn = int(q)
""" starts = {1: (1, 1), 2: (4, 1), 3: (7, 1), 4: (10, 1)}
if "-Q" in window_id: ends = {1: (3, 31), 2: (6, 30), 3: (9, 30), 4: (12, 31)}
raise ValueError( s_m, s_d = starts[qn]
f"Quarterly window '{window_id}' is not supported. " e_m, e_d = ends[qn]
"Only annual windows should be recomputed." start = date(y, s_m, s_d).isoformat()
) end = date(y, e_m, e_d).isoformat()
y = int(window_id)
start = date(y, 1, 1).isoformat()
end = date(y, 12, 31).isoformat()
return start, end return start, end
@ -79,14 +76,12 @@ def main(argv: List[str] | None = None) -> int:
db = MotionDatabase(dst) db = MotionDatabase(dst)
# find windows from original DB via trajectory helper # find windows from original DB via trajectory helper
all_window_ids = traj._load_window_ids(src) window_ids = traj._load_window_ids(src)
# Only process annual windows — quarterly windows are excluded from all PCA/SVD computation
window_ids = [w for w in all_window_ids if "-Q" not in w]
if not window_ids: if not window_ids:
logger.error("No annual windows found in source DB %s", src) logger.error("No windows found in source DB %s", src)
return 3 return 3
logger.info("Will recompute SVD for annual windows: %s", window_ids) logger.info("Will recompute SVD for windows: %s", window_ids)
# clear existing svd_vectors rows for these windows in dst DB # clear existing svd_vectors rows for these windows in dst DB
import duckdb import duckdb
@ -105,7 +100,7 @@ def main(argv: List[str] | None = None) -> int:
# Run SVD per window # Run SVD per window
for wid in window_ids: for wid in window_ids:
start, end = year_bounds(wid) start, end = quarter_bounds(wid)
logger.info("Running SVD for %s (%s -> %s) k=%d", wid, start, end, args.k) logger.info("Running SVD for %s (%s -> %s) k=%d", wid, start, end, args.k)
res = run_svd_for_window( res = run_svd_for_window(
db=db, window_id=wid, start_date=start, end_date=end, k=args.k db=db, window_id=wid, start_date=start, end_date=end, k=args.k

Loading…
Cancel
Save