deploy to server

main
Sven Geboers 1 month ago
parent c3f74433b2
commit b5c14d0c65
  1. 2
      .drone.yml
  2. 48
      analysis/political_axis.py
  3. 23
      analysis/trajectory.py
  4. 6
      docker-compose.yml
  5. 487
      explorer.py
  6. 2
      pipeline/run_pipeline.py
  7. 69
      pipeline/svd_pipeline.py
  8. 772
      thoughts/explorer/top_svd_top_motions.json
  9. 202
      thoughts/shared/plans/2026-03-26-motief-deployment-plan.md

@ -28,7 +28,7 @@ steps:
password: ${DEPLOY_PASSWORD} password: ${DEPLOY_PASSWORD}
script: | script: |
set -e set -e
cd /srv/stematlas cd /home/webapps/motief
docker pull ${DOCKER_REGISTRY}/${DRONE_REPO_OWNER}/${DRONE_REPO_NAME}:latest docker pull ${DOCKER_REGISTRY}/${DRONE_REPO_OWNER}/${DRONE_REPO_NAME}:latest
docker-compose pull docker-compose pull
docker-compose up -d docker-compose up -d

@ -174,6 +174,20 @@ def compute_2d_axes(
for wid in window_ids: for wid in window_ids:
raw_window_vecs[wid] = _trajectory._load_mp_vectors_for_window(db_path, wid) raw_window_vecs[wid] = _trajectory._load_mp_vectors_for_window(db_path, wid)
# Pad all vectors to the maximum dimension across windows before Procrustes.
# Some windows (e.g. 2017 with only 30 motions) have lower-rank SVD output
# (dim=29 instead of 50). Padding with zeros lets Procrustes treat all vectors
# as the same dimension, preserving the alignment chain.
if raw_window_vecs:
max_dim = max(v.shape[0] for d in raw_window_vecs.values() for v in d.values())
padded: Dict[str, Dict[str, np.ndarray]] = {}
for wid, d in raw_window_vecs.items():
padded[wid] = {
e: np.pad(v, (0, max_dim - v.shape[0])) if v.shape[0] < max_dim else v
for e, v in d.items()
}
raw_window_vecs = padded
aligned_window_vecs = _trajectory._procrustes_align_windows(raw_window_vecs) aligned_window_vecs = _trajectory._procrustes_align_windows(raw_window_vecs)
# Stack all vectors across windows into a single matrix for PCA if needed # Stack all vectors across windows into a single matrix for PCA if needed
@ -246,10 +260,27 @@ def compute_2d_axes(
# Ensure consistent left/right and progressive/conservative orientation # Ensure consistent left/right and progressive/conservative orientation
# by checking canonical party centroids and flipping axis signs if needed. # by checking canonical party centroids and flipping axis signs if needed.
try: try:
right_parties = {"PVV", "VVD", "FVD", "BBB", "JA21"} right_parties = {
left_parties = {"SP", "PvdA", "GroenLinks", "GroenLinks-PvdA", "DENK"} "PVV",
cons_parties = {"PVV", "VVD", "FVD", "CDA", "SGP", "BBB", "JA21"} "VVD",
"FVD",
"BBB",
"JA21",
"Nieuw Sociaal Contract",
}
left_parties = {"SP", "PvdA", "GL", "GroenLinks", "GroenLinks-PvdA", "DENK"}
cons_parties = {
"PVV",
"VVD",
"FVD",
"CDA",
"SGP",
"BBB",
"JA21",
"Nieuw Sociaal Contract",
}
prog_parties = { prog_parties = {
"GL",
"GroenLinks", "GroenLinks",
"PvdA", "PvdA",
"PvdD", "PvdD",
@ -263,9 +294,12 @@ def compute_2d_axes(
def _centroid_for_party_set(party_set): def _centroid_for_party_set(party_set):
vecs = [] vecs = []
# Party-level centroid vectors (entity_id == party name directly).
for p in party_set: for p in party_set:
if p in ent_to_vec: if p in ent_to_vec:
vecs.append(ent_to_vec[p]) vecs.append(ent_to_vec[p])
# MP-level vectors: mp_metadata stores mp_name in the same
# "Lastname, Initials" format as entity_id in svd_vectors.
try: try:
conn = duckdb.connect(db_path) conn = duckdb.connect(db_path)
rows = conn.execute( rows = conn.execute(
@ -295,7 +329,7 @@ def compute_2d_axes(
) )
axes["x_axis"] = -axes["x_axis"] axes["x_axis"] = -axes["x_axis"]
# Y-axis: progressive vs conservative — prefer positive = conservative # Y-axis: progressive vs conservative — positive Y = progressive
prog_cent = _centroid_for_party_set(prog_parties) prog_cent = _centroid_for_party_set(prog_parties)
cons_cent = _centroid_for_party_set(cons_parties) cons_cent = _centroid_for_party_set(cons_parties)
if prog_cent is not None and cons_cent is not None: if prog_cent is not None and cons_cent is not None:
@ -378,7 +412,11 @@ def compute_2d_axes(
cons_centroid = np.mean(np.vstack(cons_vecs), axis=0) cons_centroid = np.mean(np.vstack(cons_vecs), axis=0)
lr = right_centroid - left_centroid lr = right_centroid - left_centroid
pc = cons_centroid - prog_centroid # Construct progressive-conservative axis so that positive Y corresponds
# to *progressive* positions (consistent with PCA branch where we flip
# the sign to make progressive > conservative). Use prog - cons so a
# positive dot product means closer to the progressive centroid.
pc = prog_centroid - cons_centroid
# Gram-Schmidt: make pc orthogonal to lr # Gram-Schmidt: make pc orthogonal to lr
lr_norm = np.linalg.norm(lr) lr_norm = np.linalg.norm(lr)

@ -56,7 +56,28 @@ def _procrustes_align_windows(
for wid in window_ids[1:]: for wid in window_ids[1:]:
cur = window_vecs[wid] cur = window_vecs[wid]
common = [e for e in cur if e in prev_aligned] # Only consider common entities whose vectors share the same dimensionality
common = [
e
for e in cur
if e in prev_aligned and cur[e].shape == prev_aligned[e].shape
]
# If there are common entities but their vector dimensions differ between
# the current and previously aligned window, skip Procrustes alignment
# for this window rather than raising an exception in orthogonal_procrustes.
if any(
e
for e in cur
if e in prev_aligned and cur[e].shape != prev_aligned[e].shape
):
_logger.debug(
"Procrustes skipped for %s: vector dimensionality mismatch between windows",
wid,
)
result[wid] = cur
prev_aligned = cur
continue
if len(common) < min_overlap: if len(common) < min_overlap:
_logger.debug( _logger.debug(

@ -1,12 +1,12 @@
version: "3.9" version: "3.9"
services: services:
stematlas: motief:
image: ${DOCKER_REGISTRY}/sgeboers/stemwijzer:latest image: ${DOCKER_REGISTRY}/sgeboers/stemwijzer:latest
ports: ports:
- "127.0.0.1:8501:8501" - "127.0.0.1:8501:8501"
volumes: volumes:
- /srv/stematlas/data:/home/app/app/data - ${DATA_DIR:-/home/webapps/motief/data}:/home/app/app/data
restart: unless-stopped restart: unless-stopped
environment: environment:
- PYTHONPATH=/home/app/app - PYTHONPATH=/home/app/app
@ -23,7 +23,7 @@ services:
image: ${DOCKER_REGISTRY}/sgeboers/stemwijzer:latest image: ${DOCKER_REGISTRY}/sgeboers/stemwijzer:latest
command: python scheduler.py command: python scheduler.py
volumes: volumes:
- /srv/stematlas/data:/home/app/app/data - ${DATA_DIR:-/home/webapps/motief/data}:/home/app/app/data
restart: unless-stopped restart: unless-stopped
environment: environment:
- PYTHONPATH=/home/app/app - PYTHONPATH=/home/app/app

@ -100,8 +100,9 @@ _PARTY_NORMALIZE: dict[str, str] = {
"GL": "GroenLinks-PvdA", "GL": "GroenLinks-PvdA",
"GroenLinks": "GroenLinks-PvdA", "GroenLinks": "GroenLinks-PvdA",
"PvdA": "GroenLinks-PvdA", "PvdA": "GroenLinks-PvdA",
"Gündoğan": "GroenLinks-PvdA", # briefly sat with GL-PvdA faction "Gündoğan": "Volt", # confirmed Volt, left parliament 2023-12-05
"Lid Keijzer": "BBB", # Keijzer left CDA, joined BBB cabinet "Lid Keijzer": "BBB", # Keijzer left CDA, joined BBB cabinet
"Groep Markuszower": "PVV", # Markuszower sits with PVV faction
} }
@ -157,7 +158,7 @@ def get_uniform_dim_windows(db_path: str) -> List[str]:
) )
SELECT window_id SELECT window_id
FROM dominant FROM dominant
WHERE dim = 50 AND cnt >= 10 WHERE dim >= 25 AND cnt >= 10
ORDER BY window_id ORDER BY window_id
""" """
).fetchall() ).fetchall()
@ -181,29 +182,30 @@ def load_positions(
""" """
from analysis.political_axis import compute_2d_axes from analysis.political_axis import compute_2d_axes
# Only use windows where all vectors share the same dimension (dim=50). # Always compute PCA on ALL uniform-dim windows (quarterly + annual) so that
# Mixed-dim windows cause np.vstack to fail in compute_2d_axes. # the principal components are determined by the full temporal spread of data.
available = get_uniform_dim_windows(db_path) # Using only annual windows (11) causes PC1 to capture cross-temporal drift
if window_size == "annual": # instead of left-right ideology, resulting in a ~90° rotation.
# Use actual annual windows (no "-Q" suffix, not "current_parliament"). all_available = get_uniform_dim_windows(db_path)
# These are window_ids like "2022", "2023", etc. computed over full calendar years.
# Always include current_parliament as the most-recent anchor.
annual = [w for w in available if "-Q" not in w and w != "current_parliament"]
# Sort so current_parliament comes last (after all year windows)
available = sorted(annual) + (
["current_parliament"] if "current_parliament" in available else []
)
if not available: if not all_available:
return {}, {} return {}, {}
positions_by_window, axis_def = compute_2d_axes( positions_by_window, axis_def = compute_2d_axes(
db_path, db_path,
window_ids=available, window_ids=all_available,
method="pca", method="pca",
pca_residual=True, pca_residual=True,
normalize_vectors=True, normalize_vectors=True,
) )
# Filter displayed windows by window_size AFTER PCA computation.
if window_size == "annual":
annual_keys = set(w for w in all_available if "-Q" not in w)
positions_by_window = {
w: v for w, v in positions_by_window.items() if w in annual_keys
}
return positions_by_window, axis_def return positions_by_window, axis_def
@ -224,6 +226,25 @@ def load_party_map(db_path: str) -> Dict[str, str]:
return {} return {}
@st.cache_data(show_spinner="Actieve Kamerleden laden…")
def load_active_mps(db_path: str) -> set:
    """Return the set of mp_name values that are currently seated in parliament.

    An MP is considered active if their mp_metadata row has tot_en_met IS NULL,
    meaning they have no recorded end date for their current seat.

    Args:
        db_path: Path to the DuckDB database file; it is opened read-only.

    Returns:
        The set of ``mp_name`` values taken from ``mp_metadata`` rows whose
        ``tot_en_met`` is NULL. When connecting or querying fails, the error
        is logged and an empty set is returned instead of raising.
    """
    try:
        con = duckdb.connect(database=db_path, read_only=True)
        try:
            rows = con.execute(
                "SELECT mp_name FROM mp_metadata WHERE tot_en_met IS NULL"
            ).fetchall()
        finally:
            # Release the connection on the failure path too; previously it
            # was only closed after a successful query.
            con.close()
        return {r[0] for r in rows}
    except Exception:
        # Deliberate best-effort: callers receive an empty set on any failure.
        logger.exception("Failed to load active MPs")
        return set()
@st.cache_data(show_spinner="Partijposities op SVD-assen laden…") @st.cache_data(show_spinner="Partijposities op SVD-assen laden…")
def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]: def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
"""Return per-party SVD vectors, computed as mean of individual MP vectors. """Return per-party SVD vectors, computed as mean of individual MP vectors.
@ -241,25 +262,38 @@ def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
try: try:
con = duckdb.connect(database=db_path, read_only=True) con = duckdb.connect(database=db_path, read_only=True)
# Dominant party per individual MP from mp_votes (majority-vote assignment) # Build mp → party mapping from mp_metadata (most recent party during current parliament).
party_rows = con.execute( # mp_metadata format: mp_name like "Van Baarle, S.R.T.", party = "GroenLinks-PvdA"
"SELECT mp_name, party, COUNT(*) as n FROM mp_votes " # We take the party record with the latest `van` date (most recent assignment).
"WHERE party IS NOT NULL AND party != '' AND mp_name LIKE '%,%' " meta_rows = con.execute(
"GROUP BY mp_name, party" "SELECT mp_name, party FROM mp_metadata "
"WHERE van >= '2023-11-22' OR tot_en_met IS NULL OR tot_en_met >= '2023-11-22'"
).fetchall() ).fetchall()
party_counts: Dict[str, Dict[str, int]] = {} # For MPs with multiple records (party switches), keep the one with latest van date.
for mp_name, party, n in party_rows: # Simple approach: last-write-wins per mp_name after sorting by van ascending.
# Normalize variant names to canonical party names mp_party_raw: Dict[str, str] = {}
for mp_name, party in meta_rows:
if mp_name and party:
mp_party_raw[mp_name] = party # later rows (after ORDER BY van) win
# Re-query ordered so latest van wins reliably
meta_ordered = con.execute(
"SELECT mp_name, party FROM mp_metadata "
"WHERE van >= '2023-11-22' OR tot_en_met IS NULL OR tot_en_met >= '2023-11-22' "
"ORDER BY van ASC"
).fetchall()
mp_party_raw = {}
for mp_name, party in meta_ordered:
if mp_name and party:
mp_party_raw[mp_name] = party
# Normalize party names to canonical abbreviations
mp_party: Dict[str, str] = {}
for mp_name, party in mp_party_raw.items():
canonical = _PARTY_NORMALIZE.get(party, party) canonical = _PARTY_NORMALIZE.get(party, party)
party_counts.setdefault(mp_name, {})[canonical] = ( mp_party[mp_name] = canonical
party_counts.setdefault(mp_name, {}).get(canonical, 0) + n
)
mp_party: Dict[str, str] = {
mp: max(counts, key=counts.__getitem__)
for mp, counts in party_counts.items()
}
# Individual MP vectors from current_parliament (all mp rows are now individual MPs) # Individual MP vectors from current_parliament
rows = con.execute( rows = con.execute(
"SELECT entity_id, vector FROM svd_vectors " "SELECT entity_id, vector FROM svd_vectors "
"WHERE entity_type='mp' AND window_id='current_parliament'" "WHERE entity_type='mp' AND window_id='current_parliament'"
@ -301,24 +335,18 @@ def load_party_axis_scores(db_path: str) -> Dict[str, List[float]]:
@st.cache_data(show_spinner="Scree-plot laden…") @st.cache_data(show_spinner="Scree-plot laden…")
def load_scree_data(db_path: str) -> List[float]: def load_scree_data(db_path: str) -> List[float]:
"""Return party-level component importances (L2-norm per SVD dim), sorted descending. """Return per-component importances (L2-norm per SVD dim), sorted descending.
The current_parliament window contains two separate SVD data spaces:
- Party vectors (entity_id without comma): dims 0–15 have political signal
- Individual MP vectors (entity_id with comma): signal in dim 3 + dims 16–49
(within-party variance, unrelated to between-party differences)
Since the SVD tab axis chart uses party vectors exclusively, the scree plot Uses individual MP vectors from current_parliament (entity_id LIKE '%,%').
must also use party vectors. We filter to entries with L2-norm > 1 (excludes Computes L2-norm per SVD dimension across all MPs, then sorts descending
near-empty/historical party entries), compute L2-norm per dim, then sort so the elbow shape is visible in the scree chart.
descending so the elbow shape is visible.
""" """
try: try:
con = duckdb.connect(database=db_path, read_only=True) con = duckdb.connect(database=db_path, read_only=True)
rows = con.execute( rows = con.execute(
"SELECT entity_id, vector FROM svd_vectors " "SELECT entity_id, vector FROM svd_vectors "
"WHERE entity_type='mp' AND window_id='current_parliament' " "WHERE entity_type='mp' AND window_id='current_parliament' "
"AND entity_id NOT LIKE '%,%'" "AND entity_id LIKE '%,%'"
).fetchall() ).fetchall()
vectors: List[List[float]] = [] vectors: List[List[float]] = []
for entity_id, raw_vec in rows: for entity_id, raw_vec in rows:
@ -334,9 +362,7 @@ def load_scree_data(db_path: str) -> List[float]:
except Exception: except Exception:
continue continue
fvec = [float(v) if v is not None else 0.0 for v in vec] fvec = [float(v) if v is not None else 0.0 for v in vec]
l2 = sum(x**2 for x in fvec) ** 0.5 vectors.append(fvec)
if l2 > 1.0: # skip near-empty / historical party entries
vectors.append(fvec)
if not vectors: if not vectors:
return [] return []
n_dims = len(vectors[0]) n_dims = len(vectors[0])
@ -357,10 +383,11 @@ def load_scree_data(db_path: str) -> List[float]:
def _render_scree_plot(importances: List[float], n_show: int = 15) -> None: def _render_scree_plot(importances: List[float], n_show: int = 15) -> None:
"""Render a bar+line combo chart showing relative SVD component importance. """Render a scree plot showing relative SVD component importance.
Bars show the L2-norm (singular value proxy) per rank; a line connects the tops Highlighted bars for the top-2 components (used in the compass); muted bars
of the bars to make the 'elbow' in the scree curve easy to spot. for the rest. A cumulative-variance dashed line on the same y-axis helps
spot the elbow. A 50 % cumulative threshold line is drawn for reference.
Args: Args:
importances: List of importance values sorted descending (from load_scree_data). importances: List of importance values sorted descending (from load_scree_data).
@ -372,34 +399,88 @@ def _render_scree_plot(importances: List[float], n_show: int = 15) -> None:
raw = importances[:n_show] raw = importances[:n_show]
data = [v / total * 100 for v in raw] data = [v / total * 100 for v in raw]
ranks = list(range(1, len(data) + 1)) ranks = list(range(1, len(data) + 1))
bar_colour = "#90CAF9"
line_colour = "#1565C0" # Cumulative variance for the dashed overlay line
cumsum = []
running = 0.0
for v in data:
running += v
cumsum.append(running)
# Colour: first 2 bars highlighted (compass axes), rest muted
n_highlight = 2
bar_colours = [
"#1565C0" if i < n_highlight else "#90CAF9" for i in range(len(data))
]
fig = go.Figure() fig = go.Figure()
# Bars
fig.add_trace( fig.add_trace(
go.Bar( go.Bar(
x=ranks, x=ranks,
y=data, y=data,
marker_color=bar_colour, marker_color=bar_colours,
hovertemplate="Rang %{x}<br>%{y:.1f}% van totaal<extra></extra>", hovertemplate="As %{x}<br><b>%{y:.1f}%</b> van totaal<extra></extra>",
showlegend=False, showlegend=False,
) )
) )
# Cumulative variance line (dashed, warm amber)
fig.add_trace( fig.add_trace(
go.Scatter( go.Scatter(
x=ranks, x=ranks,
y=data, y=cumsum,
mode="lines+markers", mode="lines+markers",
line={"color": line_colour, "width": 2}, line={"color": "#F57C00", "width": 2, "dash": "dot"},
marker={"size": 6, "color": line_colour}, marker={"size": 5, "color": "#F57C00"},
hoverinfo="skip", hovertemplate="As %{x}<br>Cumulatief: <b>%{y:.1f}%</b><extra></extra>",
showlegend=False, name="Cumulatief",
showlegend=True,
) )
) )
# 50 % reference line
fig.add_hline(
y=50,
line_dash="dash",
line_color="#BDBDBD",
line_width=1,
annotation_text="50%",
annotation_position="right",
annotation_font_color="#9E9E9E",
annotation_font_size=11,
)
# Annotations on the top-2 bars showing their % value
for i in range(min(n_highlight, len(data))):
fig.add_annotation(
x=ranks[i],
y=data[i] + 0.3,
text=f"{data[i]:.1f}%",
showarrow=False,
font={"size": 11, "color": "#1565C0"},
yanchor="bottom",
)
fig.update_layout( fig.update_layout(
height=220, height=280,
margin={"l": 10, "r": 10, "t": 10, "b": 30}, margin={"l": 10, "r": 50, "t": 30, "b": 40},
title={
"text": "Belang per SVD-as",
"font": {"size": 13, "color": "#555555"},
"x": 0.02,
"xanchor": "left",
},
legend={
"orientation": "h",
"x": 0.5,
"xanchor": "center",
"y": 1.08,
"font": {"size": 11},
},
xaxis={ xaxis={
"title": "Rang", "title": {"text": "As (rang)", "font": {"size": 11}},
"tickmode": "linear", "tickmode": "linear",
"tick0": 1, "tick0": 1,
"dtick": 1, "dtick": 1,
@ -407,15 +488,16 @@ def _render_scree_plot(importances: List[float], n_show: int = 15) -> None:
"showgrid": False, "showgrid": False,
}, },
yaxis={ yaxis={
"title": "% van totale variantie", "title": {"text": "% van totale variantie", "font": {"size": 11}},
"showline": False, "showline": False,
"showgrid": True, "showgrid": True,
"gridcolor": "#eeeeee", "gridcolor": "#eeeeee",
"ticksuffix": "%", "ticksuffix": "%",
"range": [0, max(cumsum) * 1.08],
}, },
plot_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)",
paper_bgcolor="rgba(0,0,0,0)", paper_bgcolor="rgba(0,0,0,0)",
bargap=0.2, bargap=0.25,
) )
st.plotly_chart(fig, use_container_width=True) st.plotly_chart(fig, use_container_width=True)
@ -615,7 +697,8 @@ def build_compass_tab(db_path: str, window_size: str) -> None:
"2D projectie van Kamerlid posities op basis van stemgedrag (PCA op SVD-vectoren)." "2D projectie van Kamerlid posities op basis van stemgedrag (PCA op SVD-vectoren)."
) )
positions_by_window, axis_def = load_positions(db_path, window_size) # Compass always uses annual windows regardless of the sidebar window_size setting.
positions_by_window, axis_def = load_positions(db_path, "annual")
if not positions_by_window: if not positions_by_window:
st.warning( st.warning(
"Geen positiedata beschikbaar. Controleer of de pipeline is gedraaid." "Geen positiedata beschikbaar. Controleer of de pipeline is gedraaid."
@ -623,12 +706,30 @@ def build_compass_tab(db_path: str, window_size: str) -> None:
return return
party_map = load_party_map(db_path) party_map = load_party_map(db_path)
windows = sorted(positions_by_window.keys()) active_mps = load_active_mps(db_path)
# Sort windows: year windows first (ascending), current_parliament last.
year_windows = sorted(w for w in positions_by_window if w != "current_parliament")
has_current = "current_parliament" in positions_by_window
windows = year_windows + (["current_parliament"] if has_current else [])
# Motion counts per year — sparse years get a warning label.
_SPARSE_YEARS = {"2016", "2017", "2018"}
def _window_label(w: str) -> str:
if w == "current_parliament":
return "Huidig parlement"
if w in _SPARSE_YEARS:
return f"{w}"
return w
col1, col2 = st.columns([3, 1]) col1, col2 = st.columns([3, 1])
with col2: with col2:
window_idx = st.selectbox( window_idx = st.selectbox(
"Tijdsvenster", options=windows, index=len(windows) - 1 "Jaar",
options=windows,
index=len(windows) - 1, # default: current_parliament
format_func=_window_label,
) )
level = st.radio( level = st.radio(
"Weergave", "Weergave",
@ -636,12 +737,25 @@ def build_compass_tab(db_path: str, window_size: str) -> None:
index=0, index=0,
horizontal=True, horizontal=True,
) )
min_mps = st.number_input(
"Min. Kamerleden per partij",
min_value=1,
max_value=20,
value=3,
step=1,
help="Partijen met minder dan dit aantal zetels worden niet weergegeven.",
)
pos = positions_by_window.get(window_idx, {}) pos = positions_by_window.get(window_idx, {})
if not pos: if not pos:
st.info(f"Geen data voor venster {window_idx}") st.info(f"Geen data voor venster {window_idx}")
return return
# For current_parliament, restrict to MPs who are still seated (tot_en_met IS NULL).
# Historical windows include all MPs active at the time — no restriction needed.
if window_idx == "current_parliament":
pos = {mp: xy for mp, xy in pos.items() if mp in active_mps}
# Deduplicate MPs whose names appear both with and without a parenthetical first name, # Deduplicate MPs whose names appear both with and without a parenthetical first name,
# e.g. "Dijk, J.P." and "Dijk, J.P. (Jimmy)". Keep the canonical (stripped) name and # e.g. "Dijk, J.P." and "Dijk, J.P. (Jimmy)". Keep the canonical (stripped) name and
# average positions if both variants are present. # average positions if both variants are present.
@ -665,12 +779,19 @@ def build_compass_tab(db_path: str, window_size: str) -> None:
df_pos = pd.DataFrame(rows) df_pos = pd.DataFrame(rows)
# Drop parties below the minimum MP threshold (unreliable centroids).
party_counts = df_pos[df_pos["party"] != "Unknown"]["party"].value_counts()
valid_parties = set(party_counts[party_counts >= min_mps].index)
df_pos = df_pos[df_pos["party"].isin(valid_parties)]
if df_pos.empty:
st.info("Geen partijen met genoeg Kamerleden voor dit venster.")
return
if level == "Partijen": if level == "Partijen":
# Aggregate to party centroids # Aggregate to party centroids
df_party = ( df_party = df_pos.groupby("party", as_index=False).agg(
df_pos[df_pos["party"] != "Unknown"] x=("x", "mean"), y=("y", "mean"), n=("name", "count")
.groupby("party", as_index=False)
.agg(x=("x", "mean"), y=("y", "mean"), n=("name", "count"))
) )
df_party["name"] = df_party["party"] df_party["name"] = df_party["party"]
colour_map = { colour_map = {
@ -685,7 +806,7 @@ def build_compass_tab(db_path: str, window_size: str) -> None:
hover_name="party", hover_name="party",
hover_data={"party": False, "x": ":.3f", "y": ":.3f", "n": True}, hover_data={"party": False, "x": ":.3f", "y": ":.3f", "n": True},
color_discrete_map=colour_map, color_discrete_map=colour_map,
title=f"Politiek Kompas — {window_idx} (partijen)", title=f"Politiek Kompas — {_window_label(window_idx)} (partijen)",
labels={ labels={
"x": "Links ← → Rechts", "x": "Links ← → Rechts",
"y": "Progressief ↑ / Conservatief ↓", "y": "Progressief ↑ / Conservatief ↓",
@ -705,7 +826,7 @@ def build_compass_tab(db_path: str, window_size: str) -> None:
hover_name="name", hover_name="name",
hover_data={"party": True, "x": ":.3f", "y": ":.3f"}, hover_data={"party": True, "x": ":.3f", "y": ":.3f"},
color_discrete_map=colour_map, color_discrete_map=colour_map,
title=f"Politiek Kompas — {window_idx}", title=f"Politiek Kompas — {_window_label(window_idx)}",
labels={"x": "Links ← → Rechts", "y": "Progressief ↑ / Conservatief ↓"}, labels={"x": "Links ← → Rechts", "y": "Progressief ↑ / Conservatief ↓"},
) )
@ -1012,158 +1133,162 @@ def build_svd_components_tab(db_path: str) -> None:
"De dominante dimensie van het parlement: partijen aan de linkerkant (PvdD, GL-PvdA, " "De dominante dimensie van het parlement: partijen aan de linkerkant (PvdD, GL-PvdA, "
"DENK, SP) stemmen progressief — voor sociale voorzieningen, klimaat, internationale " "DENK, SP) stemmen progressief — voor sociale voorzieningen, klimaat, internationale "
"solidariteit — terwijl partijen aan de rechterkant (PVV, NSC, BBB, SGP) inzetten op " "solidariteit — terwijl partijen aan de rechterkant (PVV, NSC, BBB, SGP) inzetten op "
"nationaal belang, migratiebeheer en conservatieve waarden. Dit is de klassieke " "nationaal belang, migratiebeheer en conservatieve waarden. Linkse moties omvatten "
"links-rechts tegenstelling die het meeste verschil in stemgedrag verklaart." "boycots van Israëlische defensiebedrijven, huurverlaging en het oprichten van "
"zorgbuurthuizen; rechtse moties gaan over NAVO-verplichtingen, juridische ruimte voor "
"drones en gaswinningsprojecten. Dit is de klassieke links-rechts tegenstelling die "
"het meeste verschil in stemgedrag verklaart."
), ),
"positive_pole": "Nationalistisch-conservatief: PVV, NSC, BBB, SGP", "positive_pole": "Nationalistisch-conservatief: PVV, NSC, BBB, SGP, VVD",
"negative_pole": "Progressief-links: PvdD, GL-PvdA, DENK, SP", "negative_pole": "Progressief-links: PvdD, GL-PvdA, DENK, SP",
"flip": False, "flip": False,
}, },
2: { 2: {
"label": "Nationalistisch migratiebeleid versus progressief internationaal solidariteitsdenken", "label": "PVV/FVD populistisch isolationisme versus het overige parlement",
"explanation": ( "explanation": (
"Deze as weerspiegelt de spanning tussen een nationalistisch-conservatieve koers " "Deze as isoleert PVV en FVD van alle andere partijen. Aan de positieve kant staan "
"(PVV, BBB, JA21, NSC) die inzet op asielbeperking en nationaal belang, versus een " "moties die artsen vrijpleiten die hydroxychloroquine voorschreven, Syriërs direct "
"links-progressieve koers (SP, PvdD, DENK, GroenLinks) die internationale solidariteit, " "willen terugsturen, geen geld aan Jordanië willen geven en de richtlijn tijdelijke "
"reproductieve rechten, LHBT+-zorg en bescherming van minderheden centraal stelt. " "bescherming voor Oekraïners willen beëindigen. Aan de negatieve kant staan "
"Moties aan de positieve kant beperken asielzoekers en verdedigen nationaal belang " "mainstream-moties van CU, CDA, VVD en NSC over digitale toegankelijkheid, "
"tegenover EU-druk, terwijl moties aan de negatieve kant Israëlisch optreden veroordelen, " "jongerenzorg en zorgstandaarden — partijen die in de positieve ruimte van as 1 "
"Gazaanse slachtoffers helpen, abortus en PrEP vergoeden, en moslims beschermen tegen " "zitten maar hier op één lijn staan met links. Dit is geen links-rechts as maar een "
"discriminatie. Dit is een van de meest fundamentele breuken in de huidige Nederlandse politiek." "populistisch-isolationisme-as: PVV en FVD vormen een eigen cluster dat los staat "
"van de rest van het politieke spectrum."
), ),
"positive_pole": "Asielbeperking, nationaal belang, restrictief migratiebeleid", "positive_pole": "PVV/FVD populistisch isolationisme: anti-EU, anti-Oekraïne, antiwetenschap",
"negative_pole": "Pro-Palestina, progressieve zorgrechten, anti-discriminatie minderheden", "negative_pole": "Gehele overige parlement: mainstream links én rechts",
"flip": False, "flip": False,
}, },
3: { 3: {
"label": "Humanitaire solidariteit en inclusie versus nationalistische handhaving en deregulering", "label": "Sociaal-economisch links versus marktliberaal en landelijk rechts",
"explanation": ( "explanation": (
"Deze as scheidt partijen die inzetten op internationale humanitaire solidariteit " "Deze as weerspiegelt de klassieke sociaal-economische breuklijn. Aan de linkerkant "
"(sanctieverlichting Syrië, medische evacuatie Gazaanse kinderen), sociale inclusie " "staan moties van SP die bezuinigingen op zorg en gemeentefonds willen schrappen, "
"(gelijke financieringstoegang) en pragmatische justitie (slimmer straffen) van partijen " "winstuitkeringen in de zorg willen verbieden en instemmingsrecht bij "
"die pleiten voor strikte strafuitvoering, deregulering via afschaffing nationale koppen, " "ziekenhuisfusies eisen — allemaal gericht op bescherming van publieke voorzieningen. "
"beperking van ngo's in het migratiebeleid en bescherming van eigendomsvrijheid zonder " "Aan de rechterkant staan moties van BBB (wolvenzendering), VVD (langetermijn"
"discriminatieregels. De uitzonderlijk grote scoremagnitudes (±11,7) duiden op een " "investeerders zorg, controversieel verklaren) en NSC (belastingplichtigen 2023/2024) "
"bijzonder scherpe polarisatie, waarbij links-progressieve en christelijk-sociale partijen " "die een marktgerichtere koers voorstaan of agrarische belangen verdedigen. SP scoort "
"(SP, D66, GL, DENK, CU, PvdA, CDA, NSC) tegenover rechts-nationalistisch georiënteerde " "sterk links, VVD en NSC sterk rechts."
"partijen (PVV, BBB, VVD-rechtsflank, JA21) staan. "
"Opvallend: FVD scoort ook aan de solidariteitskant, niet vanuit humanitaire overtuiging "
"maar vanwege hun anti-Amerikaans/pro-Russisch buitenlandbeleid — zij stemden vóór "
"opheffing van Amerikaanse sancties tegen Syrië om dezelfde reden als DENK en SP, "
"maar vanuit een heel andere politieke logica."
), ),
"positive_pole": "Internationale solidariteit, inclusie en pragmatische overheidsinterventie", "positive_pole": "Sociaal-economisch links: publieke zorg, tegengaan marktwerking",
"negative_pole": "Strikte handhaving, deregulering en nationalistisch eigenbelang boven humanitaire verplichtingen", "negative_pole": "Marktliberaal en agrarisch-rechts: VVD, NSC, BBB",
"flip": True, "flip": True,
}, },
4: { 4: {
"label": "Publieke voorzieningen beschermen versus liberale marktwerking", "label": "Christelijk-sociaal centrum versus populistisch-soevereinistisch",
"explanation": ( "explanation": (
"Deze as weerspiegelt de klassieke sociaal-economische tegenstelling tussen links en " "Deze as scheidt christelijk-sociale en gematigde centrumpartijen (CU, CDA, D66) van "
"liberaal-economisch rechts. Aan de positieve kant staan moties van SP en DENK die " "populistisch-soevereinistische partijen (FVD, NSC). Aan de linkerkant staan "
"pleiten voor betaalbare zorg, lage treintarieven, bescherming van politiepersoneel en " "CU-moties over vaderbetrokkenheid, long covid vergoeding en internationale "
"regionale brandweerposten — allemaal gericht op het beschermen van publieke voorzieningen " "samenwerking; aan de rechterkant FVD-moties over het verbieden van pleegzorg bij "
"voor gewone burgers. Aan de negatieve kant staan moties van VVD, D66, Volt en NSC die " "paren van hetzelfde geslacht, een migratiesaldo van max 60.000 en het verlaten van "
"pleiten voor het EU-Mercosur vrijhandelsverdrag en een flexibele kennismigrantenregeling " "de WHO. NSC scoort sterk rechts op deze as door amendementen die evaluaties en "
"ten behoeve van het economisch verdienvermogen. Deze dimensie is politiek betekenisvol " "grondwetswijzigingen (artikel 23) willen blokkeren. Dit is een cultureel-"
"omdat hij de fundamentele vraag raakt of de staat actief moet ingrijpen om collectieve " "institutionele as: vertrouwen in internationale instituties en pluralisme tegenover "
"voorzieningen betaalbaar en toegankelijk te houden, of dat vrije markt en open handel " "soevereinistisch-traditioneel wantrouwen."
"leidend moeten zijn."
), ),
"positive_pole": "Vrije handel, open economie en marktgerichte arbeidsmigratie", "positive_pole": "Christelijk-sociaal en institutioneel: CU, CDA, D66",
"negative_pole": "Staatsbescherming van betaalbare publieke voorzieningen voor iedereen", "negative_pole": "Populistisch-soevereinistisch: FVD, NSC-rechtsflank",
"flip": False, "flip": True,
}, },
5: { 5: {
"label": "Christelijk-conservatief sociaal beleid versus seculier progressief", "label": "Christelijk-conservatief en ruraal sociaal versus seculier-progressief",
"explanation": ( "explanation": (
"Deze as reflecteert de politieke tegenstelling tussen christelijk-conservatieve " "Deze as reflecteert de tegenstelling tussen christelijk-conservatieve en ruraal-"
"partijen (ChristenUnie en SGP) en seculier-progressieve partijen. Aan de positieve " "sociale partijen enerzijds (NSC, CU, SGP, CDA) en seculier-progressieve partijen "
"pool staan moties over bescherming van het gezin, kritische houding tegenover euthanasie " "anderzijds (D66, GL-PvdA, SP). Rechtse moties omvatten vrijwilligers in "
"bij jongeren, ruimte voor kerkgemeenschappen, bescherming van de agrarische sector en " "schuldhulpverlening ondersteunen, maatschappelijke diensttijd koppelen aan "
"een zakelijker onderwijscurriculum. Deze as is politiek betekenisvol omdat de SGP en CU " "arbeidsmarktafstand en WW-duur alleen verkorten met omscholing. Linkse moties "
"consistent vanuit een christelijk-sociale visie stemmen tegenover partijen als D66, " "bepleiten erkenning van meerouderschap, het recht op abortus in het EU-handvest "
"GroenLinks-PvdA en SP die een seculier-progressief beleid voorstaan." "en een nationaal coördinator buitenlandse beïnvloeding. NSC en CU scoren sterk "
"rechts; D66 en GL-PvdA sterk links."
), ),
"positive_pole": "Seculier-progressief: individuele autonomie, progressieve sociale rechten", "positive_pole": "Christelijk-conservatief en ruraal: NSC, CU, SGP, CDA",
"negative_pole": "Christelijk-conservatief: gezin, kerk, leven, traditionele waarden", "negative_pole": "Seculier-progressief: D66, GL-PvdA, SP",
"flip": True, "flip": False,
}, },
6: { 6: {
"label": "Christelijk-sociaal beschermingsbeleid versus links-progressieve systeemkritiek", "label": "Energiepragmatisme en liberale fiscaliteit versus klimaatactivisme en anti-discriminatie",
"explanation": ( "explanation": (
"Deze as scheidt centrum-rechtse, christelijk-sociale partijen (CU, SGP, BBB, VVD) die " "Aan de rechterkant staan moties die kernenergie als CO₂-arm alternatief willen "
"via pragmatisch overheidsbeleid specifieke kwetsbare groepen beschermen — zoals " "erkennen op COP30, lng-capaciteit prefereren boven vulgraadverplichtingen en "
"pgb-budgethouders, christenen in Syrisch terugkeerbeleid en kinderen online — van " "discriminatiemeldpunten willen inventariseren (JA21). Aan de linkerkant staan "
"links-progressieve partijen (SP, GroenLinks) die structurele systeemkritiek leveren op " "moties die fossiele industrie van klimaatconferenties willen weren (GL), de "
"arbeidsmigratiemisstanden, winstbejag in de zorg en internationale solidariteit met " "integratieparadox willen meenemen in beleid en aanvallen van Israël op Libanon "
"gemarginaliseerde groepen. De politieke tegenstelling gaat over de oriëntatie van sociaal " "veroordelen (DENK, SP). FVD en JA21 scoren sterk rechts; GL-PvdA, DENK en SP "
"beleid: doelgroepgericht en institutioneel versus structureel-hervormend en solidair." "sterk links. Dit is een combinatie van energie-ideologie en culturele polarisatie "
"rondom klimaat, integratie en buitenlandspolitiek."
), ),
"positive_pole": "Christelijk-sociaal beschermingsbeleid voor pgb, kinderen en geloofsgroepen", "positive_pole": "Energiepragmatisme, kernenergie, liberale fiscaliteit: FVD, JA21, SGP, CU",
"negative_pole": "Links-progressieve systeemkritiek op zorg, arbeid en internationale solidariteit", "negative_pole": "Klimaatactivisme, anti-discriminatie en internationale verantwoordelijkheid: GL, DENK, SP",
"flip": False, "flip": False,
}, },
7: { 7: {
"label": "Liberaal investeren en defensie versus linkse bescherming en controle", "label": "Pragmatisch coalitiebeleid versus ecologisch-progressief en religieuze bescherming",
"explanation": ( "explanation": (
"Deze as scheidt partijen als D66 en VVD (positief), die inzetten op gerichte " "Aan de rechterkant staan pragmatische coalitiemoties: voedselprijzen inzichtelijk "
"investeringen in sport, wetenschap, defensie en slachtofferhulp, van partijen als SP, " "maken (PVV/CU), papieren schoolboeken behouden (CDA), invoeringstoets voor mkb "
"DENK en NSC (negatief), die nadruk leggen op collectieve sociale bescherming, " "(NSC) en het controversieel verklaren van bepaalde dossiers (VVD). Aan de "
"parlementaire controle over militaire inzet en weren van marktwerking uit publieke " "linkerkant staan progressief-ecologische moties: een landelijk stookverbod (PvdD), "
"sectoren zoals de zorg. De spanning draait om de vraag of de overheid via gerichte " "verbranding van religieuze geschriften strafbaar stellen (DENK), chroom-6 "
"liberale investeringen of via collectivistische regulering en bescherming moet opereren." "schadevergoedingen (SP/D66) en tegenhouden van nieuwe gaswinning (SP). De "
"partijscores zijn smal maar consistent: PvdD, DENK en SP links; CU, NSC en CDA "
"rechts."
), ),
"positive_pole": "Gerichte liberale investeringen in sport, wetenschap en defensie", "positive_pole": "Ecologisch-progressief en religieuze bescherming: PvdD, DENK, SP",
"negative_pole": "Collectieve bescherming, parlementaire controle en anti-marktwerking in zorg", "negative_pole": "Pragmatisch coalitiebeleid: PVV, CU, NSC, CDA, VVD",
"flip": False, "flip": True,
}, },
8: { 8: {
"label": "Confessioneel-sociaal coalitiebeleid versus procedurele blokkade en handhaving", "label": "Pro-Europees defensie en investering versus nationaal-populistisch wantrouwen",
"explanation": ( "explanation": (
"De positieve pool groepeert moties van CU, NSC, SGP en D66 die concreet beleid bepleiten " "Aan de rechterkant staan moties van D66, Volt en CDA die NAVO-militaire mobiliteit "
"op uiteenlopende terreinen: kostendelersnorm afschaffen ten gunste van bijstandsgerechtigden, " "in het Schengengebied regelen, 35% van defensiematerieel Europees willen inkopen "
"arbeidskorting terugdraaien om gepensioneerden te ontzien, Dutch Dome voor vitale " "en een Nationaal Groeifonds-ronde willen lanceren — allemaal pro-Europees en "
"infrastructuurbescherming, stikstofmaatwerk in Regio Foodvalley, versnelling van " "investerings-georiënteerd. Aan de linkerkant staan moties over coronastrategie "
"bouwvergunningverlening en het expliciet stellen van abortusvermindering als beleidsdoel. " "en oversterfte (PVV/BBB), ketenverantwoordelijkheid bij toeslagen (DENK) en "
"De negatieve pool bevat het controversieel verklaren van het coffeeshopketen-experiment " "vraagresponsovereenkomsten zonder opzegtermijn (PVV). SP scoort sterk links door "
"(procedurele blokkade), een handhavingsdoelstelling voor illegaal vuurwerk en een " "wantrouwen jegens institutionele processen; D66 en Volt sterk rechts door hun "
"deregulerende uitzondering voor de topsportsector." "pro-Europese en investeringsgerichte koers."
), ),
"positive_pole": "Coalitie christelijk-sociaal beleid: defensie, stikstofmaatwerk, bouw en ethiek", "positive_pole": "Pro-Europees, NAVO en investering: D66, Volt, CDA",
"negative_pole": "Procedurele blokkade coffeeshop, handhavingsdoelstelling en topsportderegulering", "negative_pole": "Nationaal-populistisch wantrouwen: PVV, SP-controlereflex, DENK",
"flip": False, "flip": False,
}, },
9: { 9: {
"label": "Brede coalitiemeerderheid versus links marktingrijpen zorg", "label": "Gereformeerd-sociaal centrum versus progressief regulerend",
"explanation": ( "explanation": (
"Deze as onderscheidt moties die steun kregen van een brede, rechtse-tot-centrumlinkse " "Aan de linkerkant staan moties van NSC, CU en SGP over naleving van de Financiële-"
"meerderheid (positieve pool) van een specifieke linkse motie (SP/GL-PvdA) die strenge " "verhoudingswet, beperking van arbeidsmigratie binnen de EU, een nieuwe "
"inkomenslimieten voor zorgbestuurders en aandeelhouders wil afdwingen (negatieve pool). " "opleidingsplek voor tandartsen en een actieplan tegen misbruik van "
"De positieve moties omvatten uiteenlopende onderwerpen — defensie, onderwijs, asiel, " "hallucinerende geneesmiddelen. Aan de rechterkant staan moties van PvdD, GL "
"belastingconstructies, natuur — ingediend door partijen als CU, NSC, JA21, D66 en " "en D66: moratorium op geitenstallen, verbod op gokadvertenties in zoekmachines, "
"PvdA/GL, die gezamenlijk door de meerderheid werden gesteund. De negatieve pool " "verduidelijking van voorlopige hechtenis en leegstandbelasting voor woningen. "
"vertegenwoordigt een socialistische marktinterventie (WNT-normen in de zorg) die door " "CU en SGP scoren sterk links; PvdD en D66 sterk rechts. Dit is een as van "
"de rechtse regeringsmeerderheid werd weggestemd." "gereformeerd-sociaal pragmatisme tegenover progressieve regulering."
), ),
"positive_pole": "Breed gedragen beleid door centrum-rechts meerderheidsstemmen", "positive_pole": "Gereformeerd-sociaal centrum: NSC, CU, SGP — naleving, arbeidsmarkt, volksgezondheid",
"negative_pole": "Socialistische inkomensregulering en marktingrijpen in de zorg", "negative_pole": "Progressief regulerend: PvdD, GL, D66 — milieu, wonen, rechtsstaat",
"flip": False, "flip": True,
}, },
10: { 10: {
"label": "Gereguleerde kennismigratie en natuur-landbouwtransitie versus institutionele veiligheid", "label": "Residuele as: individuele dienstverlening versus collectieve handhaving",
"explanation": ( "explanation": (
"De positieve pool groepeert moties die kennismigratie willen beperken tot sectoren met " "De tiende as vangt kleine resterende variantie op na de eerste negen. Aan de "
"aantoonbaar tekort (blauwe kaart), de kalverhouderij willen inkrimpen via dierrechten, " "linkerkant staan moties die individuele dienstverlening verbeteren: minder "
"intensieve teelt op vrijgekochte boerengrond willen voorkomen, en humanitaire druk op " "tijdsintensieve inspectiebezoeken (VVD), een persoonlijk dossier voor ouders "
"Israël inzake Gaza willen uitoefenen. Indieners zijn een brede coalitie van NSC, SP, " "binnen één maand (SP), tegemoetkoming arbeidsongeschikten in stand houden (SP) "
"D66, CU en GL-PvdA. De negatieve pool bevat moties over institutionele coördinatie van " "en een verlaging van de leeftijd voor kindgesprekken (PVV). Aan de rechterkant "
"veiligheidsbeleid (Veilig Thuis), economische soevereiniteit (vitale productie " "staan handhavings- en regelgevingsmoties: aangifteplicht voor scholen bij "
"terughalen naar Nederland) en beperking van anonimiteit bij Woo-verzoeken, ingediend " "veiligheidsincidenten (VVD), rookvrije auto's met kinderen (NSC/CDA) en "
"door NSC, SGP en SP." "beloningsgeld voor tipgevers op de Nationale Opsporingslijst (VVD). De scores "
"zijn klein (max ±6,5) en de coalitie op elke pool is gemengd — dit is geen "
"duidelijke ideologische as maar een restfactor."
), ),
"positive_pole": "Beperkte kennismigratie, natuur-landbouwtransitie en Gaza-humanitair", "positive_pole": "Individuele dienstverlening en ontzorging: VVD, SP, PVV-elementen",
"negative_pole": "Institutionele veiligheidssturing, economisch nationalisme en Woo-beperking", "negative_pole": "Collectieve handhaving en regelgeving: VVD-handhavingsflank, CDA, NSC",
"flip": True, "flip": True,
}, },
} }

@ -202,7 +202,7 @@ def run(args: argparse.Namespace) -> int:
if not dry_run: if not dry_run:
from pipeline.text_pipeline import ensure_text_embeddings from pipeline.text_pipeline import ensure_text_embeddings
stored, existing, no_text, errors = ensure_text_embeddings( stored, existing, no_text, errors, _failed_ids = ensure_text_embeddings(
db_path=db_path, model=args.text_model, batch_size=args.text_batch_size db_path=db_path, model=args.text_model, batch_size=args.text_batch_size
) )
_logger.info( _logger.info(

@ -1,5 +1,6 @@
import json import json
import logging import logging
import re
from typing import Optional, Dict, List, Tuple from typing import Optional, Dict, List, Tuple
import numpy as np import numpy as np
@ -63,6 +64,68 @@ _PARTY_NAME_MAP = {
# Party names for which we have no usable mp_metadata (tiny noise, skip expansion) # Party names for which we have no usable mp_metadata (tiny noise, skip expansion)
_SKIP_PARTIES = {"Brinkman", "Bontes", "Krol", "Van Kooten-Arissen"} _SKIP_PARTIES = {"Brinkman", "Bontes", "Krol", "Van Kooten-Arissen"}
# Special-character corrections for individual vote name parts
_NAME_CHAR_FIXES: Dict[str, str] = {
"Gündogan": "Gündoğan",
}
def _votes_name_to_meta_format(votes_name: str) -> str:
"""Convert an mp_votes individual-record name to mp_metadata canonical format.
mp_votes format: ``{surname} {lowercase_tussenvoegsel}, {initials} ({FirstName})``
e.g. ``Dijk van, I. (Inge)`` ``Van Dijk, I.``
``Beer de, M.E.E.`` ``De Beer, M.E.E.``
``Abassi el, I.`` ``El Abassi, I.``
``Baarle van, S.R.T.`` ``Van Baarle, S.R.T.``
mp_metadata format: ``{Capital_tussenvoegsel} {Achternaam}, {initials}``
Steps:
1. Split on ``, `` name_part, initials_part.
2. Strip parenthetical first name from initials_part.
3. In name_part, isolate trailing lowercase words as tussenvoegsel;
the rest is the achternaam.
4. Reconstruct as ``{Capitalized tussenvoegsel} {achternaam}, {initials}``.
5. Apply special-character fixes.
"""
if "," not in votes_name:
return votes_name
comma_idx = votes_name.index(",")
name_part = votes_name[:comma_idx].strip()
initials_part = votes_name[comma_idx + 1 :].strip()
# Remove parenthetical first name, e.g. "(Inge)" or "(Jan-Willem)"
initials_part = re.sub(r"\s*\([^)]+\)$", "", initials_part).strip()
# Split name_part into words; trailing lowercase words are tussenvoegsel
words = name_part.split()
# Find split point: last run of lowercase words at the end
split = len(words)
for i in range(len(words) - 1, -1, -1):
if words[i][0].islower():
split = i
else:
break
achternaam_words = words[:split]
tussenvoegsel_words = words[split:]
if tussenvoegsel_words:
# Capitalize the first letter of the first tussenvoegsel word
tussenvoegsel_words[0] = tussenvoegsel_words[0].capitalize()
canonical = (
" ".join(tussenvoegsel_words + achternaam_words) + ", " + initials_part
)
else:
canonical = " ".join(achternaam_words) + ", " + initials_part
# Apply special-character fixes
for bad, good in _NAME_CHAR_FIXES.items():
canonical = canonical.replace(bad, good)
return canonical
def _build_expanded_rows( def _build_expanded_rows(
db_path: str, start_date: str, end_date: str db_path: str, start_date: str, end_date: str
@ -136,9 +199,11 @@ def _build_expanded_rows(
all_motion_ids = set(motion_individual.keys()) | set(motion_party.keys()) all_motion_ids = set(motion_individual.keys()) | set(motion_party.keys())
for mid in all_motion_ids: for mid in all_motion_ids:
if mid in motion_individual and motion_individual[mid]: if mid in motion_individual and motion_individual[mid]:
# Motion already has individual MP rows — use them directly, skip party rows # Motion already has individual MP rows — convert to mp_metadata name format,
# then use directly; skip party rows for this motion.
for mp_name, vote, date in motion_individual[mid]: for mp_name, vote, date in motion_individual[mid]:
expanded.append((mid, mp_name, vote, str(date))) canonical_name = _votes_name_to_meta_format(str(mp_name))
expanded.append((mid, canonical_name, vote, str(date)))
else: else:
# Party-only motion — expand each party row to individual MPs # Party-only motion — expand each party row to individual MPs
for party_name, vote, date in motion_party[mid]: for party_name, vote, date in motion_party[mid]:

File diff suppressed because one or more lines are too long

@ -0,0 +1,202 @@
# Deployment Plan: motief.sgeboers.nl
**Date:** 2026-03-26
**Subdomain:** `motief.sgeboers.nl`
**Stack:** Streamlit · DuckDB · Docker · Nginx · Drone CI
**Target:** VPS, `webapps` user at `/home/webapps/motief/`
---
## What's already ready (no changes needed)
- `Dockerfile` — builds `streamlit run Home.py --server.port=8501`
- `docker-compose.yml` — `motief` + `scheduler` services, `DATA_DIR` env override
- `.drone.yml` — builds image, pushes to registry, SSH-deploys on push to `main`
- `Home.py`, `pages/1_Stemwijzer.py`, `pages/2_Explorer.py` — all exist
---
## Step A — VPS: one-time directory setup
SSH in as `webapps`:
```bash
mkdir -p /home/webapps/motief/data
```
Create `/home/webapps/motief/.env`:
```env
DOCKER_REGISTRY=<your-registry-url>
DOCKER_USERNAME=<registry-user>
DOCKER_PASSWORD=<registry-password>
OPENROUTER_API_KEY=<key>
OPENAI_API_KEY=<key>
```
Copy `docker-compose.yml` into place:
```bash
# From local machine
scp docker-compose.yml webapps@<vps>:/home/webapps/motief/
```
Or just clone the repo there and symlink — either works since Drone will overwrite it.
---
## Step B — Transfer the database
From local machine (~4 GB, takes a few minutes):
```bash
rsync -avz --progress data/motions.db webapps@<vps>:/home/webapps/motief/data/motions.db
```
Do this as close to go-live as possible so the data isn't stale on launch.
---
## Step C — DNS
Add an **A record** in your DNS provider:
```
stematlas → (obsolete, skip)
motief → <VPS IPv4>
```
TTL 300 for the first deploy so you can iterate quickly; bump to 3600 after it's stable.
---
## Step D — Nginx vhost
Create `/etc/nginx/sites-available/motief`:
```nginx
server {
listen 80;
server_name motief.sgeboers.nl;
return 301 https://$host$request_uri;
}
server {
listen 443 ssl;
server_name motief.sgeboers.nl;
ssl_certificate /etc/letsencrypt/live/motief.sgeboers.nl/fullchain.pem;
ssl_certificate_key /etc/letsencrypt/live/motief.sgeboers.nl/privkey.pem;
# Streamlit requires WebSocket upgrade for live updates
location / {
proxy_pass http://127.0.0.1:8501;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_read_timeout 86400;
}
}
```
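Enable and reload (note: `nginx -t` fails while the certificate files referenced above do not exist yet — on a fresh host, enable only the port-80 server block first, run Step E, then add the 443 block):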
```bash
sudo ln -s /etc/nginx/sites-available/motief /etc/nginx/sites-enabled/
sudo nginx -t && sudo systemctl reload nginx
```
---
## Step E — TLS cert
```bash
sudo certbot --nginx -d motief.sgeboers.nl
```
(Assumes Certbot is already installed and working for other subdomains.)
---
## Step F — Configure Drone secrets
In the Gitea/Drone repo settings for `sgeboers/stemwijzer`, add:
| Secret | Value |
|--------|-------|
| `DOCKER_REGISTRY` | Your registry URL |
| `DOCKER_USERNAME` | Registry login |
| `DOCKER_PASSWORD` | Registry password |
| `DEPLOY_HOST` | VPS hostname/IP |
| `DEPLOY_SSH_PORT` | SSH port (usually 22) |
| `DEPLOY_USER` | `webapps` |
| `DEPLOY_PASSWORD` | webapps SSH password |
---
## Step G — First deploy
Option 1 — trigger Drone automatically:
```bash
git push origin main
```
Drone builds → pushes image → SSHes into VPS → `docker-compose up -d`.
Option 2 — manual first deploy (on VPS):
```bash
cd /home/webapps/motief
docker-compose pull
docker-compose up -d
```
---
## Step H — Verify
```bash
# On VPS
docker-compose -f /home/webapps/motief/docker-compose.yml logs -f motief
# From local browser
open https://motief.sgeboers.nl
```
Checklist:
- [ ] Home.py loads with nav to Stemwijzer and Explorer
- [ ] Compass tab renders with correct party positions (GL-PvdA top-left, PVV bottom-right)
- [ ] SVD tab scree plot shows with highlighted top-2 bars
- [ ] Similarity search returns results
- [ ] Scheduler container is running (`docker-compose ps`)
---
## Ongoing: data updates
The `scheduler` service runs the weekly pipeline inside the container:
- Scrapes new motions from the TK OData API
- Re-embeds new motion text via OpenRouter
- Updates similarity cache
The `motions.db` file on the VPS is the single source of truth — it's bind-mounted into both containers. No cron job needed on the host.
If you ever need to force a full re-run:
```bash
docker-compose exec scheduler python pipeline/run_pipeline.py --db-path data/motions.db
```
---
## Dependency order
```
A (dirs + .env) ─┐
B (rsync DB) ─┤─► G (first deploy) ─► H (verify)
C (DNS) ─┤
D (nginx) ─┤
E (certbot) ─┘
F (Drone secrets) ──► future auto-deploys on push to main
```
Steps A–F can all be done in one SSH session. Total estimated time: **45 minutes** (mostly waiting on rsync).
Loading…
Cancel
Save