# Provenance (patch envelope that shared these lines with the code, kept verbatim):
#   From c6f85406713918f4702b29c8be0c2a35092c470d Mon Sep 17 00:00:00 2001
#   From: Sven Geboers
#   Date: Tue, 5 May 2026 21:14:11 +0200
#   Subject: [PATCH] feat(right-wing): derive right-wing keywords via differential TF-IDF
#
#   Implements U1: derive_keywords.py uses party voting patterns to classify
#   motions as right-wing vs left-wing, then computes differential TF-IDF on
#   cleaned motion titles to surface policy terms distinctive to right-wing
#   motions.
#
#   Key design choices:
#   - Vote threshold: 60% of parties in group must vote 'voor'
#   - Text cleaning strips motion prefixes aggressively (handles multi-word
#     surnames, plural 'leden', t.v.v. parentheticals)
#   - Expanded Dutch stopword list filters procedural and generic noise
#   - Results written to analysis/right_wing/right_wing_keywords.json
#
#   Produces ~50 filtered terms including: asielzoekers, defensie, kernenergie,
#   boeren, vreemdelingenbeleid, stikstof, asielstop, strafrecht.
#   ---
#    analysis/right_wing/derive_keywords.py       | 364 ++++++++++
#    analysis/right_wing/right_wing_keywords.json | 664 +++++++++++++++++++
#    2 files changed, 1028 insertions(+)
#    create mode 100644 analysis/right_wing/derive_keywords.py
#    create mode 100644 analysis/right_wing/right_wing_keywords.json
#
#   diff --git a/analysis/right_wing/derive_keywords.py b/analysis/right_wing/derive_keywords.py
#   new file mode 100644
#   index 0000000..a0a8188
#   --- /dev/null
#   +++ b/analysis/right_wing/derive_keywords.py
#   @@ -0,0 +1,364 @@
"""Derive a right-wing keyword taxonomy from motion titles using TF-IDF.

Identifies motions where canonical right-wing parties vote predominantly 'voor',
contrasts them with left-wing control motions, and extracts distinctive terms
via differential TF-IDF.

Pipeline:
    1. Aggregate MP votes per motion and party (``mp_votes`` table).
    2. Label a motion right-wing (or left-wing) when that party group supports
       it and the other group does not; every other motion is "unmatched".
    3. Clean the title plus the first 500 characters of body text.
    4. Fit a single TF-IDF model over both groups and rank terms by
       ``mean_tfidf(right) - mean_tfidf(left)``.

Usage:
    uv run python analysis/right_wing/derive_keywords.py
    uv run python analysis/right_wing/derive_keywords.py --db data/motions.db
"""

from __future__ import annotations

import argparse
import json
import logging
import re
import sys
from pathlib import Path
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:  # only needed for annotations; the runtime import is lazy
    import duckdb

# Ensure project root is on path for imports
ROOT = Path(__file__).parent.parent.parent.resolve()
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

logger = logging.getLogger("derive_keywords")
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")

# Dutch stopwords — expanded from derive_svd_labels.py.
# Tokens in this set are dropped by `_tokenize` before TF-IDF ever sees them.
DUTCH_STOPWORDS = frozenset(
    {
        # Function words
        "de", "het", "een", "van", "en", "in", "is", "dat", "op", "te", "voor",
        "met", "zijn", "aan", "niet", "om", "ook", "als", "maar", "bij", "door",
        "over", "naar", "uit", "dan", "was", "worden", "dit", "die", "zou",
        "kunnen", "moet", "heeft", "hun", "nog", "wel", "meer", "of", "tegen",
        "onder", "geen", "alle", "zal", "er", "zich", "na", "tot", "omdat",
        "hoe", "wat", "wie", "waar", "waarom", "kan",
        # Motion / budget boilerplate
        "motie", "lid", "leden", "c.s.", "verzoekt", "regering", "kamer",
        "vaststelling", "begrotingsstaten", "ministerie", "jaar", "voorstel",
        "wijziging", "amendement", "gewijzigde", "nader", "gewest", "artikel",
        "eerste", "tweede", "derde", "vierde", "nummer", "nr", "ontvangen",
        "datum", "voorgesteld", "beraadslaging", "overwegende",
        "constaterende", "betreffende", "inzake", "ten", "aanzien",
        "verzoeken", "besluiten", "kamerstuk", "procedure", "procedurele",
        "technische", "parlementaire", "parlement", "staten", "generaal",
        "minister", "ministers", "staatssecretaris", "staatssecretarissen",
        "kabinet",
        # Parliamentary procedural terms
        "gehoord", "uitspreken", "aangenomen", "spreekt", "roept",
        "stelt", "stellen",
        # Generic function words
        "gaat", "dag", "mogelijk", "direct", "per", "open", "hoger",
        "zien", "zetten", "stoppen", "intrekken", "toestand", "land",
        "orde", "enz", "nota", "gebruik", "gebruikte", "gebruiken",
        "moeten", "willen", "zullen", "zouden",
        "wordt", "waren", "werd", "werden",
        "hebben", "had", "hadden",
        # National/generic terms
        "nederland", "nederlandse", "nederlands", "nationale", "rijks",
        "financiën", "financieel", "financiële",
        # Politician names (right-wing) — filter as noise
        "wilders", "baudet", "haga", "eerdmans", "plas", "kops",
        "smolders", "vanderplas", "vangaal", "houwelingen", "bontes",
        "der", "den",
        # More pronouns / generic verbs
        "wij", "we", "jullie", "u", "jou", "jouw",
        "weer", "terug", "geven", "voeren", "doen", "maken", "komen",
        "gaan", "staan", "zitten", "liggen", "brengen", "nemen",
        "laten", "houden", "vinden",
        # More noise
        "onze", "taak", "stemmen", "box", "openen", "jong", "voornemens",
        # More politician names
        "roon", "maeijer", "emiel", "eppink",
        # NOTE(review): these two surfaced in the generated keyword lists and
        # look like surnames leaking through the byline stripping — confirm.
        "graus", "baarle",
    }
)

# Generic parliamentary terms to filter from final keyword list.
# Applied after ranking, as a second safety net on top of DUTCH_STOPWORDS.
GENERIC_TERMS = frozenset(
    {
        "motie", "amendement", "voorstel", "wijziging", "lid", "leden",
        "kamer", "regering", "ministerie", "minister", "staatssecretaris",
        "kabinet", "parlement", "parlementaire", "procedure", "technische",
        "procedurele", "beraadslaging", "vaststelling", "begrotingsstaten",
        "artikel", "nummer", "nr", "jaar", "datum", "ontvangen", "voorgesteld",
        "overwegende", "constaterende", "verzoekt", "verzoeken", "besluiten",
        "c.s.", "gewest", "eerste", "tweede", "derde", "vierde",
        "kamerstuk", "staten", "generaal", "ministers", "staatssecretarissen",
        "gewijzigde", "nader", "gewijzigd",
        # Additional procedural / generic noise
        "gehoord", "uitspreken", "aangenomen", "spreekt", "roept", "roeptop",
        "stelt", "stellen",
        "gaat", "dag", "mogelijk", "direct", "per", "open", "hoger",
        "zien", "zetten", "stoppen", "intrekken", "toestand", "land",
        "orde", "enz", "nota", "gebruik", "gebruikte", "gebruiken",
        "nederland", "nederlandse", "nederlands", "nationale", "rijks",
        "financiën", "financieel", "financiële",
        "wilders", "baudet", "haga", "eerdmans", "plas", "kops",
        "smolders", "vanderplas", "vangaal",
    }
)

# Leading "Motie van het lid X c.s. (t.v.v. ...) over " style prefix.
# Handles:
#   "Motie van het lid [Name] c.s. over "
#   "Motie van de leden [Name] en [Name] over "
#   "Gewijzigde motie van het lid [Name] (t.v.v. ...) over "
#   "Amendement van het lid [Name] over "
_PREFIX_RE = re.compile(
    r"^(?:gewijzigde\s+|nader\s+gewijzigde\s+)?(?:motie|amendement|voorstel)"
    r"(?:\s+van\s+(?:het\s+lid|de\s+leden)\s+[^()]*?)(?:\s+c\.s\.)?"
    r"(?:\s+\(t\.v\.v\.[^)]*\))?\s+over\s+"
)

# Surname = optional Dutch name particles followed by exactly one name token.
_NAME_PARTICLE = r"(?:van|de|den|der|el|in|op|te|ten|ter|'t|’t)"
_SURNAME = rf"(?:{_NAME_PARTICLE}\s+)*[\w'’-]+"

# Fallback byline ("van het lid X", "van de leden X, Y en Z", optional "c.s.").
# Deliberately bounded to the names themselves: an open-ended word run here
# would also delete the policy terms that follow the byline.
_BYLINE_RE = re.compile(
    rf"\bvan\s+(?:het\s+lid\s+{_SURNAME}"
    rf"|de\s+leden\s+{_SURNAME}(?:(?:\s*,\s*|\s+en\s+){_SURNAME})*)"
    r"(?:\s+c\.s\.)?"
)

_PARENTHETICAL_RE = re.compile(r"\(.*?\)")
_PUNCTUATION_RE = re.compile(r"[^\w\s]")
_DIGITS_RE = re.compile(r"\d+")
_WHITESPACE_RE = re.compile(r"\s+")


def _load_party_config() -> tuple[Any, Any, Any]:
    """Return ``(CANONICAL_LEFT, CANONICAL_RIGHT, _PARTY_NORMALIZE)``.

    The values come from ``analysis.config``. They are imported on first use
    rather than at module import so that the pure text helpers in this module
    remain importable when the project package is not on the path.
    """
    from analysis.config import CANONICAL_LEFT, CANONICAL_RIGHT, _PARTY_NORMALIZE

    return CANONICAL_LEFT, CANONICAL_RIGHT, _PARTY_NORMALIZE


def _clean_text(text: str) -> str:
    """Normalize motion text for TF-IDF: lowercase, strip prefixes, remove noise.

    Steps, in order: lowercase; drop a leading "motie/amendement/voorstel van
    het lid ... over " prefix; drop any remaining byline ("van het lid X",
    "van de leden X en Y", optional "c.s."); drop parentheticals, punctuation
    and digits; collapse whitespace.

    Args:
        text: Raw motion text (title, optionally followed by body text).

    Returns:
        The cleaned, lowercase, single-spaced text (possibly empty).
    """
    text = text.lower()
    text = _PREFIX_RE.sub("", text)
    # Only the byline itself is removed; the words after it are content.
    text = _BYLINE_RE.sub(" ", text)
    text = _PARENTHETICAL_RE.sub(" ", text)
    text = _PUNCTUATION_RE.sub(" ", text)
    text = _DIGITS_RE.sub(" ", text)
    return _WHITESPACE_RE.sub(" ", text).strip()


def _tokenize(text: str) -> list[str]:
    """Split cleaned text on whitespace, dropping stopwords and tokens of length <= 2."""
    return [
        word for word in text.split()
        if len(word) > 2 and word not in DUTCH_STOPWORDS
    ]


def _load_party_votes(
    con: duckdb.DuckDBPyConnection,
    party_normalize: Any = None,
) -> dict[int, dict[str, dict[str, int]]]:
    """Load aggregated party votes per motion.

    Args:
        con: Open database connection exposing an ``mp_votes`` table with
            ``motion_id``, ``party`` and ``vote`` columns.
        party_normalize: Mapping from raw party name to canonical name; names
            missing from the mapping are kept as-is. Defaults to
            ``analysis.config._PARTY_NORMALIZE``.

    Returns:
        ``{motion_id: {party: {'voor': int, 'tegen': int, 'afwezig': int}}}``.
        Any other vote label found in the table is added as an extra key.
    """
    if party_normalize is None:
        party_normalize = _load_party_config()[2]

    rows = con.execute(
        """
        SELECT motion_id, party, vote, COUNT(*) as n
        FROM mp_votes
        WHERE party IS NOT NULL
        GROUP BY motion_id, party, vote
        """
    ).fetchall()

    result: dict[int, dict[str, dict[str, int]]] = {}
    for motion_id, party, vote, n in rows:
        canonical = party_normalize.get(party, party)
        tally = result.setdefault(motion_id, {}).setdefault(
            canonical, {"voor": 0, "tegen": 0, "afwezig": 0}
        )
        # Several raw party names may normalize to the same canonical party.
        tally[vote] = tally.get(vote, 0) + n
    return result


def _compute_group_support(
    motion_votes: dict[str, dict[str, int]],
    party_set: frozenset[str],
    threshold: float = 0.60,
) -> bool:
    """Return True if at least ``threshold`` of the group's parties support the motion.

    The same ``threshold`` is applied at two levels:

    * a party supports the motion when ``voor / (voor + tegen + afwezig)`` is
      at least ``threshold`` (absent members therefore count against support);
    * the group supports the motion when the share of supporting parties,
      among parties of ``party_set`` with at least one recorded vote, is at
      least ``threshold``.

    Args:
        motion_votes: ``{party: {'voor': int, 'tegen': int, 'afwezig': int}}``
            for a single motion.
        party_set: Canonical party names that make up the group.
        threshold: Required share, used at both party and group level.

    Returns:
        False when no party of the group has a recorded vote.
    """
    total_parties = 0
    supporting_parties = 0
    for party, votes in motion_votes.items():
        if party not in party_set:
            continue
        total_votes = votes["voor"] + votes["tegen"] + votes["afwezig"]
        if total_votes == 0:
            continue
        total_parties += 1
        if votes["voor"] / total_votes >= threshold:
            supporting_parties += 1

    if total_parties == 0:
        return False
    return supporting_parties / total_parties >= threshold


def _load_motion_texts(con: duckdb.DuckDBPyConnection) -> dict[int, str]:
    """Load motion text keyed by id.

    The text is the title (empty string when missing) followed, when body
    text is present, by a space and its first 500 characters.
    """
    rows = con.execute("SELECT id, title, body_text FROM motions").fetchall()
    result: dict[int, str] = {}
    for motion_id, title, body_text in rows:
        text = title or ""
        if body_text:
            text = text + " " + body_text[:500]
        result[motion_id] = text
    return result


def _classify_motions(
    party_votes: dict[int, dict[str, dict[str, int]]],
    motion_texts: dict[int, str],
    right_parties: Any,
    left_parties: Any,
    right_threshold: float,
    left_threshold: float,
) -> tuple[list[int], list[int], list[int]]:
    """Split motion ids into (right-wing, left-wing, unmatched).

    Motions without text are skipped entirely. A motion supported by both
    groups, or by neither, is "unmatched".
    """
    right_ids: list[int] = []
    left_ids: list[int] = []
    unmatched: list[int] = []
    for motion_id, votes in party_votes.items():
        if motion_id not in motion_texts:
            continue
        is_right = _compute_group_support(votes, right_parties, right_threshold)
        is_left = _compute_group_support(votes, left_parties, left_threshold)
        if is_right and not is_left:
            right_ids.append(motion_id)
        elif is_left and not is_right:
            left_ids.append(motion_id)
        else:
            unmatched.append(motion_id)
    return right_ids, left_ids, unmatched


def _rank_terms(
    right_texts: list[str],
    left_texts: list[str],
    min_df: int,
    max_df_ratio: float,
) -> list[tuple[str, float, float, float]]:
    """Rank terms by mean TF-IDF in the right corpus minus mean in the left.

    Returns:
        ``(term, diff, right_mean, left_mean)`` tuples sorted by ``diff``
        descending, with GENERIC_TERMS and terms of length <= 2 removed.
    """
    try:
        from sklearn.feature_extraction.text import TfidfVectorizer
    except ImportError as exc:
        raise ImportError("sklearn is required. Install with: uv add scikit-learn") from exc
    import numpy as np

    vectorizer = TfidfVectorizer(
        tokenizer=_tokenize,
        preprocessor=lambda x: x,  # already cleaned
        token_pattern=None,  # use tokenizer instead
        min_df=min_df,
        max_df=max_df_ratio,
        sublinear_tf=True,
    )

    # One shared vocabulary / IDF for both groups so the means are comparable.
    tfidf_matrix = vectorizer.fit_transform([*right_texts, *left_texts])
    feature_names = vectorizer.get_feature_names_out()

    split = len(right_texts)
    right_mean = np.asarray(tfidf_matrix[:split].mean(axis=0)).flatten()
    left_mean = np.asarray(tfidf_matrix[split:].mean(axis=0)).flatten()
    diff_scores = right_mean - left_mean

    ranked = sorted(
        zip(feature_names, diff_scores, right_mean, left_mean),
        key=lambda row: row[1],
        reverse=True,
    )
    return [
        (str(term), float(diff), float(rm), float(lm))
        for term, diff, rm, lm in ranked
        if term not in GENERIC_TERMS and len(term) > 2
    ]


def _select_keywords(
    ranked: list[tuple[str, float, float, float]],
    top_n: int,
) -> tuple[list[tuple[str, float, float, float]], list[tuple[str, float, float, float]]]:
    """Pick the ``top_n`` most right-leaning and most left-leaning terms.

    ``ranked`` must be sorted by differential score, descending. Only terms
    with a positive score can be right keywords and only terms with a negative
    score can be left keywords, so the two lists never overlap. A non-positive
    ``top_n`` yields two empty lists (a plain ``ranked[-top_n:]`` would return
    the whole list for ``top_n == 0``).
    """
    if top_n <= 0:
        return [], []
    right = [row for row in ranked if row[1] > 0][:top_n]
    left = [row for row in reversed(ranked) if row[1] < 0][:top_n]
    return right, left


def _as_records(rows: list[tuple[str, float, float, float]]) -> list[dict[str, Any]]:
    """Convert ranked tuples to the JSON record shape used in the output file."""
    return [
        {"term": term, "diff": diff, "right_tfidf": rm, "left_tfidf": lm}
        for term, diff, rm, lm in rows
    ]


def derive_keywords(
    db_path: str = "data/motions.db",
    right_threshold: float = 0.60,
    left_threshold: float = 0.60,
    top_n: int = 50,
    min_df: int = 2,
    max_df_ratio: float = 0.95,
) -> dict[str, Any]:
    """Derive right-wing keywords via differential TF-IDF.

    Args:
        db_path: Path to the DuckDB database with ``mp_votes`` and ``motions``.
        right_threshold: Support threshold for the right-wing party group.
        left_threshold: Support threshold for the left-wing party group.
        top_n: Maximum number of keywords per side.
        min_df: Minimum document frequency passed to the TF-IDF vectorizer.
        max_df_ratio: Maximum document-frequency ratio passed to the vectorizer.

    Returns:
        A dict with:
          - ``right_keywords``: list of ``{"term", "diff", "right_tfidf",
            "left_tfidf"}`` records, most right-leaning first
          - ``left_keywords``: same record shape, most left-leaning first
          - ``filtered_terms``: the terms of ``right_keywords``, in order
          - ``stats``: ``right_motions``, ``left_motions``,
            ``unmatched_motions`` and ``total_motions``. ``total_motions``
            counts every motion with votes, including those skipped for
            lack of text, so it can exceed the sum of the other three.

    Raises:
        FileNotFoundError: If ``db_path`` does not exist.
        ValueError: If either group has fewer than 10 motions.
        ImportError: If duckdb or scikit-learn is not installed.
    """
    db = Path(db_path)
    if not db.exists():
        raise FileNotFoundError(f"Database not found: {db}")

    try:
        import duckdb
    except ImportError as exc:
        raise ImportError("duckdb is required. Install with: uv add duckdb") from exc

    canonical_left, canonical_right, party_normalize = _load_party_config()

    # Hold the connection only while reading; the analysis runs in memory.
    con = duckdb.connect(str(db), read_only=True)
    try:
        logger.info("Loading party votes...")
        party_votes = _load_party_votes(con, party_normalize)
        logger.info("Loaded votes for %d motions", len(party_votes))

        logger.info("Loading motion texts...")
        motion_texts = _load_motion_texts(con)
        logger.info("Loaded texts for %d motions", len(motion_texts))
    finally:
        con.close()

    right_motion_ids, left_motion_ids, unmatched = _classify_motions(
        party_votes,
        motion_texts,
        canonical_right,
        canonical_left,
        right_threshold,
        left_threshold,
    )
    logger.info(
        "Classified: %d right-wing, %d left-wing, %d unmatched",
        len(right_motion_ids),
        len(left_motion_ids),
        len(unmatched),
    )

    if len(right_motion_ids) < 10 or len(left_motion_ids) < 10:
        raise ValueError(
            f"Insufficient motions for TF-IDF: right={len(right_motion_ids)}, left={len(left_motion_ids)}"
        )

    right_texts = [_clean_text(motion_texts[mid]) for mid in right_motion_ids]
    left_texts = [_clean_text(motion_texts[mid]) for mid in left_motion_ids]

    ranked = _rank_terms(right_texts, left_texts, min_df, max_df_ratio)
    right_rows, left_rows = _select_keywords(ranked, top_n)

    return {
        "right_keywords": _as_records(right_rows),
        "left_keywords": _as_records(left_rows),
        "filtered_terms": [term for term, _, _, _ in right_rows],
        "stats": {
            "right_motions": len(right_motion_ids),
            "left_motions": len(left_motion_ids),
            "unmatched_motions": len(unmatched),
            "total_motions": len(party_votes),
        },
    }


def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: derive keywords and write them as JSON.

    Args:
        argv: Argument list; ``None`` means ``sys.argv[1:]``.

    Returns:
        Process exit code (0 on success). Errors from `derive_keywords`
        propagate as exceptions.
    """
    parser = argparse.ArgumentParser(description="Derive right-wing keyword taxonomy")
    parser.add_argument("--db", default="data/motions.db", help="Path to motions.db")
    parser.add_argument("--output", default="analysis/right_wing/right_wing_keywords.json", help="Output JSON path")
    parser.add_argument("--top-n", type=int, default=50, help="Number of top keywords to extract")
    parser.add_argument("--right-threshold", type=float, default=0.60, help="Right-wing support threshold")
    parser.add_argument("--left-threshold", type=float, default=0.60, help="Left-wing support threshold")
    parser.add_argument("--min-df", type=int, default=2, help="Minimum document frequency for a term")
    parser.add_argument("--max-df-ratio", type=float, default=0.95, help="Maximum document-frequency ratio for a term")
    args = parser.parse_args(argv)

    result = derive_keywords(
        db_path=args.db,
        right_threshold=args.right_threshold,
        left_threshold=args.left_threshold,
        top_n=args.top_n,
        min_df=args.min_df,
        max_df_ratio=args.max_df_ratio,
    )

    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # The payload contains non-ASCII terms (ensure_ascii=False), so the
    # encoding must not depend on the platform default.
    output_path.write_text(
        json.dumps(result, indent=2, ensure_ascii=False) + "\n",
        encoding="utf-8",
    )
    logger.info("Keywords written to %s", output_path)
    logger.info("Top 10 right-wing terms: %s", [k["term"] for k in result["right_keywords"][:10]])
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

# ---------------------------------------------------------------------------
# Start of the second file in the original patch (generated data, kept
# verbatim; the remainder of the JSON payload continues after this block):
#   diff --git a/analysis/right_wing/right_wing_keywords.json b/analysis/right_wing/right_wing_keywords.json
#   new file mode 100644
#   index 0000000..e5b32fe
#   --- /dev/null
#   +++ b/analysis/right_wing/right_wing_keywords.json
#   @@ -0,0 +1,664 @@
#   +{ + "right_keywords": [ + { + "term": "infectieziektenbestrijding", + "diff": 0.006569478599743028, + "right_tfidf": 0.00845232937143348, + "left_tfidf": 0.0018828507716904515 + }, + { + "term": "asielzoekers", + "diff":
0.005313058457652961, + "right_tfidf": 0.007509768091530024, + "left_tfidf": 0.002196709633877063 + }, + { + "term": "defensie", + "diff": 0.003474467987526604, + "right_tfidf": 0.00625082945775334, + "left_tfidf": 0.0027763614702267358 + }, + { + "term": "ondernemers", + "diff": 0.0033060155032620673, + "right_tfidf": 0.004356433801792717, + "left_tfidf": 0.0010504182985306499 + }, + { + "term": "kernenergie", + "diff": 0.0030512471527176506, + "right_tfidf": 0.0033565521394182595, + "left_tfidf": 0.0003053049867006088 + }, + { + "term": "boeren", + "diff": 0.0027130911556829417, + "right_tfidf": 0.004093648576727917, + "left_tfidf": 0.0013805574210449755 + }, + { + "term": "onmiddellijk", + "diff": 0.0025141377107913633, + "right_tfidf": 0.0028877802151609437, + "left_tfidf": 0.00037364250436958054 + }, + { + "term": "vreemdelingenbeleid", + "diff": 0.002474783466537206, + "right_tfidf": 0.004326468254883403, + "left_tfidf": 0.0018516847883461973 + }, + { + "term": "statushouders", + "diff": 0.0020566860394286095, + "right_tfidf": 0.0028435445546928276, + "left_tfidf": 0.0007868585152642181 + }, + { + "term": "veiligheid", + "diff": 0.0020479353498792053, + "right_tfidf": 0.008055295125654187, + "left_tfidf": 0.006007359775774982 + }, + { + "term": "asielstop", + "diff": 0.002021009577662265, + "right_tfidf": 0.002021009577662265, + "left_tfidf": 0.0 + }, + { + "term": "stikstof", + "diff": 0.0020151689483822125, + "right_tfidf": 0.0036985314795571545, + "left_tfidf": 0.001683362531174942 + }, + { + "term": "wetboek", + "diff": 0.0020123912463963322, + "right_tfidf": 0.004613613582426107, + "left_tfidf": 0.002601222336029775 + }, + { + "term": "strafrecht", + "diff": 0.001977219811541617, + "right_tfidf": 0.002932516206526633, + "left_tfidf": 0.0009552963949850162 + }, + { + "term": "agrarische", + "diff": 0.001909390045472604, + "right_tfidf": 0.0026631450469559647, + "left_tfidf": 0.0007537550014833608 + }, + { + "term": "gedwongen", + "diff": 
0.001795381328377406, + "right_tfidf": 0.0023882830883692006, + "left_tfidf": 0.0005929017599917945 + }, + { + "term": "coronamaatregelen", + "diff": 0.0017889439956944695, + "right_tfidf": 0.0020982659682420926, + "left_tfidf": 0.0003093219725476231 + }, + { + "term": "asiel", + "diff": 0.0017269560717394145, + "right_tfidf": 0.002896032885858558, + "left_tfidf": 0.0011690768141191436 + }, + { + "term": "begroting", + "diff": 0.0016861606105447683, + "right_tfidf": 0.002893937917266138, + "left_tfidf": 0.0012077773067213698 + }, + { + "term": "justitie", + "diff": 0.0016736056297034121, + "right_tfidf": 0.005340110960817776, + "left_tfidf": 0.0036665053311143643 + }, + { + "term": "regeldruk", + "diff": 0.001634299245881901, + "right_tfidf": 0.0017221464600664762, + "left_tfidf": 8.784721418457516e-05 + }, + { + "term": "europese", + "diff": 0.0016194550059820435, + "right_tfidf": 0.012660101928205766, + "left_tfidf": 0.011040646922223722 + }, + { + "term": "mkb", + "diff": 0.001593916850157352, + "right_tfidf": 0.002456043010399644, + "left_tfidf": 0.000862126160242292 + }, + { + "term": "instroom", + "diff": 0.0015757844898292715, + "right_tfidf": 0.002258608551927495, + "left_tfidf": 0.0006828240620982235 + }, + { + "term": "corona", + "diff": 0.0015642362327925978, + "right_tfidf": 0.002039496701077217, + "left_tfidf": 0.00047526046828461933 + }, + { + "term": "natura", + "diff": 0.0015410578076943103, + "right_tfidf": 0.002267554703845169, + "left_tfidf": 0.0007264968961508587 + }, + { + "term": "jbz", + "diff": 0.0014856669031578851, + "right_tfidf": 0.0024854930840807706, + "left_tfidf": 0.0009998261809228855 + }, + { + "term": "terugkeer", + "diff": 0.0014839885911937716, + "right_tfidf": 0.0018961353587949204, + "left_tfidf": 0.00041214676760114883 + }, + { + "term": "horeca", + "diff": 0.0014669250653227108, + "right_tfidf": 0.0016262027099102653, + "left_tfidf": 0.00015927764458755454 + }, + { + "term": "terrassen", + "diff": 0.0014564100688148416, + 
"right_tfidf": 0.0014564100688148416, + "left_tfidf": 0.0 + }, + { + "term": "spreidingswet", + "diff": 0.001453318438835177, + "right_tfidf": 0.0017116272387774412, + "left_tfidf": 0.00025830879994226424 + }, + { + "term": "buitenlucht", + "diff": 0.001447838936809374, + "right_tfidf": 0.0014854938756013142, + "left_tfidf": 3.7654938791940334e-05 + }, + { + "term": "toekomstvisie", + "diff": 0.0014452342007121853, + "right_tfidf": 0.0018728127201153616, + "left_tfidf": 0.00042757851940317647 + }, + { + "term": "kerncentrales", + "diff": 0.0014117033667126187, + "right_tfidf": 0.0015874204128784875, + "left_tfidf": 0.00017571704616586872 + }, + { + "term": "instemmen", + "diff": 0.0014075522388711352, + "right_tfidf": 0.0017378153859392517, + "left_tfidf": 0.00033026314706811644 + }, + { + "term": "politie", + "diff": 0.0014017186095431995, + "right_tfidf": 0.0037392397934204965, + "left_tfidf": 0.002337521183877297 + }, + { + "term": "strafbaar", + "diff": 0.001399214816885134, + "right_tfidf": 0.0015233700600286485, + "left_tfidf": 0.0001241552431435146 + }, + { + "term": "veiliger", + "diff": 0.0013917435637725549, + "right_tfidf": 0.0018451777114795315, + "left_tfidf": 0.00045343414770697665 + }, + { + "term": "pensioenstelsel", + "diff": 0.0013751206507455458, + "right_tfidf": 0.0018525658939462872, + "left_tfidf": 0.00047744524320074135 + }, + { + "term": "stikstofbeleid", + "diff": 0.0013690641002980942, + "right_tfidf": 0.0015036955196541587, + "left_tfidf": 0.0001346314193560644 + }, + { + "term": "visserijraad", + "diff": 0.001368604774863244, + "right_tfidf": 0.002510883674523926, + "left_tfidf": 0.0011422788996606821 + }, + { + "term": "afzien", + "diff": 0.0013660421034534952, + "right_tfidf": 0.0024857623824585296, + "left_tfidf": 0.0011197202790050344 + }, + { + "term": "invoeren", + "diff": 0.0013655276783388827, + "right_tfidf": 0.002732686476464871, + "left_tfidf": 0.001367158798125988 + }, + { + "term": "belang", + "diff": 0.0013587675907329386, 
+ "right_tfidf": 0.005682309659841887, + "left_tfidf": 0.004323542069108948 + }, + { + "term": "mestbeleid", + "diff": 0.00134892559456497, + "right_tfidf": 0.001943629058741009, + "left_tfidf": 0.000594703464176039 + }, + { + "term": "asielinstroom", + "diff": 0.0013479640849954836, + "right_tfidf": 0.0013649479393826741, + "left_tfidf": 1.6983854387190533e-05 + }, + { + "term": "nooit", + "diff": 0.0013308048293778282, + "right_tfidf": 0.002125383725315249, + "left_tfidf": 0.0007945788959374208 + }, + { + "term": "krijgsmacht", + "diff": 0.0013279869184148435, + "right_tfidf": 0.0016895758206999694, + "left_tfidf": 0.00036158890228512594 + }, + { + "term": "rondom", + "diff": 0.001323620763111042, + "right_tfidf": 0.0033617623129662735, + "left_tfidf": 0.0020381415498552315 + }, + { + "term": "graus", + "diff": 0.0013107066052195498, + "right_tfidf": 0.0017990616492724388, + "left_tfidf": 0.000488355044052889 + } + ], + "left_keywords": [ + { + "term": "verhoogd", + "diff": -0.003800323131539046, + "right_tfidf": 0.0019944529709599863, + "left_tfidf": 0.0057947761024990324 + }, + { + "term": "mensen", + "diff": -0.0035655147422175622, + "right_tfidf": 0.004174565092272633, + "left_tfidf": 0.007740079834490195 + }, + { + "term": "verplichtingenbedrag", + "diff": -0.003392238418435396, + "right_tfidf": 0.003439198381917264, + "left_tfidf": 0.00683143680035266 + }, + { + "term": "buitenlandse", + "diff": -0.003331646880329351, + "right_tfidf": 0.005039996681491305, + "left_tfidf": 0.008371643561820656 + }, + { + "term": "uitgavenbedrag", + "diff": -0.0032413795795242415, + "right_tfidf": 0.003175135604309838, + "left_tfidf": 0.006416515183834079 + }, + { + "term": "middelen", + "diff": -0.0032128577047093737, + "right_tfidf": 0.003918113284088858, + "left_tfidf": 0.007130970988798232 + }, + { + "term": "volgt", + "diff": -0.003211044333596029, + "right_tfidf": 0.004963527938967632, + "left_tfidf": 0.00817457227256366 + }, + { + "term": "handel", + "diff": 
-0.0031351749682947813, + "right_tfidf": 0.0020248449710976862, + "left_tfidf": 0.0051600199393924675 + }, + { + "term": "discriminatie", + "diff": -0.0029952265399205754, + "right_tfidf": 0.0012465225378471437, + "left_tfidf": 0.004241749077767719 + }, + { + "term": "internationaal", + "diff": -0.002910261753284582, + "right_tfidf": 0.0014379635633088776, + "left_tfidf": 0.00434822531659346 + }, + { + "term": "kinderen", + "diff": -0.0028024262281300923, + "right_tfidf": 0.0019880095830509684, + "left_tfidf": 0.004790435811181061 + }, + { + "term": "begrotingsstaat", + "diff": -0.0027305922232981252, + "right_tfidf": 0.01021696053656465, + "left_tfidf": 0.012947552759862774 + }, + { + "term": "zorg", + "diff": -0.002699517476423169, + "right_tfidf": 0.0037527058320764328, + "left_tfidf": 0.006452223308499602 + }, + { + "term": "israël", + "diff": -0.0026302057873323374, + "right_tfidf": 0.0021627329138823167, + "left_tfidf": 0.004792938701214654 + }, + { + "term": "duurzame", + "diff": -0.0024431320983613987, + "right_tfidf": 0.0015834157886754927, + "left_tfidf": 0.004026547887036891 + }, + { + "term": "jongeren", + "diff": -0.0023955278368396936, + "right_tfidf": 0.001520121460929629, + "left_tfidf": 0.003915649297769322 + }, + { + "term": "zaken", + "diff": -0.0023541440027530225, + "right_tfidf": 0.010212869270589515, + "left_tfidf": 0.012567013273342538 + }, + { + "term": "departementale", + "diff": -0.0023050557695713215, + "right_tfidf": 0.0029317053925349778, + "left_tfidf": 0.005236761162106299 + }, + { + "term": "ter", + "diff": -0.0022656342047127215, + "right_tfidf": 0.0065540669504994, + "left_tfidf": 0.008819701155212122 + }, + { + "term": "sociale", + "diff": -0.0022597144264270147, + "right_tfidf": 0.004775717534517907, + "left_tfidf": 0.007035431960944922 + }, + { + "term": "recht", + "diff": -0.0022380331082949194, + "right_tfidf": 0.0018926692954098967, + "left_tfidf": 0.004130702403704816 + }, + { + "term": "gaza", + "diff": 
-0.0022266956248005094, + "right_tfidf": 0.0005504838982507225, + "left_tfidf": 0.002777179523051232 + }, + { + "term": "humanitaire", + "diff": -0.002223820106338663, + "right_tfidf": 0.0003263932690958267, + "left_tfidf": 0.00255021337543449 + }, + { + "term": "ontwikkelingssamenwerking", + "diff": -0.0021714973939975235, + "right_tfidf": 0.0015294434736494004, + "left_tfidf": 0.0037009408676469237 + }, + { + "term": "blijkt", + "diff": -0.002162099972073271, + "right_tfidf": 0.0017888259757468336, + "left_tfidf": 0.003950925947820105 + }, + { + "term": "juli", + "diff": -0.002127473546108199, + "right_tfidf": 0.0025150064948918317, + "left_tfidf": 0.004642480041000031 + }, + { + "term": "israëlische", + "diff": -0.0021148538540044985, + "right_tfidf": 0.0006591390173078768, + "left_tfidf": 0.0027739928713123754 + }, + { + "term": "hulp", + "diff": -0.002109698423594897, + "right_tfidf": 0.0005740276445887163, + "left_tfidf": 0.0026837260681836133 + }, + { + "term": "welzijn", + "diff": -0.002102259922695526, + "right_tfidf": 0.0025458341038468615, + "left_tfidf": 0.004648094026542387 + }, + { + "term": "mensenrechten", + "diff": -0.0020879003421794156, + "right_tfidf": 0.0002880588278919533, + "left_tfidf": 0.002375959170071369 + }, + { + "term": "sport", + "diff": -0.0020474366821590304, + "right_tfidf": 0.0027172850523000816, + "left_tfidf": 0.004764721734459112 + }, + { + "term": "ingevoegd", + "diff": -0.002032072883318145, + "right_tfidf": 0.0035425363486034345, + "left_tfidf": 0.0055746092319215795 + }, + { + "term": "fossiele", + "diff": -0.002023171223224409, + "right_tfidf": 0.00047704455185034443, + "left_tfidf": 0.002500215775074753 + }, + { + "term": "bijdrage", + "diff": -0.0020083586864172143, + "right_tfidf": 0.0016475000753925512, + "left_tfidf": 0.0036558587618097656 + }, + { + "term": "volgende", + "diff": -0.0019917909256765556, + "right_tfidf": 0.0060028627510049426, + "left_tfidf": 0.007994653676681498 + }, + { + "term": "volksgezondheid", + 
"diff": -0.0019537232402315205, + "right_tfidf": 0.002999130765477296, + "left_tfidf": 0.004952854005708817 + }, + { + "term": "vervanging", + "diff": -0.0019511270105074148, + "right_tfidf": 0.00460060115604661, + "left_tfidf": 0.006551728166554025 + }, + { + "term": "gezondheid", + "diff": -0.0019476146284083432, + "right_tfidf": 0.0012372829861680677, + "left_tfidf": 0.003184897614576411 + }, + { + "term": "luidende", + "diff": -0.001912494820564106, + "right_tfidf": 0.003796060492064439, + "left_tfidf": 0.005708555312628545 + }, + { + "term": "november", + "diff": -0.0019033041550907508, + "right_tfidf": 0.005447208009912482, + "left_tfidf": 0.0073505121650032324 + }, + { + "term": "toegang", + "diff": -0.0018927963171563248, + "right_tfidf": 0.0009054318766555786, + "left_tfidf": 0.0027982281938119034 + }, + { + "term": "gedrukt", + "diff": -0.0018817856379919006, + "right_tfidf": 0.003566293035061206, + "left_tfidf": 0.005448078673053107 + }, + { + "term": "plan", + "diff": -0.0018800974626950037, + "right_tfidf": 0.002418833384675789, + "left_tfidf": 0.004298930847370793 + }, + { + "term": "koninkrijksrelaties", + "diff": -0.0018517550074208552, + "right_tfidf": 0.002403165319157252, + "left_tfidf": 0.004254920326578107 + }, + { + "term": "xvi", + "diff": -0.001810755339706005, + "right_tfidf": 0.0021523713308324575, + "left_tfidf": 0.003963126670538462 + }, + { + "term": "baarle", + "diff": -0.001795390634441998, + "right_tfidf": 0.00037392993504786753, + "left_tfidf": 0.0021693205694898656 + }, + { + "term": "sociaal", + "diff": -0.0017942144426511437, + "right_tfidf": 0.0009936799056835185, + "left_tfidf": 0.0027878943483346623 + }, + { + "term": "uitstoot", + "diff": -0.0017851137726596065, + "right_tfidf": 0.0004700895609280369, + "left_tfidf": 0.0022552033335876435 + }, + { + "term": "oktober", + "diff": -0.0017803788561149905, + "right_tfidf": 0.003914845876690587, + "left_tfidf": 0.005695224732805577 + }, + { + "term": "ondersteuning", + "diff": 
-0.0017785867125596029, + "right_tfidf": 0.001185265736573449, + "left_tfidf": 0.002963852449133052 + } + ], + "filtered_terms": [ + "infectieziektenbestrijding", + "asielzoekers", + "defensie", + "ondernemers", + "kernenergie", + "boeren", + "onmiddellijk", + "vreemdelingenbeleid", + "statushouders", + "veiligheid", + "asielstop", + "stikstof", + "wetboek", + "strafrecht", + "agrarische", + "gedwongen", + "coronamaatregelen", + "asiel", + "begroting", + "justitie", + "regeldruk", + "europese", + "mkb", + "instroom", + "corona", + "natura", + "jbz", + "terugkeer", + "horeca", + "terrassen", + "spreidingswet", + "buitenlucht", + "toekomstvisie", + "kerncentrales", + "instemmen", + "politie", + "strafbaar", + "veiliger", + "pensioenstelsel", + "stikstofbeleid", + "visserijraad", + "afzien", + "invoeren", + "belang", + "mestbeleid", + "asielinstroom", + "nooit", + "krijgsmacht", + "rondom", + "graus" + ], + "stats": { + "right_motions": 4291, + "left_motions": 10766, + "unmatched_motions": 13256, + "total_motions": 28331 + } +} \ No newline at end of file