# Extracted pattern examples (representative snippets)

Note: snippets are verbatim extracts from repository files (Phase 1). Paths shown.

## DuckDB connect + schema init (database.py)

```python
conn = duckdb.connect(self.db_path)

# Create sequence for auto-incrementing IDs
try:
    conn.execute("CREATE SEQUENCE IF NOT EXISTS motions_id_seq START 1")
except:  # NOTE(review): bare except in the source file — swallows all errors; flag for Phase 2
    pass

# Create tables with proper ID handling
conn.execute("""
    CREATE TABLE IF NOT EXISTS motions (
        id INTEGER DEFAULT nextval('motions_id_seq'),
        title TEXT NOT NULL,
        description TEXT,
        date DATE,
        policy_area TEXT,
        voting_results JSON,
        winning_margin FLOAT,
        controversy_score FLOAT,
        layman_explanation TEXT,
        externe_identifier TEXT,
        body_text TEXT,
        url TEXT UNIQUE,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        PRIMARY KEY (id)
    )
""")
conn.close()
```

## Read-only compute worker (svd_pipeline.py)

```python
conn = duckdb.connect(db_path, read_only=True)
try:
    rows = conn.execute(
        "SELECT motion_id, mp_name, vote FROM mp_votes WHERE date BETWEEN ? AND ?",
        (start_date, end_date),
    ).fetchall()
finally:
    conn.close()
```

## Requests with retry/backoff (ai_provider.py)

```python
resp = requests.post(url, json=json, headers=headers, timeout=10)
...
if getattr(resp, "status_code", 0) == 429: if attempt == retries: raise ProviderError(f"Provider returned HTTP {resp.status_code}") retry_after = None raw = resp.headers.get("Retry-After") if getattr(resp, "headers", None) else None if raw: try: retry_after = int(raw) except Exception: try: dt = parsedate_to_datetime(raw) now = datetime.now(tz=dt.tzinfo or timezone.utc) secs = (dt - now).total_seconds() retry_after = max(0, int(secs)) except Exception: retry_after = None if retry_after is not None: time.sleep(retry_after) continue ``` ## Embedding batch + per-item fallback (pipeline/ai_provider_wrapper.py) ```python for start in range(0, len(texts), batch_size): chunk = texts[i:end] emb_chunk, emb_exc = _attempt_batch(chunk, i) if emb_chunk is not None: for j, emb in enumerate(emb_chunk): results[i + j] = emb i = end continue # batch failed -> fallback to per-item attempts for j in range(i, end): t = texts[j] single, single_exc = _attempt_batch([t], j) if single: results[j] = single[0] continue results[j] = None ``` ## Similarity compute (similarity/compute.py) ```python # Ensure consistent dimensionality: pad shorter vectors with zeros lengths = [len(v) for v in vecs] max_dim = max(lengths) if len(set(lengths)) != 1: logger.warning( "Inconsistent vector dimensions detected (max=%d). Padding shorter vectors with zeros.", max_dim, ) matrix = np.zeros((len(vecs), max_dim), dtype=np.float32) for i, v in enumerate(vecs): matrix[i, : len(v)] = v # Normalize rows and compute cosine similarity norms = np.linalg.norm(matrix, axis=1, keepdims=True) norms[norms == 0] = 1.0 normalized = matrix / norms sim = normalized @ normalized.T ```