# DuckDB Database Patterns ## Connection Management ### Pattern 1: Short-lived per Method (Most Common) Always create a new connection, use try/finally for cleanup: ```python # database.py class MotionDatabase: def get_motion(self, motion_id: int) -> Optional[Dict]: conn = duckdb.connect(self.db_path) try: result = conn.execute( "SELECT * FROM motions WHERE id = ?", (motion_id,) ).fetchone() conn.close() return result except Exception: conn.close() return None def get_filtered_motions( self, policy_area: str = "Alle", min_margin: float = 0.0, max_margin: float = 1.0, limit: int = 10 ) -> List[Dict]: conn = duckdb.connect(self.db_path) try: query = """ SELECT * FROM motions WHERE (? = 'Alle' OR policy_area = ?) AND winning_margin BETWEEN ? AND ? ORDER BY RANDOM() LIMIT ? """ rows = conn.execute(query, (policy_area, policy_area, min_margin, max_margin, limit)).fetchall() conn.close() return rows except Exception: conn.close() return [] ``` ### Pattern 2: With Statement (Cleaner) ```python def execute_query(self, query: str, params: tuple = ()): with duckdb.connect(self.db_path) as conn: return conn.execute(query, params).fetchall() ``` ### Pattern 3: Lazy Connection Caching For frequently accessed connections: ```python class MotionDatabase: def __init__(self, db_path: str = config.DATABASE_PATH): self.db_path = db_path self._conn = None @property def connection(self): if self._conn is None: self._conn = duckdb.connect(self.db_path) return self._conn def close(self): if self._conn: self._conn.close() self._conn = None ``` ## Table Initialization Create tables with proper constraints and sequences: ```python def _init_database(self): conn = duckdb.connect(self.db_path) # Create sequence for auto-incrementing IDs try: conn.execute("CREATE SEQUENCE IF NOT EXISTS motions_id_seq START 1") except: pass # Create tables conn.execute(""" CREATE TABLE IF NOT EXISTS motions ( id INTEGER DEFAULT nextval('motions_id_seq'), title TEXT NOT NULL, description TEXT, date DATE, policy_area TEXT, voting_results JSON, winning_margin FLOAT, controversy_score FLOAT, layman_explanation TEXT, externe_identifier TEXT, body_text TEXT, url TEXT UNIQUE, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (id) ) """) # Add columns to existing tables safely try: conn.execute("ALTER TABLE motions ADD COLUMN IF NOT EXISTS body_text TEXT") except Exception: pass # Column may already exist conn.close() ``` ## JSON Column Handling Store and retrieve JSON data: ```python # Insert JSON def store_motion(self, motion: Dict): conn = duckdb.connect(self.db_path) try: conn.execute( "INSERT INTO motions (title, voting_results) VALUES (?, ?)", (motion["title"], json.dumps(motion["voting_results"])) ) conn.close() except Exception: conn.close() # Query JSON def get_motions_with_votes(self, party: str) -> List[Dict]: conn = duckdb.connect(self.db_path) try: rows = conn.execute(""" SELECT title, voting_results FROM motions WHERE JSON_EXTRACT(voting_results, '$.party') = ? """, (party,)).fetchall() conn.close() return rows except Exception: conn.close() return [] ``` ## Query Patterns ### Parameterized Queries (Always!) ```python # SAFE - uses parameterized query conn.execute("SELECT * FROM motions WHERE id = ?", (motion_id,)) # AVOID - SQL injection risk # conn.execute(f"SELECT * FROM motions WHERE id = {motion_id}") # BAD! ``` ### Batch Inserts ```python def bulk_insert_motions(self, motions: List[Dict]): conn = duckdb.connect(self.db_path) try: for motion in motions: conn.execute( """INSERT OR IGNORE INTO motions (title, date, policy_area) VALUES (?, ?, ?)""", (motion["title"], motion["date"], motion["policy_area"]) ) conn.close() except Exception: conn.close() ``` ### Aggregation Queries ```python def get_party_vote_stats(self, party: str) -> Dict: conn = duckdb.connect(self.db_path) try: result = conn.execute(""" SELECT COUNT(*) as total_votes, SUM(CASE WHEN vote = 'Voor' THEN 1 ELSE 0 END) as voor, SUM(CASE WHEN vote = 'Tegen' THEN 1 ELSE 0 END) as tegen FROM mp_votes WHERE party = ? """, (party,)).fetchone() conn.close() return {"total": result[0], "voor": result[1], "tegen": result[2]} except Exception: conn.close() return {"total": 0, "voor": 0, "tegen": 0} ``` ## Error Handling Always close connections in finally block or with context manager: ```python def safe_query(self, query: str, params: tuple = ()): conn = None try: conn = duckdb.connect(self.db_path) result = conn.execute(query, params).fetchall() return result except Exception as e: _logger.error(f"Query failed: {e}") return [] finally: if conn: conn.close() ``` ## Testing with Mock For unit tests without DuckDB: ```python # In MotionDatabase.__init__ def __init__(self, db_path: str = config.DATABASE_PATH): self.db_path = db_path self._file_mode = duckdb is None if duckdb is None: # Create JSON fallback files for p in (f"{db_path}.embeddings.json", f"{db_path}.similarity_cache.json"): if not os.path.exists(p): with open(p, "w") as fh: fh.write("[]") else: self._init_database() ```