You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
239 lines
6.2 KiB
239 lines
6.2 KiB
# DuckDB Database Patterns
|
|
|
|
## Connection Management
|
|
|
|
### Pattern 1: Short-lived per Method (Most Common)
|
|
|
|
Always create a new connection, use try/finally for cleanup:
|
|
|
|
```python
|
|
# database.py
|
|
class MotionDatabase:
|
|
def get_motion(self, motion_id: int) -> Optional[Dict]:
|
|
conn = duckdb.connect(self.db_path)
|
|
try:
|
|
result = conn.execute(
|
|
"SELECT * FROM motions WHERE id = ?",
|
|
(motion_id,)
|
|
).fetchone()
|
|
conn.close()
|
|
return result
|
|
except Exception:
|
|
conn.close()
|
|
return None
|
|
|
|
def get_filtered_motions(
|
|
self,
|
|
policy_area: str = "Alle",
|
|
min_margin: float = 0.0,
|
|
max_margin: float = 1.0,
|
|
limit: int = 10
|
|
) -> List[Dict]:
|
|
conn = duckdb.connect(self.db_path)
|
|
try:
|
|
query = """
|
|
SELECT * FROM motions
|
|
WHERE (? = 'Alle' OR policy_area = ?)
|
|
AND winning_margin BETWEEN ? AND ?
|
|
ORDER BY RANDOM()
|
|
LIMIT ?
|
|
"""
|
|
rows = conn.execute(query, (policy_area, policy_area, min_margin, max_margin, limit)).fetchall()
|
|
conn.close()
|
|
return rows
|
|
except Exception:
|
|
conn.close()
|
|
return []
|
|
```
|
|
|
|
### Pattern 2: With Statement (Cleaner)
|
|
|
|
```python
|
|
def execute_query(self, query: str, params: tuple = ()):
|
|
with duckdb.connect(self.db_path) as conn:
|
|
return conn.execute(query, params).fetchall()
|
|
```
|
|
|
|
### Pattern 3: Lazy Connection Caching
|
|
|
|
For frequently accessed connections:
|
|
|
|
```python
|
|
class MotionDatabase:
|
|
def __init__(self, db_path: str = config.DATABASE_PATH):
|
|
self.db_path = db_path
|
|
self._conn = None
|
|
|
|
@property
|
|
def connection(self):
|
|
if self._conn is None:
|
|
self._conn = duckdb.connect(self.db_path)
|
|
return self._conn
|
|
|
|
def close(self):
|
|
if self._conn:
|
|
self._conn.close()
|
|
self._conn = None
|
|
```
|
|
|
|
## Table Initialization
|
|
|
|
Create tables with proper constraints and sequences:
|
|
|
|
```python
|
|
def _init_database(self):
|
|
conn = duckdb.connect(self.db_path)
|
|
|
|
# Create sequence for auto-incrementing IDs
|
|
try:
|
|
conn.execute("CREATE SEQUENCE IF NOT EXISTS motions_id_seq START 1")
|
|
except:
|
|
pass
|
|
|
|
# Create tables
|
|
conn.execute("""
|
|
CREATE TABLE IF NOT EXISTS motions (
|
|
id INTEGER DEFAULT nextval('motions_id_seq'),
|
|
title TEXT NOT NULL,
|
|
description TEXT,
|
|
date DATE,
|
|
policy_area TEXT,
|
|
voting_results JSON,
|
|
winning_margin FLOAT,
|
|
controversy_score FLOAT,
|
|
layman_explanation TEXT,
|
|
externe_identifier TEXT,
|
|
body_text TEXT,
|
|
url TEXT UNIQUE,
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
PRIMARY KEY (id)
|
|
)
|
|
""")
|
|
|
|
# Add columns to existing tables safely
|
|
try:
|
|
conn.execute("ALTER TABLE motions ADD COLUMN IF NOT EXISTS body_text TEXT")
|
|
except Exception:
|
|
pass # Column may already exist
|
|
|
|
conn.close()
|
|
```
|
|
|
|
## JSON Column Handling
|
|
|
|
Store and retrieve JSON data:
|
|
|
|
```python
|
|
# Insert JSON
|
|
def store_motion(self, motion: Dict):
|
|
conn = duckdb.connect(self.db_path)
|
|
try:
|
|
conn.execute(
|
|
"INSERT INTO motions (title, voting_results) VALUES (?, ?)",
|
|
(motion["title"], json.dumps(motion["voting_results"]))
|
|
)
|
|
conn.close()
|
|
except Exception:
|
|
conn.close()
|
|
|
|
# Query JSON
|
|
def get_motions_with_votes(self, party: str) -> List[Dict]:
|
|
conn = duckdb.connect(self.db_path)
|
|
try:
|
|
rows = conn.execute("""
|
|
SELECT title, voting_results
|
|
FROM motions
|
|
WHERE JSON_EXTRACT(voting_results, '$.party') = ?
|
|
""", (party,)).fetchall()
|
|
conn.close()
|
|
return rows
|
|
except Exception:
|
|
conn.close()
|
|
return []
|
|
```
|
|
|
|
## Query Patterns
|
|
|
|
### Parameterized Queries (Always!)
|
|
```python
|
|
# SAFE - uses parameterized query
|
|
conn.execute("SELECT * FROM motions WHERE id = ?", (motion_id,))
|
|
|
|
# AVOID - SQL injection risk
|
|
# conn.execute(f"SELECT * FROM motions WHERE id = {motion_id}") # BAD!
|
|
```
|
|
|
|
### Batch Inserts
|
|
```python
|
|
def bulk_insert_motions(self, motions: List[Dict]):
|
|
conn = duckdb.connect(self.db_path)
|
|
try:
|
|
for motion in motions:
|
|
conn.execute(
|
|
"""INSERT OR IGNORE INTO motions
|
|
(title, date, policy_area) VALUES (?, ?, ?)""",
|
|
(motion["title"], motion["date"], motion["policy_area"])
|
|
)
|
|
conn.close()
|
|
except Exception:
|
|
conn.close()
|
|
```
|
|
|
|
### Aggregation Queries
|
|
```python
|
|
def get_party_vote_stats(self, party: str) -> Dict:
|
|
conn = duckdb.connect(self.db_path)
|
|
try:
|
|
result = conn.execute("""
|
|
SELECT
|
|
COUNT(*) as total_votes,
|
|
SUM(CASE WHEN vote = 'Voor' THEN 1 ELSE 0 END) as voor,
|
|
SUM(CASE WHEN vote = 'Tegen' THEN 1 ELSE 0 END) as tegen
|
|
FROM mp_votes
|
|
WHERE party = ?
|
|
""", (party,)).fetchone()
|
|
conn.close()
|
|
return {"total": result[0], "voor": result[1], "tegen": result[2]}
|
|
except Exception:
|
|
conn.close()
|
|
return {"total": 0, "voor": 0, "tegen": 0}
|
|
```
|
|
|
|
## Error Handling
|
|
|
|
Always close connections in finally block or with context manager:
|
|
|
|
```python
|
|
def safe_query(self, query: str, params: tuple = ()):
|
|
conn = None
|
|
try:
|
|
conn = duckdb.connect(self.db_path)
|
|
result = conn.execute(query, params).fetchall()
|
|
return result
|
|
except Exception as e:
|
|
_logger.error(f"Query failed: {e}")
|
|
return []
|
|
finally:
|
|
if conn:
|
|
conn.close()
|
|
```
|
|
|
|
## Testing with Mock
|
|
|
|
For unit tests without DuckDB:
|
|
|
|
```python
|
|
# In MotionDatabase.__init__
|
|
def __init__(self, db_path: str = config.DATABASE_PATH):
|
|
self.db_path = db_path
|
|
self._file_mode = duckdb is None
|
|
|
|
if duckdb is None:
|
|
# Create JSON fallback files
|
|
for p in (f"{db_path}.embeddings.json", f"{db_path}.similarity_cache.json"):
|
|
if not os.path.exists(p):
|
|
with open(p, "w") as fh:
|
|
fh.write("[]")
|
|
else:
|
|
self._init_database()
|
|
```
|
|
|