You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
motief/.mindmodel/patterns/database.yaml

239 lines
6.2 KiB

# DuckDB Database Patterns
## Connection Management
### Pattern 1: Short-lived per Method (Most Common)
Always create a new connection, use try/finally for cleanup:
```python
# database.py
class MotionDatabase:
def get_motion(self, motion_id: int) -> Optional[Dict]:
conn = duckdb.connect(self.db_path)
try:
result = conn.execute(
"SELECT * FROM motions WHERE id = ?",
(motion_id,)
).fetchone()
conn.close()
return result
except Exception:
conn.close()
return None
def get_filtered_motions(
self,
policy_area: str = "Alle",
min_margin: float = 0.0,
max_margin: float = 1.0,
limit: int = 10
) -> List[Dict]:
conn = duckdb.connect(self.db_path)
try:
query = """
SELECT * FROM motions
WHERE (? = 'Alle' OR policy_area = ?)
AND winning_margin BETWEEN ? AND ?
ORDER BY RANDOM()
LIMIT ?
"""
rows = conn.execute(query, (policy_area, policy_area, min_margin, max_margin, limit)).fetchall()
conn.close()
return rows
except Exception:
conn.close()
return []
```
### Pattern 2: With Statement (Cleaner)
```python
def execute_query(self, query: str, params: tuple = ()):
with duckdb.connect(self.db_path) as conn:
return conn.execute(query, params).fetchall()
```
### Pattern 3: Lazy Connection Caching
For frequently accessed connections:
```python
class MotionDatabase:
def __init__(self, db_path: str = config.DATABASE_PATH):
self.db_path = db_path
self._conn = None
@property
def connection(self):
if self._conn is None:
self._conn = duckdb.connect(self.db_path)
return self._conn
def close(self):
if self._conn:
self._conn.close()
self._conn = None
```
## Table Initialization
Create tables with proper constraints and sequences:
```python
def _init_database(self):
conn = duckdb.connect(self.db_path)
# Create sequence for auto-incrementing IDs
try:
conn.execute("CREATE SEQUENCE IF NOT EXISTS motions_id_seq START 1")
except:
pass
# Create tables
conn.execute("""
CREATE TABLE IF NOT EXISTS motions (
id INTEGER DEFAULT nextval('motions_id_seq'),
title TEXT NOT NULL,
description TEXT,
date DATE,
policy_area TEXT,
voting_results JSON,
winning_margin FLOAT,
controversy_score FLOAT,
layman_explanation TEXT,
externe_identifier TEXT,
body_text TEXT,
url TEXT UNIQUE,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (id)
)
""")
# Add columns to existing tables safely
try:
conn.execute("ALTER TABLE motions ADD COLUMN IF NOT EXISTS body_text TEXT")
except Exception:
pass # Column may already exist
conn.close()
```
## JSON Column Handling
Store and retrieve JSON data:
```python
# Insert JSON
def store_motion(self, motion: Dict):
conn = duckdb.connect(self.db_path)
try:
conn.execute(
"INSERT INTO motions (title, voting_results) VALUES (?, ?)",
(motion["title"], json.dumps(motion["voting_results"]))
)
conn.close()
except Exception:
conn.close()
# Query JSON
def get_motions_with_votes(self, party: str) -> List[Dict]:
conn = duckdb.connect(self.db_path)
try:
rows = conn.execute("""
SELECT title, voting_results
FROM motions
WHERE JSON_EXTRACT(voting_results, '$.party') = ?
""", (party,)).fetchall()
conn.close()
return rows
except Exception:
conn.close()
return []
```
## Query Patterns
### Parameterized Queries (Always!)
```python
# SAFE - uses parameterized query
conn.execute("SELECT * FROM motions WHERE id = ?", (motion_id,))
# AVOID - SQL injection risk
# conn.execute(f"SELECT * FROM motions WHERE id = {motion_id}") # BAD!
```
### Batch Inserts
```python
def bulk_insert_motions(self, motions: List[Dict]):
conn = duckdb.connect(self.db_path)
try:
for motion in motions:
conn.execute(
"""INSERT OR IGNORE INTO motions
(title, date, policy_area) VALUES (?, ?, ?)""",
(motion["title"], motion["date"], motion["policy_area"])
)
conn.close()
except Exception:
conn.close()
```
### Aggregation Queries
```python
def get_party_vote_stats(self, party: str) -> Dict:
conn = duckdb.connect(self.db_path)
try:
result = conn.execute("""
SELECT
COUNT(*) as total_votes,
SUM(CASE WHEN vote = 'Voor' THEN 1 ELSE 0 END) as voor,
SUM(CASE WHEN vote = 'Tegen' THEN 1 ELSE 0 END) as tegen
FROM mp_votes
WHERE party = ?
""", (party,)).fetchone()
conn.close()
return {"total": result[0], "voor": result[1], "tegen": result[2]}
except Exception:
conn.close()
return {"total": 0, "voor": 0, "tegen": 0}
```
## Error Handling
Always close connections in finally block or with context manager:
```python
def safe_query(self, query: str, params: tuple = ()):
conn = None
try:
conn = duckdb.connect(self.db_path)
result = conn.execute(query, params).fetchall()
return result
except Exception as e:
_logger.error(f"Query failed: {e}")
return []
finally:
if conn:
conn.close()
```
## Testing with Mock
For unit tests without DuckDB:
```python
# In MotionDatabase.__init__
def __init__(self, db_path: str = config.DATABASE_PATH):
self.db_path = db_path
self._file_mode = duckdb is None
if duckdb is None:
# Create JSON fallback files
for p in (f"{db_path}.embeddings.json", f"{db_path}.similarity_cache.json"):
if not os.path.exists(p):
with open(p, "w") as fh:
fh.write("[]")
else:
self._init_database()
```