You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
motief/.mindmodel/patterns/architecture.yaml

230 lines
6.1 KiB

# Architectural Patterns
## Repository Pattern
The `MotionDatabase` class acts as a repository, encapsulating all database operations behind a clean interface.
```python
# database.py
class MotionDatabase:
    def __init__(self, db_path: str = config.DATABASE_PATH):
        self.db_path = db_path
        self._init_database()

    def get_motion(self, motion_id: int) -> Optional[Dict]:
        """Get a single motion by ID."""
        conn = duckdb.connect(self.db_path)
        try:
            result = conn.execute(
                "SELECT * FROM motions WHERE id = ?", (motion_id,)
            ).fetchone()
            return result
        finally:
            conn.close()

    def get_filtered_motions(
        self,
        policy_area: str = "Alle",
        min_margin: float = 0.0,
        max_margin: float = 1.0,
        limit: int = 10
    ) -> List[Dict]:
        """Get filtered list of motions."""
        ...
```
**Usage**: Import the singleton instance for all DB operations.
```python
from database import db
motions = db.get_filtered_motions(policy_area="Klimaat", limit=20)
```
## Facade Pattern
Simplified interfaces over complex subsystems.
### MotionDatabase Facade
```python
# Single entry point for all database operations
db = MotionDatabase() # Singleton instance
# Operations are abstracted:
db.create_session(total_motions)
db.record_vote(session_id, motion_id, vote)
db.get_party_results(session_id)
```
### API Client Facade
```python
# api_client.py
class TweedeKamerAPI:
    def __init__(self):
        self.session = requests.Session()  # Connection pooling

    def get_motions(self, start_date, end_date) -> List[Dict]:
        """Simple interface hiding OData pagination details."""
        voting_records, besluit_meta = self._get_voting_records(start_date, end_date)
        return self._process_voting_records(voting_records, besluit_meta)
```
### MotionScraper Facade
```python
# scraper.py (if used)
class MotionScraper:
    def get_motion_content(self, url: str) -> Optional[str]:
        """Extract body text from official website."""
        ...
```
## Pipeline Pattern
Sequential phases with explicit dependencies:
```
pipeline/run_pipeline.py
├── Phase 1: fetch_mp_metadata
│ └── pipeline/fetch_mp_metadata.py
├── Phase 2: extract_mp_votes
│ └── pipeline/extract_mp_votes.py
├── Phase 3: svd_pipeline
│ └── pipeline/svd_pipeline.py
├── Phase 4: text_pipeline (gap-fill)
│ └── pipeline/text_pipeline.py
└── Phase 5: fusion (combine SVD + text)
└── pipeline/fusion.py
```
### Phase Orchestration
```python
# pipeline/run_pipeline.py
def run(args: argparse.Namespace) -> int:
    db = MotionDatabase(args.db_path)

    # Phase 1: MP metadata
    if not args.skip_metadata:
        from pipeline.fetch_mp_metadata import fetch_mp_metadata
        fetch_mp_metadata(db_path=db.db_path)

    # Phase 2: Extract votes
    if not args.skip_extract:
        from pipeline.extract_mp_votes import extract_mp_votes
        extract_mp_votes(db_path=db.db_path)

    # Phase 3: SVD per window
    if not args.skip_svd:
        from pipeline.svd_pipeline import run_svd_pipeline
        run_svd_pipeline(db, windows, args.svd_k)

    # ... additional phases
```
## Strategy Pattern
Interchangeable algorithms for axis computation:
```python
# analysis/political_axis.py
def compute_political_axis(
    vectors: Dict[str, np.ndarray],
    method: str = "pca"  # or "anchor"
) -> Tuple[np.ndarray, np.ndarray]:
    """Compute political axis using specified method.

    Methods:
    - 'pca': Use first principal component
    - 'anchor': Use predefined anchor motions
    """
    if method == "pca":
        return _compute_pca_axis(vectors)
    elif method == "anchor":
        return _compute_anchor_axis(vectors)
```
## Visitor Pattern
External operations on data structures:
```python
# analysis/trajectory.py
def _procrustes_align_windows(
    window_vecs: Dict[str, Dict[str, np.ndarray]],
    min_overlap: int = 5,
) -> Dict[str, Dict[str, np.ndarray]]:
    """Align SVD vectors across windows using Procrustes rotations.

    Takes the first window as reference and aligns each subsequent window
    to it via orthogonal Procrustes on the set of common entities.
    """
```
## Builder Pattern
Configuration via method chaining:
```python
# CLI argument parsing
parser = argparse.ArgumentParser(description="Pipeline runner")
parser.add_argument("--db-path", default="data/motions.db")
parser.add_argument("--start-date", default=None)
parser.add_argument("--end-date", default=None)
parser.add_argument("--window-size", choices=["quarterly", "annual"], default="quarterly")
parser.add_argument("--svd-k", type=int, default=50)
```
## Decorator Pattern
Retry logic for transient failures:
```python
# pipeline/ai_provider_wrapper.py
def get_embeddings_with_retry(
    texts: List[str],
    retries: int = 3,
    batch_size: int = 50,
) -> List[Optional[List[float]]]:
    """Return embeddings with automatic retry on failure."""
    for attempt in range(1, retries + 1):
        try:
            return _embedder(texts, batch_size=batch_size)
        except Exception as exc:
            if attempt == retries:
                break
            time.sleep(backoff * (2 ** (attempt - 1)))
    return [None] * len(texts)  # Safe fallback
```
## Data Patterns
### Batch Processing
Process items in chunks to manage memory and API limits:
```python
for i in range(0, len(items), batch_size):
    chunk = items[i:i + batch_size]
    process_batch(chunk)
```
### Caching
Pre-compute and store expensive results:
```python
# SimilarityCache table stores computed similarities
db.get_similarity(motion_a, motion_b)
```
### Lazy Loading
Load data only when needed:
```python
class MotionDatabase:
    @property
    def _connection(self):
        if self._conn is None:
            self._conn = duckdb.connect(self.db_path)
        return self._conn
```
### Vectorization
Use numpy for batch operations:
```python
vectors = np.array([v for v in entity_vectors.values()])
normalized = vectors / np.linalg.norm(vectors, axis=1, keepdims=True)
```