You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
motief/tools/query_tk_api.py

129 lines
4.0 KiB

#!/usr/bin/env python3
"""Query Tweede Kamer OData endpoints to locate motion body text.
This script performs the API calls described in the task and prints
structured information about responses (status code, keys, candidate
fields that may contain text or content URLs).
File: tools/query_tk_api.py
"""
import json
import sys
from urllib.parse import quote
# requests is the only third-party dependency of this script.  When it is not
# installed, print a short hint on stderr and re-raise so the traceback is
# kept.  Only ImportError is caught: any other failure while importing is not
# a "missing library" problem and must not be reported as one.
try:
    import requests
except ImportError:
    print("missing requests library", file=sys.stderr)
    raise
# Root URL of the Tweede Kamer OData service; try_get() appends the request
# path to this value.
BASE = "https://gegevensmagazijn.tweedekamer.nl/OData/v4/2.0"
# Key of the Zaak entity that main() looks up.
# NOTE(review): presumably the motion whose body text is being located —
# confirm against the task description.
ZAAK_ID = "e6fd62f1-29be-4955-9811-03d46da2fc3a"
def try_get(path, timeout=30):
    """Fetch ``BASE``/``path`` and print a diagnostic summary of the response.

    The status code and Content-Type are printed for every response.  When
    the response looks like JSON (by Content-Type, or because the body starts
    with ``{``) the top-level keys and a preview truncated to 4000 characters
    are printed; otherwise the body length, the response headers and the
    first 800 bytes of the body are printed.

    Args:
        path: Request path relative to ``BASE``; leading slashes are ignored.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON object (a ``dict``), or ``None`` when the request
        failed, the body was not JSON, the JSON could not be parsed, or its
        top level was not an object.
    """
    url = BASE.rstrip("/") + "/" + path.lstrip("/")
    print("\nGET", url)
    try:
        # Without an explicit timeout requests waits indefinitely on a
        # server that accepts the connection but never answers.
        r = requests.get(
            url,
            headers={"Accept": "application/json"},
            timeout=timeout,
        )
    except requests.RequestException as e:
        # This script probes many URL patterns; one unreachable pattern
        # should be reported, not abort the remaining probes.
        print("request failed:", e)
        return None
    print("->", r.status_code, r.headers.get("Content-Type"))

    ct = r.headers.get("Content-Type", "")
    if "application/json" in ct or r.text.strip().startswith("{"):
        try:
            j = r.json()
        except ValueError as e:
            # The JSON decode errors raised by requests derive from ValueError.
            print("failed to parse json:", e)
            return None
        if not isinstance(j, dict):
            # Callers index the result by key, so a top-level array or
            # scalar is reported and treated as "no usable object".
            print("JSON top level is not an object:", type(j).__name__)
            return None
        print("JSON keys:", list(j))
        # The preview is capped so large payloads do not flood the terminal.
        print("JSON preview:", json.dumps(j, indent=2)[:4000])
        return j

    print("text length:", len(r.content))
    print("headers:", dict(r.headers))
    print("first 800 bytes:\n", r.content[:800])
    return None
def _first_entity(payload):
    """Return the entity dict in an OData response, unwrapping a collection."""
    if not payload:
        return None
    if "value" not in payload:
        return payload
    items = payload["value"]
    if isinstance(items, list) and items and isinstance(items[0], dict):
        return items[0]
    return None


def _has_documents(payload):
    """Tell whether the response's entity carries an expanded Document list."""
    entity = _first_entity(payload)
    if not entity:
        return False
    return "Document" in entity or "Documents" in entity


def _collect_documents(zaak):
    """Gather Document entries from the 'Document'/'Documents' properties."""
    docs = []
    for key in ("Document", "Documents"):
        val = zaak.get(key)
        if isinstance(val, list):
            docs.extend(val)
        elif isinstance(val, dict):
            docs.append(val)
    return docs


def _document_id(doc):
    """Return the key of a Document entry as a string, or None if absent."""
    doc_id = doc.get("Id") or doc.get("DocumentId") or doc.get("IdDocument")
    if doc_id:
        return str(doc_id)
    odata_id = doc.get("@odata.id")
    if not odata_id:
        return None
    segment = str(odata_id).rstrip("/").split("/")[-1]
    # The last URI segment has the form "Document(<key>)"; keep only the key
    # so it can be substituted into other request paths.
    if "(" in segment and segment.endswith(")"):
        segment = segment[segment.index("(") + 1:-1]
    return segment.strip("'") or None


def _probe_document(doc_id):
    """Request every candidate version/content path for one Document key."""
    print("\nQuerying DocumentVersie for Document id:", doc_id)
    key = quote(doc_id)
    paths = [
        # 2. DocumentVersie: by filter, by expansion from Document, by key
        f"DocumentVersie?$filter=DocumentId%20eq%20guid'{doc_id}'",
        f"Document({key})?$expand=DocumentVersie",
        f"DocumentVersie(guid'{doc_id}')",
        # 3. Content stream patterns
        f"Document({key})/Content",
        f"Document({key})/$value",
        f"Document({key})/Inhoud",
        f"Resource('{doc_id}')",
        f"Resource({key})",
    ]
    for path in paths:
        try_get(path)


def main():
    """Probe the API for the Zaak ``ZAAK_ID`` and its documents.

    Steps, all of which only print what they find:

    1. Request the Zaak with ``$expand=Document`` using three key syntaxes,
       stopping at the first response whose entity contains documents.
    2. Print the extracted Zaak and each Document entry found on it.
    3. For every Document with a usable key, request the DocumentVersie and
       content-stream candidate paths.
    """
    # 1. Zaak expand Document
    patterns = [
        f"Zaak({ZAAK_ID})?$expand=Document",
        f"Zaak(guid'{ZAAK_ID}')?$expand=Document",
        f"Zaak('{ZAAK_ID}')?$expand=Document",
    ]
    zaak_json = None
    for p in patterns:
        zaak_json = try_get(p)
        # Inspect the entity itself: a collection response wraps it in
        # "value", where a plain membership test on the list never matches.
        if _has_documents(zaak_json):
            break

    zaak = _first_entity(zaak_json)
    print("\n--- Zaak object (extracted) ---")
    print(json.dumps(zaak, indent=2)[:4000])

    docs = _collect_documents(zaak) if zaak else []
    print("\nFound", len(docs), "Document entries")
    for i, d in enumerate(docs):
        print("\n--- Document", i, "---")
        print(json.dumps(d, indent=2)[:4000])

    for d in docs:
        doc_id = _document_id(d)
        if not doc_id:
            continue
        _probe_document(doc_id)
# Run the probe only when this file is executed as a script, so importing
# the module does not trigger any network requests.
if __name__ == "__main__":
    main()