You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
129 lines
4.0 KiB
129 lines
4.0 KiB
#!/usr/bin/env python3
"""Query Tweede Kamer OData endpoints to locate motion body text.

This script performs the API calls described in the task and prints
structured information about responses (status code, keys, candidate
fields that may contain text or content URLs).

File: tools/query_tk_api.py
"""
|
|
|
|
import json
import re
import sys
from urllib.parse import quote
|
|
|
|
# `requests` is the only third-party dependency of this script. If it cannot
# be imported, print a short hint on stderr and re-raise so the original
# traceback is still shown.
try:
    import requests
except ImportError:
    # Only a failed import means "library missing"; any other exception
    # raised while importing propagates without the misleading hint.
    print("missing requests library", file=sys.stderr)
    raise
|
|
|
|
|
|
# Root URL of the OData v4 service; try_get() appends request paths to it.
BASE = "https://gegevensmagazijn.tweedekamer.nl/OData/v4/2.0"

# Key of the Zaak entity that main() looks up (with its documents expanded).
ZAAK_ID = "e6fd62f1-29be-4955-9811-03d46da2fc3a"
|
|
|
|
|
|
def try_get(path, timeout=30):
    """GET ``BASE/path`` and print a diagnostic summary of the response.

    The status code and Content-Type are always printed. A JSON object body
    additionally gets its top-level keys and a preview (first 4000 characters
    of the pretty-printed document) printed. Any other body gets its length,
    the response headers and its first 800 bytes printed.

    Args:
        path: Request path relative to ``BASE``; leading slashes are ignored.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Returns:
        The decoded JSON object (a ``dict``) when the body is a JSON object,
        regardless of the HTTP status code; ``None`` when the request fails,
        the body is not JSON, cannot be parsed, or is JSON but not an object.
    """
    url = BASE.rstrip("/") + "/" + path.lstrip("/")
    print("\nGET", url)

    try:
        # Without a timeout an unresponsive server would block the script
        # forever.
        r = requests.get(
            url, headers={"Accept": "application/json"}, timeout=timeout
        )
    except requests.RequestException as e:
        # One failed probe must not abort the remaining probes.
        print("request failed:", e)
        return None

    print("->", r.status_code, r.headers.get("Content-Type"))

    ct = r.headers.get("Content-Type", "")
    # Sniff the raw bytes rather than r.text so that a large binary body is
    # not decoded just to look at its first character.
    looks_like_json = r.content.lstrip().startswith(b"{")

    if "application/json" in ct or looks_like_json:
        try:
            j = r.json()
        except ValueError as e:
            # requests raises a ValueError subclass for an undecodable body.
            print("failed to parse json:", e)
            return None

        if not isinstance(j, dict):
            # Callers index the result by key, so only objects are returned.
            print("JSON is not an object:", type(j).__name__)
            print("JSON preview:", json.dumps(j, indent=2)[:4000])
            return None

        print("JSON keys:", list(j.keys()))
        # pretty-print limited
        print("JSON preview:", json.dumps(j, indent=2)[:4000])
        return j

    print("text length:", len(r.content))
    print("headers:", dict(r.headers))
    print("first 800 bytes:\n", r.content[:800])
    return None
|
|
|
|
|
|
# Canonical 8-4-4-4-12 hexadecimal GUID; used to pull the key out of an
# entity URI such as ".../Document(<guid>)".
_GUID_RE = re.compile(
    r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"
)


def _extract_zaak(payload):
    """Return the Zaak dict contained in a try_get() result, or None."""
    # NOTE(review): a top-level "error" member is assumed to mark an OData
    # error body rather than an entity property -- confirm against the API.
    if not isinstance(payload, dict) or "error" in payload:
        return None
    if "value" in payload:
        # Collection response: use the first entity, if there is one.
        items = payload["value"]
        if isinstance(items, list) and items and isinstance(items[0], dict):
            return items[0]
        return None
    return payload or None


def _collect_documents(zaak):
    """Return the document dicts found under 'Document' / 'Documents'."""
    docs = []
    for key in ("Document", "Documents"):
        linked = zaak.get(key)
        if isinstance(linked, list):
            docs.extend(item for item in linked if isinstance(item, dict))
        elif isinstance(linked, dict):
            docs.append(linked)
    return docs


def _document_id(doc):
    """Return the id of a document dict as a string, or None if none is found."""
    doc_id = doc.get("Id") or doc.get("DocumentId") or doc.get("IdDocument")
    if doc_id:
        return str(doc_id)
    uri = doc.get("@odata.id")
    if not uri:
        return None
    # Last URI segment, e.g. "Document(<guid>)"; prefer the GUID inside it and
    # fall back to the raw segment when no GUID is present.
    segment = str(uri).rstrip("/").split("/")[-1]
    match = _GUID_RE.search(segment)
    return (match.group(0) if match else segment) or None


def _probe_document(doc_id):
    """Issue every DocumentVersie and content-stream probe for one document id."""
    key = quote(doc_id)
    probes = [
        # 2. DocumentVersie: by filter, by expansion from Document, by key.
        f"DocumentVersie?$filter=DocumentId%20eq%20guid'{doc_id}'",
        f"Document({key})?$expand=DocumentVersie",
        f"DocumentVersie(guid'{doc_id}')",
        # 3. Content stream patterns.
        f"Document({key})/Content",
        f"Document({key})/$value",
        f"Document({key})/Inhoud",
        f"Resource('{doc_id}')",
        f"Resource({key})",
    ]
    for path in probes:
        try_get(path)


def main():
    """Look up the Zaak ``ZAAK_ID`` and probe endpoints for its documents.

    Steps:
      1. Request the Zaak with its documents expanded, trying several key
         syntaxes until one response carries a 'Document'/'Documents' member.
      2. Print the extracted Zaak and every document entry found on it.
      3. For each document that has an id, request the DocumentVersie and
         content-stream URL patterns through try_get(), which prints a
         summary of each response.
    """
    # 1. Zaak expand Document
    patterns = [
        f"Zaak({ZAAK_ID})?$expand=Document",
        f"Zaak(guid'{ZAAK_ID}')?$expand=Document",
        f"Zaak('{ZAAK_ID}')?$expand=Document",
    ]

    zaak = None
    for pattern in patterns:
        candidate = _extract_zaak(try_get(pattern))
        if candidate is None:
            continue
        if zaak is None:
            # Keep the first usable entity so that a later failed attempt
            # cannot discard it.
            zaak = candidate
        if "Document" in candidate or "Documents" in candidate:
            zaak = candidate
            break

    print("\n--- Zaak object (extracted) ---")
    print(json.dumps(zaak, indent=2)[:4000])

    docs = _collect_documents(zaak) if zaak else []

    print("\nFound", len(docs), "Document entries")
    for i, d in enumerate(docs):
        print("\n--- Document", i, "---")
        print(json.dumps(d, indent=2)[:4000])

    for d in docs:
        doc_id = _document_id(d)
        if not doc_id:
            continue
        print("\nQuerying DocumentVersie for Document id:", doc_id)
        _probe_document(doc_id)
|
|
|
|
|
|
# Run the probes only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
|