# motief/tools/query_tk_api.py

#!/usr/bin/env python3
"""Query Tweede Kamer OData endpoints to locate motion body text.

This script performs the API calls described in the task and prints
structured information about responses (status code, keys, candidate
fields that may contain text or content URLs).

File: tools/query_tk_api.py
"""

import json
import sys
from urllib.parse import quote

try:
    import requests
except Exception:
    print("missing requests library", file=sys.stderr)
    raise


# Root URL of the OData service; try_get() appends each relative query path
# to it.
BASE = "https://gegevensmagazijn.tweedekamer.nl/OData/v4/2.0"
# Key of the Zaak entity that main() requests together with its Document
# expansion.
ZAAK_ID = "e6fd62f1-29be-4955-9811-03d46da2fc3a"


def try_get(path, *, timeout=30):
    """GET *path* relative to ``BASE`` and print a summary of the response.

    The status code and Content-Type are always printed.  For a JSON
    response the top-level keys and a preview truncated to 4000 characters
    are printed; for anything else the body length, the response headers
    and the first 800 bytes are printed.

    Args:
        path: Query path relative to ``BASE``; a leading slash is ignored.
        timeout: Seconds handed to ``requests.get`` so that an unresponsive
            server cannot hang the script indefinitely.

    Returns:
        The decoded JSON object (a dict) when the response body is a JSON
        object, otherwise None.  None is also returned when the request
        itself fails, when the body cannot be decoded as JSON, or when the
        decoded value is not an object.
    """
    url = BASE.rstrip("/") + "/" + path.lstrip("/")
    print("\nGET", url)
    try:
        r = requests.get(
            url, headers={"Accept": "application/json"}, timeout=timeout
        )
    except requests.RequestException as e:
        # One unreachable or slow endpoint must not abort the remaining
        # probes; report it and let the caller move on.
        print("request failed:", e)
        return None
    print("->", r.status_code, r.headers.get("Content-Type"))

    ct = r.headers.get("Content-Type", "")
    # Some servers mislabel JSON bodies, so also sniff the first character.
    if "application/json" in ct or r.text.strip().startswith("{"):
        try:
            j = r.json()
        except ValueError as e:
            # requests raises a ValueError subclass for undecodable bodies.
            print("failed to parse json:", e)
            return None
        if not isinstance(j, dict):
            # Callers use dict methods on the result, so only JSON objects
            # are returned; other JSON values are just previewed.
            print("JSON value is not an object:", type(j).__name__)
            print("JSON preview:", json.dumps(j, indent=2)[:4000])
            return None
        print("JSON keys:", list(j.keys()))
        # pretty-print limited
        print("JSON preview:", json.dumps(j, indent=2)[:4000])
        return j

    print("text length:", len(r.content))
    print("headers:", dict(r.headers))
    print("first 800 bytes:\n", r.content[:800])
    return None


# Navigation property names under which expanded documents may appear.
_DOCUMENT_KEYS = ("Document", "Documents")


def _first_entity(payload):
    """Return the entity dict carried by a JSON payload, or None.

    A payload with a top-level ``value`` key is treated as a collection and
    its first element is returned; any other dict is returned as-is.
    """
    if not isinstance(payload, dict):
        return None
    if "value" not in payload:
        return payload
    items = payload["value"]
    if isinstance(items, list) and items and isinstance(items[0], dict):
        return items[0]
    return None


def _is_error_payload(payload):
    """Return True when *payload* has a top-level ``error`` object."""
    return isinstance(payload, dict) and isinstance(payload.get("error"), dict)


def _collect_documents(zaak):
    """Return the document dicts found under ``_DOCUMENT_KEYS`` of *zaak*."""
    docs = []
    for key in _DOCUMENT_KEYS:
        val = zaak.get(key)
        if isinstance(val, list):
            # Skip null/scalar entries; later code calls dict methods on each.
            docs.extend(item for item in val if isinstance(item, dict))
        elif isinstance(val, dict):
            docs.append(val)
    return docs


def _document_id(doc):
    """Return the key of a document dict as a string, or None if not found.

    The ``Id``, ``DocumentId`` and ``IdDocument`` fields are consulted in
    that order; as a last resort the key is taken from ``@odata.id``.
    """
    doc_id = doc.get("Id") or doc.get("DocumentId") or doc.get("IdDocument")
    if doc_id:
        return str(doc_id)
    odata_id = doc.get("@odata.id")
    if not isinstance(odata_id, str):
        return None
    segment = odata_id.rstrip("/").rsplit("/", 1)[-1]
    # The last segment of an entity URL has the form ``EntitySet(key)``;
    # only the key between the parentheses is the id.
    if segment.endswith(")") and "(" in segment:
        segment = segment[segment.index("(") + 1:-1].strip("'")
    return segment or None


def main():
    """Probe the API for the Zaak ``ZAAK_ID`` and its document content.

    Step 1 requests the Zaak with its documents expanded, trying several key
    syntaxes.  Step 2 queries ``DocumentVersie`` for every document found,
    and step 3 tries a handful of candidate content-stream URLs.  All
    results are printed by ``try_get``; nothing is returned.
    """
    # 1. Zaak expand Document
    patterns = [
        f"Zaak({ZAAK_ID})?$expand=Document",
        f"Zaak(guid'{ZAAK_ID}')?$expand=Document",
        f"Zaak('{ZAAK_ID}')?$expand=Document",
    ]
    zaak_json = None
    for pattern in patterns:
        response = try_get(pattern)
        if not response or _is_error_payload(response):
            continue
        entity = _first_entity(response)
        if entity is not None and any(k in entity for k in _DOCUMENT_KEYS):
            zaak_json = response
            break
        # Remember the first usable response so that a later failing key
        # syntax cannot overwrite it.
        if zaak_json is None:
            zaak_json = response

    # A collection response is reduced to its first element.
    zaak = _first_entity(zaak_json) if zaak_json else None

    print("\n--- Zaak object (extracted) ---")
    print(json.dumps(zaak, indent=2)[:4000])

    docs = _collect_documents(zaak) if zaak else []

    print("\nFound", len(docs), "Document entries")
    for i, d in enumerate(docs):
        print("\n--- Document", i, "---")
        print(json.dumps(d, indent=2)[:4000])

    for d in docs:
        doc_id = _document_id(d)
        if not doc_id:
            continue
        print("\nQuerying DocumentVersie for Document id:", doc_id)
        key = quote(doc_id)
        queries = [
            # 2. DocumentVersie: by filter, by expansion, and by direct key
            f"DocumentVersie?$filter=DocumentId%20eq%20guid'{doc_id}'",
            f"Document({key})?$expand=DocumentVersie",
            f"DocumentVersie(guid'{doc_id}')",
            # 3. Candidate content stream locations
            f"Document({key})/Content",
            f"Document({key})/$value",
            f"Document({key})/Inhoud",
            f"Resource('{doc_id}')",
            f"Resource({key})",
        ]
        for query in queries:
            try_get(query)

# Run the probe only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()