#!/usr/bin/env python3
"""
fetch_all_system_notes.py

Demonstrates how to walk past Chartstone's 500,000-row /suiteql cap by
issuing a sequence of bounded queries, then writes the combined result
as a single TOON document.

Approach: ID-RANGE PAGINATION
-----------------------------
Most NetSuite tables (systemNote among them) have an auto-incrementing
numeric `id`. We use that to slice the table into id-bounded chunks:

    SELECT <cols> FROM systemNote WHERE id BETWEEN <a> AND <b>

This avoids the cost of `ORDER BY id` over a 1.5M-row table on every
call — a "cursor" pattern works but adds a full sort to every batch.
A bounded range scan can use the id index directly and is several
times faster per row.

If your target table doesn't have a contiguous numeric id, the cursor
pattern is the right alternative:

    SELECT <cols> FROM <table> WHERE id > <last_seen> ORDER BY id

It's slower but works on any monotonically-orderable column.

Output is a single TOON document — Chartstone's native LLM-friendly
encoding (sent natively when you put `Accept: application/toon` on any
JSON endpoint). This script uses JSON internally so the cursor / id
parsing is straightforward, then encodes the saved file as TOON.

Usage:
    export CHARTSTONE_PORT=58541
    export CHARTSTONE_SECRET=...
    python3 fetch_all_system_notes.py [output.toon]

Requires Python 3.9+. No third-party packages.
"""

import json
import os
import sys
import time
import urllib.error
import urllib.request

# ---------- config ----------------------------------------------------------

PORT = os.environ.get("CHARTSTONE_PORT", "58541")
SECRET = os.environ.get("CHARTSTONE_SECRET")
if not SECRET:
    sys.exit("Set the CHARTSTONE_SECRET environment variable.")

OUTPUT_PATH = sys.argv[1] if len(sys.argv) > 1 else "system_notes.toon"

# Columns to pull. Pick the ones useful for whatever the consumer plans
# to do with the data — fewer columns = smaller file = cheaper LLM context.
COLUMNS = ["id", "date", "recordtypeid", "type", "field", "name"]

# Width of each id range, in id units. With ~75% density on systemNote,
# 150K-id ranges return ~110K rows each, well under the /suiteql 500K
# cap and finishing inside NetSuite's 5-min per-script timeout. Smaller
# ranges trade fewer-rows-per-call for more round-trips. Tune to taste.
ID_RANGE_WIDTH = 150_000

# How long Chartstone should wait for one underlying SuiteScript run
# before giving up. NS's own ceiling is 30 minutes; we keep this at the
# default 5 min (300_000) and rely on small range widths to stay under it.
SCRIPT_TIMEOUT_MS = 5 * 60 * 1000

# HTTP-side timeout — slightly longer than SCRIPT_TIMEOUT_MS so
# Chartstone's own timeout fires first.
HTTP_TIMEOUT_SECONDS = (SCRIPT_TIMEOUT_MS // 1000) + 60

URL = f"http://127.0.0.1:{PORT}/suiteql"


# ---------- HTTP ------------------------------------------------------------


def post_query(query: str) -> dict:
    """POST a SuiteQL query and return the parsed JSON response."""
    body = {"query": query, "timeoutMs": SCRIPT_TIMEOUT_MS}
    req = urllib.request.Request(
        URL,
        data=json.dumps(body).encode("utf-8"),
        headers={
            "Authorization": f"Bearer {SECRET}",
            "Content-Type": "application/json",
            "Accept": "application/json",
        },
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=HTTP_TIMEOUT_SECONDS) as resp:
            return json.load(resp)
    except urllib.error.HTTPError as e:
        sys.exit(f"HTTP {e.code} from /suiteql:\n{e.read().decode('utf-8', 'replace')}")


# ---------- TOON encoder ----------------------------------------------------
#
# Minimal encoder for our specific shape: a flat array of records with
# uniform columns. Mirrors the format Chartstone's @toon-format encoder
# produces: `items[N]{cols}:` followed by indented comma-separated rows.


def _to_toon_cell(value: object) -> str:
    if value is None:
        return "null"
    if isinstance(value, bool):
        return "true" if value else "false"
    if isinstance(value, (int, float)):
        return str(value)
    s = str(value)
    needs_quoting = (
        ("," in s)
        or ('"' in s)
        or ("\n" in s)
        or s != s.strip()
    )
    if needs_quoting:
        return '"' + s.replace('"', '\\"') + '"'
    return s


def encode_toon(rows: list, columns: list) -> str:
    out = [f"items[{len(rows)}]{{{','.join(columns)}}}:"]
    for row in rows:
        out.append("  " + ",".join(_to_toon_cell(row.get(c)) for c in columns))
    return "\n".join(out) + "\n"


# ---------- main ------------------------------------------------------------


def main() -> None:
    started = time.time()

    # Step 1: probe the id span and total row count so we can plan ranges
    # and report progress against a known target.
    print("probing systemNote id range ...", flush=True)
    probe = post_query(
        "SELECT MIN(id) AS min_id, MAX(id) AS max_id, COUNT(*) AS n FROM systemNote"
    )
    info = probe["items"][0]
    min_id = int(info["min_id"])
    max_id = int(info["max_id"])
    expected = int(info["n"])
    span = max_id - min_id + 1
    n_ranges = (span + ID_RANGE_WIDTH - 1) // ID_RANGE_WIDTH
    print(
        f"  id range {min_id:,} → {max_id:,} ({span:,} ids); "
        f"{expected:,} rows expected; {n_ranges} batches @ "
        f"{ID_RANGE_WIDTH:,} ids each.",
        flush=True,
    )

    # Step 2: walk id ranges left to right.
    all_rows: list = []
    cols_select = ",".join(COLUMNS)
    for batch, start in enumerate(range(min_id, max_id + 1, ID_RANGE_WIDTH), 1):
        end = min(start + ID_RANGE_WIDTH - 1, max_id)
        t0 = time.time()
        result = post_query(
            f"SELECT {cols_select} FROM systemNote WHERE id BETWEEN {start} AND {end}"
        )
        items = result.get("items", [])
        all_rows.extend(items)
        elapsed = time.time() - t0
        cap = bool(result.get("capReached"))
        print(
            f"batch {batch:>3}/{n_ranges}: "
            f"id {start:>9,}–{end:<9,}  "
            f"{len(items):>7,} rows  "
            f"(total {len(all_rows):>9,} / {expected:,};  "
            f"capReached={str(cap).lower()};  "
            f"{elapsed:5.1f}s)",
            flush=True,
        )
        # capReached=true here means our 150K-id range packed in more rows
        # than expected and hit the 500K /suiteql cap. If you see this in
        # practice, lower ID_RANGE_WIDTH. Won't happen on systemNote with
        # default settings.
        if cap:
            print(
                f"  ! capReached on a single range — lower ID_RANGE_WIDTH and rerun.",
                file=sys.stderr,
            )

    elapsed = time.time() - started
    rate = len(all_rows) / max(elapsed, 0.001)
    print(
        f"\nfetched {len(all_rows):,} rows in {elapsed:.1f}s "
        f"({rate:,.0f} rows/sec).",
        flush=True,
    )
    if len(all_rows) != expected:
        print(
            f"  ! row count differs from initial probe ({expected:,}); "
            "rows may have been added or deleted while paginating.",
            file=sys.stderr,
        )

    # Step 3: write the file.
    print(f"writing {OUTPUT_PATH} ...", flush=True)
    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        f.write(encode_toon(all_rows, COLUMNS))
    size_mb = os.path.getsize(OUTPUT_PATH) / 1_000_000
    print(f"done. {OUTPUT_PATH} ({size_mb:.1f} MB).")


if __name__ == "__main__":
    main()
