pi-extensions/scripts/ai-complete

#!/usr/bin/env bash
# ai-complete — minimal CLI for the home AI server.
#
# Usage:
#   ai-complete <model> "<prompt>"                       # one-shot completion
#   echo "prompt" | ai-complete <model>                  # stdin prompt
#   ai-complete --list                                   # list available models
#   ai-complete --load <model>                           # warm up a model (cold load can take 1-3 min)
#   ai-complete --status                                 # show currently loaded model
#   ai-complete --raw <model> '<full-json-body>'         # send full /v1/chat/completions body
#   ai-complete --stream <model> "<prompt>"              # streaming SSE output
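#   ai-complete --temperature N --top-p N <model> "..."  # sampling overrides (also --max-tokens N, --system "<text>")
#   ai-complete --prompt-file <path> <model>             # read the prompt from a file instead of argv/stdin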
#
# Auth: uses the client cert at ~/.pi/agent/certs/{client.pem,client-key.pem};
# override the directory with $AI_CERT_DIR. The server URL can be overridden with $AI_ENDPOINT.
#
# Output: prints just the assistant text to stdout (no JSON wrapping);
# --raw prints the server's response body unmodified.
# Errors go to stderr and exit non-zero.

set -euo pipefail

# Client-certificate directory and server URL; both can be overridden from the
# environment (AI_CERT_DIR, AI_ENDPOINT).
CERT_DIR="${AI_CERT_DIR:-$HOME/.pi/agent/certs}"
ENDPOINT="${AI_ENDPOINT:-https://ai.shahondin1624.de}"
CRT="$CERT_DIR/client.pem"
KEY="$CERT_DIR/client-key.pem"

# Prerequisite checks. --help/-h only prints this file's header with sed, so it
# must keep working on a machine that has no certs or tools installed yet.
case "${1:-}" in
    --help|-h) ;;
    *)
        [[ -f "$CRT" && -f "$KEY" ]] || { echo "missing $CRT or $KEY (set AI_CERT_DIR)" >&2; exit 1; }
        command -v jq >/dev/null || { echo "needs jq" >&2; exit 1; }
        command -v curl >/dev/null || { echo "needs curl" >&2; exit 1; }
        ;;
esac

# Options shared by every curl call: silent, but still report errors (-sS),
# and authenticate with the client certificate.
curl_args=(-sS --cert "$CRT" --key "$KEY")

# Print a message to stderr and abort with status 1.
die() {
    printf '%s\n' "$*" >&2
    exit 1
}

# Create a private scratch directory in $workdir and remove it when the script
# exits. A single directory (rather than a list of temp files) keeps the EXIT
# trap valid even if nothing has been written yet: expanding an empty array
# under `set -u` is an error on bash older than 4.4.
make_workdir() {
    workdir=$(mktemp -d)
    trap 'rm -rf -- "$workdir"' EXIT
}

# Dispatch on the first argument. Anything that is not one of the named modes
# falls through to the default one-shot completion.
case "${1:-}" in
    --list)
        # Print the id of every model the server advertises, one per line.
        curl "${curl_args[@]}" "$ENDPOINT/v1/models" | jq -r '.data[].id'
        ;;
    --status)
        # Router exposes load state at /models (not /v1/models) under .data[].status.value
        loaded=$(curl "${curl_args[@]}" "$ENDPOINT/models" \
            | jq -r '.data[] | select(.status.value=="loaded") | .id')
        printf '%s\n' "${loaded:-none}"
        ;;
    --load)
        # No dedicated load endpoint; the router auto-loads on first request.
        # Send a 1-token completion to trigger the load and wait until it's done.
        [[ $# -ge 2 ]] || die "usage: ai-complete --load <model>"
        model="$2"
        body=$(jq -n --arg m "$model" '{model:$m, messages:[{role:"user",content:"."}], max_tokens:1, stream:false}')
        # --fail makes an HTTP error status a curl failure, so "loaded:" is only
        # printed when the server actually accepted the request.
        curl "${curl_args[@]}" --fail -X POST --max-time 600 "$ENDPOINT/v1/chat/completions" \
            -H 'Content-Type: application/json' -d "$body" >/dev/null \
            || die "load failed: $model"
        echo "loaded: $model"
        ;;
    --raw)
        # Send a caller-supplied request body verbatim and print the raw response.
        # body source: $3 (argv-capped) or @path syntax to read body from file.
        # $2 (the model) is required positionally but not inspected here; the
        # body is sent exactly as given.
        [[ $# -ge 3 ]] || die "usage: ai-complete --raw <model> '<full-json-body>' (or @path)"
        body_arg="$3"
        make_workdir
        body_path="$workdir/body.json"
        if [[ "$body_arg" == @* ]]; then
            cp -- "${body_arg:1}" "$body_path"
        else
            printf '%s' "$body_arg" > "$body_path"
        fi
        curl "${curl_args[@]}" -X POST "$ENDPOINT/v1/chat/completions" \
            -H 'Content-Type: application/json' --data-binary @"$body_path"
        ;;
    --stream)
        # Streaming completion: print content deltas as they arrive.
        # Prompt source: --prompt-file <path>, else the argument after <model>, else stdin.
        shift
        prompt_path=""
        if [[ "${1:-}" == "--prompt-file" ]]; then
            [[ $# -ge 2 ]] || die "--prompt-file needs a path"
            [[ -f "$2" ]] || die "prompt-file not found: $2"
            prompt_path="$2"
            shift 2
        fi
        [[ $# -ge 1 ]] || die "usage: ai-complete --stream [--prompt-file <path>] <model> [\"prompt\"]"
        model="$1"
        shift
        make_workdir
        if [[ -z "$prompt_path" ]]; then
            prompt_path="$workdir/prompt"
            if [[ -n "${1:-}" ]]; then
                printf '%s' "$1" > "$prompt_path"
            else
                cat > "$prompt_path"
            fi
        fi
        body_path="$workdir/body.json"
        jq -n --arg m "$model" --rawfile p "$prompt_path" \
            '{model:$m,messages:[{role:"user",content:$p}],stream:true}' > "$body_path"
        # All SSE filtering happens in one unbuffered sed: strip the "data: "
        # prefix, then drop blank lines and the [DONE] sentinel. Separate grep
        # stages would block-buffer when writing into a pipe and hold the tokens
        # back, and `grep -v` exits 1 when nothing passes, which would abort the
        # script under pipefail before the trailing newline is printed.
        curl "${curl_args[@]}" -N -X POST "$ENDPOINT/v1/chat/completions" \
            -H 'Content-Type: application/json' --data-binary @"$body_path" \
            | sed -u -e 's/^data: //' -e '/^$/d' -e '/^\[DONE\]/d' \
            | jq -j --unbuffered '.choices[0].delta.content // empty'
        echo
        ;;
    --help|-h)
        # Print the comment header of this file (line 2 up to the first blank line).
        sed -n '2,/^$/p' "$0" | sed 's/^# \{0,1\}//'
        ;;
    *)
        # Default: one-shot non-streaming completion
        # Optional flags: --temperature N --top-p N --max-tokens N --system TEXT --prompt-file PATH
        opts='{}'        # JSON object of sampling overrides merged into the request
        sys=""           # optional system message
        prompt_file=""   # optional file to read the prompt from
        while [[ "${1:-}" == --* ]]; do
            case "$1" in
                --temperature|--top-p|--max-tokens|--system|--prompt-file)
                    [[ $# -ge 2 ]] || die "flag $1 needs a value"
                    ;;
                *) echo "unknown flag $1" >&2; exit 1 ;;
            esac
            case "$1" in
                --temperature)  opts=$(jq --argjson v "$2" '.temperature=$v'  <<<"$opts") ;;
                --top-p)        opts=$(jq --argjson v "$2" '."top_p"=$v'      <<<"$opts") ;;
                --max-tokens)   opts=$(jq --argjson v "$2" '."max_tokens"=$v' <<<"$opts") ;;
                --system)       sys="$2" ;;
                --prompt-file)  prompt_file="$2" ;;
            esac
            shift 2
        done
        model="${1:-}"
        [[ -n "$model" ]] || { echo "usage: ai-complete <model> \"prompt\"" >&2; exit 1; }

        attempts="${AI_RETRIES:-4}"
        [[ "$attempts" =~ ^[0-9]+$ ]] || die "AI_RETRIES must be a non-negative integer"

        # Materialize the prompt to a file so it never travels through any argv
        # (argv is capped at MAX_ARG_STRLEN = 128KB per element on Linux).
        # Source priority: --prompt-file (zero-copy) > $2 (capped at 128KB) > stdin (uncapped).
        make_workdir
        if [[ -n "$prompt_file" ]]; then
            [[ -f "$prompt_file" ]] || die "prompt-file not found: $prompt_file"
            prompt_path="$prompt_file"
        else
            prompt_path="$workdir/prompt"
            if [[ -n "${2:-}" ]]; then
                printf '%s' "$2" > "$prompt_path"
            else
                cat > "$prompt_path"
            fi
        fi

        # Build the full request body in a single jq call — uses --rawfile for the prompt
        # so it's read from disk, not argv. The body goes to a file too so that
        # curl --data-binary @file avoids the same argv cap.
        body_path="$workdir/body.json"
        jq -n --arg m "$model" --arg s "$sys" --rawfile p "$prompt_path" --argjson o "$opts" \
            'def msgs: if $s == "" then [{role:"user",content:$p}] else [{role:"system",content:$s},{role:"user",content:$p}] end;
             {model:$m, messages:msgs, stream:false} + $o' > "$body_path"

        # Retry loop: model auto-load can take 1-3 min for big models. On the first call
        # after eviction, the response can be empty / non-JSON / a 503 — wait and retry.
        delay=2
        text=""
        last_resp=""
        resp_path="$workdir/response.json"
        for ((i = 1; i <= attempts; i++)); do
            # Start every attempt from an empty file so a body left over from an
            # earlier attempt is never mistaken for this attempt's response.
            : > "$resp_path"
            # curl failures are expected while the model loads; they show up
            # below as "no text" and are retried, so the exit status is ignored.
            curl "${curl_args[@]}" -X POST --max-time 600 \
                "$ENDPOINT/v1/chat/completions" -H 'Content-Type: application/json' \
                --data-binary @"$body_path" -o "$resp_path" 2>/dev/null || true
            last_resp=$(cat "$resp_path" 2>/dev/null || true)
            text=$(jq -r '.choices[0].message.content // empty' "$resp_path" 2>/dev/null || true)
            [[ -z "$text" ]] || break
            # If the response is structured JSON with an "error" key (eg 400 exceed_context_size),
            # don't retry — it's a real client-side error, not a transient load timeout.
            err=$(jq -r '.error.message // empty' "$resp_path" 2>/dev/null || true)
            if [[ -n "$err" ]]; then
                echo "[ai-complete] API error (no retry): $err" >&2
                printf '%s\n' "$last_resp" >&2
                exit 1
            fi
            # Only announce and wait when another attempt will actually follow.
            if (( i < attempts )); then
                echo "[ai-complete] attempt $i/$attempts produced no text (likely model loading), retrying in ${delay}s…" >&2
                sleep "$delay"
                delay=$(( delay * 2 ))
            fi
        done
        if [[ -z "$text" ]]; then
            echo "[ai-complete] gave up after $attempts attempts. Last response:" >&2
            printf '%s\n' "$last_resp" >&2
            exit 1
        fi
        printf '%s\n' "$text"
        ;;
esac