CI : support IOT device (IQ9) (#22987)

* update test scripts

* align CI behavior between linux and android

* remove automatic cancellation after 15 min

* enable cancel-in-progress

* fix ty check issue

* update and fix pylint issue

* update runner such that we are not restricted by the 15min limit rule

* fix flake8 lint issue

* update runner according to review feedback

* code update according to review feedback

* switch from llama-cli to llama-completion binary with -no-cnv flag
This commit is contained in:
Zack Li
2026-05-14 13:58:34 -07:00
committed by GitHub
parent 834a243664
commit d81e63dcfd
7 changed files with 793 additions and 167 deletions
@@ -58,14 +58,45 @@ jobs:
name: llama-cpp-android-arm64-snapdragon name: llama-cpp-android-arm64-snapdragon
path: pkg-snapdragon/llama.cpp path: pkg-snapdragon/llama.cpp
# Cross-builds llama.cpp for Snapdragon Linux IoT targets inside the arm64-linux
# toolchain container and publishes the installed tree as a build artifact.
linux-iot-snapdragon:
runs-on: ubuntu-latest
container:
image: 'ghcr.io/snapdragon-toolchain/arm64-linux:v0.1'
defaults:
run:
shell: bash
steps:
- name: Clone
uses: actions/checkout@v6
with:
fetch-depth: 0
lfs: false
# Configure with the arm64-linux-snapdragon-release preset (presets file is
# copied from docs/), enable OpenCL, then build and install into
# pkg-snapdragon/llama.cpp.
- name: Build Llama.CPP for Snapdragon Linux IoT
id: build_llama_cpp_snapdragon_linux
run: |
cp docs/backend/snapdragon/CMakeUserPresets.json .
cmake --preset arm64-linux-snapdragon-release -B build-snapdragon -DGGML_OPENCL=ON
cmake --build build-snapdragon -j $(nproc)
cmake --install build-snapdragon --prefix pkg-snapdragon/llama.cpp
# Upload only when the build step itself succeeded; the artifact name is the
# one the QDC test job selects for QCS* devices.
- name: Upload Llama.CPP Snapdragon Linux IoT Build Artifact
if: ${{ always() && steps.build_llama_cpp_snapdragon_linux.outcome == 'success' }}
uses: actions/upload-artifact@v6
with:
name: llama-cpp-linux-arm64-snapdragon
path: pkg-snapdragon/llama.cpp
test-snapdragon-qdc: test-snapdragon-qdc:
name: Test on QDC Android Device (${{ matrix.device }}) name: Test on QDC Device (${{ matrix.device }})
needs: [android-ndk-snapdragon] needs: [android-ndk-snapdragon, linux-iot-snapdragon]
runs-on: ubuntu-slim runs-on: ubuntu-24.04-arm
timeout-minutes: 90
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
device: [SM8750, SM8650, SM8850] device: [SM8750, SM8850, QCS9075M]
steps: steps:
- name: Checkout - name: Checkout
@@ -74,11 +105,11 @@ jobs:
- name: Download build artifact - name: Download build artifact
uses: actions/download-artifact@v7 uses: actions/download-artifact@v7
with: with:
name: llama-cpp-android-arm64-snapdragon name: ${{ startsWith(matrix.device, 'QCS') && 'llama-cpp-linux-arm64-snapdragon' || 'llama-cpp-android-arm64-snapdragon' }}
path: pkg-snapdragon/llama.cpp path: pkg-snapdragon/llama.cpp
- name: Set up Python - name: Set up Python
uses: actions/setup-python@v5 uses: actions/setup-python@v6
with: with:
python-version: '3.x' python-version: '3.x'
cache: pip cache: pip
@@ -107,7 +138,8 @@ jobs:
--test all \ --test all \
--pkg-dir pkg-snapdragon/llama.cpp \ --pkg-dir pkg-snapdragon/llama.cpp \
--model-url "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q4_0.gguf" \ --model-url "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q4_0.gguf" \
--device ${{ matrix.device }} --device ${{ matrix.device }} \
${{ startsWith(matrix.device, 'QCS') && '--retries 2 --retry-delay 300' || '' }}
env: env:
QDC_API_KEY: ${{ secrets.QDC_API_KEY }} QDC_API_KEY: ${{ secrets.QDC_API_KEY }}
+368 -85
View File
@@ -1,4 +1,4 @@
"""Run llama.cpp Hexagon Android tests in a single QDC Appium job. """Run llama.cpp Hexagon tests in a single QDC job.
Bundles test scripts into one artifact and submits a single QDC job: Bundles test scripts into one artifact and submits a single QDC job:
@@ -10,6 +10,10 @@ Results are written to $GITHUB_STEP_SUMMARY when set (GitHub Actions).
Prerequisites: Prerequisites:
pip install /path/to/qualcomm_device_cloud_sdk*.whl pip install /path/to/qualcomm_device_cloud_sdk*.whl
Platform is inferred from --device:
android Appium + pytest (Android phones: SM8750 / SM8650 / SM8850)
linux BASH (Linux IoT: QCS9075M)
Required environment variables: Required environment variables:
QDC_API_KEY API key from QDC UI -> Users -> Settings -> API Keys QDC_API_KEY API key from QDC UI -> Users -> Settings -> API Keys
@@ -23,6 +27,7 @@ Usage:
from __future__ import annotations from __future__ import annotations
import argparse import argparse
import enum
import logging import logging
import os import os
import re import re
@@ -30,15 +35,35 @@ import shutil
import sys import sys
import tempfile import tempfile
import time import time
import urllib.request
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from dataclasses import dataclass, field from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
from typing import Callable
from qualcomm_device_cloud_sdk.api import qdc_api # ty: ignore[unresolved-import] from qualcomm_device_cloud_sdk.api import qdc_api
from qualcomm_device_cloud_sdk.logging import configure_logging # ty: ignore[unresolved-import] from qualcomm_device_cloud_sdk.logging import configure_logging
from qualcomm_device_cloud_sdk.models import ArtifactType, JobMode, JobState, JobSubmissionParameter, JobType, TestFramework # ty: ignore[unresolved-import] from qualcomm_device_cloud_sdk.models import (
ArtifactType,
JobMode,
JobState,
JobSubmissionParameter,
JobType,
TestFramework,
)
# configure_logging only sets up the SDK logger; basicConfig is needed for
# our own log.info to reach stdout.
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(name)s %(levelname)s - %(message)s",
handlers=[logging.StreamHandler()],
)
configure_logging(level=logging.INFO, handlers=[logging.StreamHandler()]) configure_logging(level=logging.INFO, handlers=[logging.StreamHandler()])
# Silence per-poll GET/status spam from the SDK and its HTTP client.
logging.getLogger("qualcomm_device_cloud").setLevel(logging.WARNING)
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("httpcore").setLevel(logging.WARNING)
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
POLL_INTERVAL = 30 POLL_INTERVAL = 30
@@ -47,23 +72,56 @@ LOG_UPLOAD_TIMEOUT = 600
CAPACITY_TIMEOUT = 1800 CAPACITY_TIMEOUT = 1800
CAPACITY_POLL = 60 CAPACITY_POLL = 60
MAX_CONCURRENT_JOBS = 5 MAX_CONCURRENT_JOBS = 5
DEFAULT_RETRIES = 0
RETRY_DELAY = 300
TERMINAL_STATES = {JobState.COMPLETED, JobState.CANCELED} TERMINAL_STATES = {JobState.COMPLETED, JobState.CANCELED}
NON_TERMINAL_STATES = {JobState.DISPATCHED, JobState.RUNNING, JobState.SETUP, JobState.SUBMITTED} NON_TERMINAL_STATES = {JobState.DISPATCHED, JobState.RUNNING, JobState.SETUP, JobState.SUBMITTED}
_SCRIPTS_DIR = Path(__file__).parent
_TESTS_DIR = _SCRIPTS_DIR / "tests"
_RUN_BENCH = _TESTS_DIR / "run_bench_tests_posix.py"
_RUN_BACKEND_OPS = _TESTS_DIR / "run_backend_ops_posix.py"
_UTILS = _TESTS_DIR / "utils.py"
_CONFTEST = _TESTS_DIR / "conftest.py"
_REQUIREMENTS = _SCRIPTS_DIR / "requirements.txt"
class DeviceUnavailableError(Exception):
    """Signal that the QDC device resource is not available.

    This condition is treated as retryable: callers may catch it and submit
    the job again instead of reporting a test failure.
    """
_SCRIPTS_DIR = Path(__file__).parent
_TESTS_DIR = _SCRIPTS_DIR / "tests"
# --- Shared test assets -------------------------------------------------------
_UTILS = _TESTS_DIR / "utils.py"
_CONFTEST = _TESTS_DIR / "conftest.py"
_PYTEST_LINE_RE = re.compile( _PYTEST_LINE_RE = re.compile(
r"(?:[\w/]+\.py::)?(?:\w+::)?([\w\[\].-]+)\s+(PASSED|FAILED|ERROR|SKIPPED)" r"(?:[\w/]+\.py::)?(?:\w+::)?([\w\[\].-]+)\s+(PASSED|FAILED|ERROR|SKIPPED)"
) )
_EXCLUDED_LOGS = {"qdc_android_whole_host-000.log", "qdc_kernel_host-000.log"} _EXCLUDED_LOGS = {
"qdc_android_whole_host-000.log",
"qdc_kernel_host-000.log",
"qdc_LE_whole_host-000.log",
"qdc_LE_kernel_host-000.log",
"script.log",
}
_NON_TERMINAL_STATE_VALUES = {s.value for s in NON_TERMINAL_STATES} _NON_TERMINAL_STATE_VALUES = {s.value for s in NON_TERMINAL_STATES}
# --- Android (Appium + pytest) assets ----------------------------------------
_RUN_BENCH = _TESTS_DIR / "run_bench_tests_posix.py"
_RUN_BACKEND_OPS = _TESTS_DIR / "run_backend_ops_posix.py"
_REQUIREMENTS = _SCRIPTS_DIR / "requirements.txt"
_UPSTREAM_ADB_SCRIPTS = (
"https://raw.githubusercontent.com/ggml-org/llama.cpp/master/scripts/snapdragon/adb"
)
_ADB_SCRIPT_NAMES = [
"run-bench.sh",
"run-cli.sh",
"run-completion.sh",
"run-tool.sh",
]
# --- Linux (BASH) assets ------------------------------------------------------
_RUN_LINUX_TEMPLATE = _TESTS_DIR / "linux" / "run_linux.sh"
_LINUX_ENTRY_SCRIPT = "/bin/bash /data/local/tmp/TestContent/run_linux.sh"
# =============================================================================
# Artifact builders (per platform)
# =============================================================================
@dataclass @dataclass
class JobResult: class JobResult:
@@ -73,35 +131,58 @@ class JobResult:
failure_details: dict[str, str] = field(default_factory=dict) failure_details: dict[str, str] = field(default_factory=dict)
def build_artifact_zip( def _write_lf(path: Path, content: str) -> None:
"""Write text with LF line endings (required by /bin/bash on Linux)."""
with open(path, "w", encoding="utf-8", newline="\n") as f:
f.write(content)
def _build_android_artifact(
pkg_dir: Path, pkg_dir: Path,
stage_dir: Path, stage_dir: Path,
*, test_mode: str,
test_mode: str = "bench", model_url: str | None,
model_url: str | None = None,
) -> Path: ) -> Path:
"""Bundle everything into a single QDC artifact zip. """Android zip (Appium/pytest). Extracted by QDC under /qdc/appium/.
Zip structure (extracted by QDC to /qdc/appium/ on the runner): Zip structure:
llama_cpp_bundle/ installed package (adb pushed to /data/local/tmp/) llama_cpp_bundle/ installed package (adb pushed to /data/local/tmp/)
run-{bench,cli,completion,tool}.sh upstream adb wrappers (patched)
tests/ tests/
utils.py shared helpers (paths, run_adb_command, …) utils.py shared adb helpers
conftest.py shared pytest fixtures (driver) conftest.py Appium pytest fixtures
test_bench_posix.py bench + cli tests (<<MODEL_URL>> substituted) test_bench_posix.py bench + cli tests (for --test bench or all)
AND/OR test_backend_ops_posix.py test-backend-ops on HTP0
test_backend_ops_posix.py test-backend-ops -b HTP0
requirements.txt requirements.txt
pytest.ini addopts = --junitxml=results.xml
""" """
shutil.copytree(pkg_dir, stage_dir / "llama_cpp_bundle") bundle_dir = stage_dir / "llama_cpp_bundle"
shutil.copytree(pkg_dir, bundle_dir)
# Download upstream adb scripts so they land at /qdc/appium/ on the QDC
# runner. They wrap `adb shell` internally. Patch in `chmod +x bin/* lib/*`
# right after `cd $basedir` so device binaries are executable.
for name in _ADB_SCRIPT_NAMES:
url = f"{_UPSTREAM_ADB_SCRIPTS}/{name}"
dest = stage_dir / name
log.info("Downloading %s", url)
urllib.request.urlretrieve(url, str(dest))
content = dest.read_text()
content = content.replace(
"cd $basedir;",
"cd $basedir; chmod +x bin/* lib/* 2>/dev/null;",
)
dest.write_text(content)
dest.chmod(0o755)
tests_dir = stage_dir / "tests" tests_dir = stage_dir / "tests"
tests_dir.mkdir() tests_dir.mkdir()
shutil.copy(_UTILS, tests_dir / "utils.py") shutil.copy(_UTILS, tests_dir / "utils.py")
shutil.copy(_CONFTEST, tests_dir / "conftest.py") shutil.copy(_CONFTEST, tests_dir / "conftest.py")
if test_mode in ("bench", "all"): if test_mode in ("bench", "all"):
assert model_url is not None, "--model-url is required for bench/all test modes" assert model_url is not None
(tests_dir / "test_bench_posix.py").write_text( (tests_dir / "test_bench_posix.py").write_text(
_RUN_BENCH.read_text().replace("<<MODEL_URL>>", model_url) _RUN_BENCH.read_text().replace("<<MODEL_URL>>", model_url)
) )
@@ -109,33 +190,140 @@ def build_artifact_zip(
shutil.copy(_RUN_BACKEND_OPS, tests_dir / "test_backend_ops_posix.py") shutil.copy(_RUN_BACKEND_OPS, tests_dir / "test_backend_ops_posix.py")
shutil.copy(_REQUIREMENTS, stage_dir / "requirements.txt") shutil.copy(_REQUIREMENTS, stage_dir / "requirements.txt")
(stage_dir / "pytest.ini").write_text("[pytest]\naddopts = --junitxml=results.xml\n") (stage_dir / "pytest.ini").write_text(
"[pytest]\naddopts = --junitxml=results.xml\n"
)
zip_base = str(stage_dir / "artifact") zip_base = str(stage_dir / "artifact")
shutil.make_archive(zip_base, "zip", stage_dir) shutil.make_archive(zip_base, "zip", stage_dir)
return Path(f"{zip_base}.zip") return Path(f"{zip_base}.zip")
def _build_linux_artifact(
    pkg_dir: Path,
    stage_dir: Path,
    test_mode: str,
    model_url: str | None,
) -> Path:
    """Assemble the Linux IoT artifact zip for the QDC BASH framework.

    QDC extracts the zip to /data/local/tmp/TestContent/.

    Zip structure:
      run_linux.sh         entry script (placeholders substituted, LF endings)
      llama_cpp_bundle/    installed package

    Args:
        pkg_dir: Installed llama.cpp package directory to bundle.
        stage_dir: Empty staging directory; the zip is created inside it.
        test_mode: Value substituted for the {TEST_MODE} placeholder.
        model_url: Value substituted for {MODEL_URL}; None becomes "".

    Returns:
        Path to the created ``artifact.zip`` inside *stage_dir*.
    """
    shutil.copytree(pkg_dir, stage_dir / "llama_cpp_bundle")

    # Fill in the template placeholders in the same order as before:
    # model URL first, then test mode.
    script_text = _RUN_LINUX_TEMPLATE.read_text(encoding="utf-8")
    for placeholder, value in (
        ("{MODEL_URL}", model_url or ""),
        ("{TEST_MODE}", test_mode),
    ):
        script_text = script_text.replace(placeholder, value)

    entry_script = stage_dir / "run_linux.sh"
    _write_lf(entry_script, script_text)
    entry_script.chmod(0o755)

    archive_base = str(stage_dir / "artifact")
    shutil.make_archive(archive_base, "zip", stage_dir)
    return Path(f"{archive_base}.zip")
# =============================================================================
# Platform enum + strategy table
# =============================================================================
class Platform(enum.Enum):
    """Target platform families a QDC job can run on."""

    # Android phones, driven through Appium + pytest.
    ANDROID = "android"
    # Linux IoT boards, driven through the BASH framework.
    LINUX = "linux"
@dataclass(frozen=True)
class PlatformSpec:
    """Immutable per-platform recipe for building and submitting a QDC job."""

    # QDC test framework requested at job submission.
    test_framework: TestFramework
    # Entry script passed at job submission, or None when not used.
    entry_script: str | None
    # Builder called as (pkg_dir, stage_dir, test_mode, model_url) -> zip path.
    build_artifact: Callable[[Path, Path, str, str | None], Path]
    # str.format template for the job name; receives the keyword ``base``.
    job_name_fmt: str
# Strategy table: one PlatformSpec per supported platform.
PLATFORM_SPECS: dict[Platform, PlatformSpec] = {
    # Android: Appium framework, no entry script, job name used as-is.
    Platform.ANDROID: PlatformSpec(
        job_name_fmt="{base}",
        build_artifact=_build_android_artifact,
        entry_script=None,
        test_framework=TestFramework.APPIUM,
    ),
    # Linux IoT: BASH framework started through the run_linux.sh entry script.
    Platform.LINUX: PlatformSpec(
        job_name_fmt="{base} (Linux)",
        build_artifact=_build_linux_artifact,
        entry_script=_LINUX_ENTRY_SCRIPT,
        test_framework=TestFramework.BASH,
    ),
}
# Maps the --device chipset name to its platform; devices not listed here
# are rejected as unknown by the caller.
DEVICE_PLATFORM: dict[str, Platform] = {
    # Android phones
    "SM8750": Platform.ANDROID,
    "SM8650": Platform.ANDROID,
    "SM8850": Platform.ANDROID,
    # Linux IoT
    "QCS9075M": Platform.LINUX,
}
# =============================================================================
# Shared QDC job plumbing
# =============================================================================
def wait_for_job(client, job_id: str, timeout: int) -> str:
    """Poll a QDC job until it reaches a terminal state.

    The status is polled every POLL_INTERVAL seconds. State changes are
    logged once each rather than on every poll.

    Args:
        client: QDC API client handle.
        job_id: Identifier of the submitted job.
        timeout: Polling budget in seconds, counted in POLL_INTERVAL steps.

    Returns:
        The terminal job state, lower-cased.

    Raises:
        TimeoutError: The budget ran out; an abort of the job is attempted
            first so the QDC concurrency slot is not leaked.
        Exception: The status call failed five times in a row; the last
            error is re-raised unchanged.
    """
    error_limit = 5
    error_streak = 0
    previous_state = None
    waited = 0

    while waited < timeout:
        try:
            raw = qdc_api.get_job_status(client, job_id)
        except Exception as exc:
            error_streak += 1
            log.warning(
                "Transient error polling job %s (%d/%d): %s",
                job_id,
                error_streak,
                error_limit,
                exc,
            )
            if error_streak >= error_limit:
                raise
        else:
            error_streak = 0
            # Unknown state strings are kept as-is and treated as non-terminal.
            try:
                status = JobState(raw)
            except ValueError:
                status = raw
            if status in TERMINAL_STATES:
                return raw.lower()
            if raw != previous_state:
                log.info("Job %s: %s", job_id, raw)
                previous_state = raw

        time.sleep(POLL_INTERVAL)
        waited += POLL_INTERVAL

    # Abort to free the QDC concurrency slot instead of leaking it.
    try:
        qdc_api.abort_job(client, job_id)
        log.warning("Aborted job %s after timeout to free concurrency slot", job_id)
    except Exception as exc:
        log.warning("Failed to abort job %s: %s", job_id, exc)
    raise TimeoutError(f"Job {job_id} did not finish within {timeout}s")
def wait_for_log_upload(client, job_id: str) -> None: def wait_for_log_upload(client, job_id: str) -> None:
elapsed = 0 elapsed = 0
while elapsed <= LOG_UPLOAD_TIMEOUT: while elapsed <= LOG_UPLOAD_TIMEOUT:
status = (qdc_api.get_job_log_upload_status(client, job_id) or "").lower() try:
status = (qdc_api.get_job_log_upload_status(client, job_id) or "").lower()
except Exception as e:
log.warning("get_job_log_upload_status failed: %s — will retry", e)
status = ""
if status in {"completed", "failed"}: if status in {"completed", "failed"}:
return return
log.info("Waiting for log upload (status=%s) ...", status) log.info("Waiting for log upload (status=%s) ...", status)
@@ -150,17 +338,33 @@ def wait_for_capacity(client, max_jobs: int = MAX_CONCURRENT_JOBS) -> None:
while elapsed < CAPACITY_TIMEOUT: while elapsed < CAPACITY_TIMEOUT:
jobs_page = qdc_api.get_jobs_list(client, page_number=0, page_size=50) jobs_page = qdc_api.get_jobs_list(client, page_number=0, page_size=50)
if jobs_page is None: if jobs_page is None:
log.warning("Could not retrieve job list; proceeding without capacity check") log.warning(
"Could not retrieve job list; proceeding without capacity check"
)
return return
items = getattr(jobs_page, "data", []) or [] items = getattr(jobs_page, "data", []) or []
active = sum(1 for j in items if getattr(j, "state", None) in _NON_TERMINAL_STATE_VALUES) active = sum(
1 for j in items if getattr(j, "state", None) in _NON_TERMINAL_STATE_VALUES
)
if active < max_jobs: if active < max_jobs:
log.info("Active QDC jobs: %d / %d — proceeding", active, max_jobs) log.info("Active QDC jobs: %d / %d — proceeding", active, max_jobs)
return return
log.info("Active QDC jobs: %d / %d — waiting %ds ...", active, max_jobs, CAPACITY_POLL) log.info(
"Active QDC jobs: %d / %d — waiting %ds ...",
active,
max_jobs,
CAPACITY_POLL,
)
time.sleep(CAPACITY_POLL) time.sleep(CAPACITY_POLL)
elapsed += CAPACITY_POLL elapsed += CAPACITY_POLL
log.warning("Capacity wait timed out after %ds; proceeding anyway", CAPACITY_TIMEOUT) raise TimeoutError(
f"Capacity wait timed out after {CAPACITY_TIMEOUT}s"
)
# ---------------------------------------------------------------------------
# Log parsing helpers
# ---------------------------------------------------------------------------
def _parse_junit_xml(content: str) -> tuple[dict[str, bool], dict[str, str]]: def _parse_junit_xml(content: str) -> tuple[dict[str, bool], dict[str, str]]:
@@ -192,10 +396,26 @@ def _parse_pytest_output(content: str) -> dict[str, bool]:
def fetch_logs_and_parse_tests( def fetch_logs_and_parse_tests(
client, job_id: str client, job_id: str, max_retries: int = 5, retry_delay: int = 30
) -> tuple[dict[str, bool], dict[str, str], dict[str, str]]: ) -> tuple[dict[str, bool], dict[str, str], dict[str, str]]:
"""Returns (test_results, raw_logs, failure_details).""" """Returns (test_results, raw_logs, failure_details)."""
log_files = qdc_api.get_job_log_files(client, job_id) log_files = None
for attempt in range(1, max_retries + 1):
try:
log_files = qdc_api.get_job_log_files(client, job_id)
break
except Exception as e:
if attempt < max_retries:
log.warning(
"get_job_log_files failed (attempt %d/%d): %s — retrying in %ds",
attempt, max_retries, e, retry_delay,
)
time.sleep(retry_delay)
else:
log.error(
"get_job_log_files failed after %d attempts: %s", max_retries, e
)
return {}, {}, {}
if not log_files: if not log_files:
log.warning("No log files returned for job %s", job_id) log.warning("No log files returned for job %s", job_id)
return {}, {}, {} return {}, {}, {}
@@ -207,8 +427,8 @@ def fetch_logs_and_parse_tests(
with tempfile.TemporaryDirectory() as tmpdir: with tempfile.TemporaryDirectory() as tmpdir:
for lf in log_files: for lf in log_files:
log.info("Downloading log file: %s", lf.filename)
zip_path = os.path.join(tmpdir, "log.zip") zip_path = os.path.join(tmpdir, "log.zip")
log.info("Downloading log file: %s", lf.filename)
qdc_api.download_job_log_files(client, lf.filename, zip_path) qdc_api.download_job_log_files(client, lf.filename, zip_path)
try: try:
shutil.unpack_archive(zip_path, tmpdir, "zip") shutil.unpack_archive(zip_path, tmpdir, "zip")
@@ -226,12 +446,15 @@ def fetch_logs_and_parse_tests(
elif fname.endswith(".log"): elif fname.endswith(".log"):
if fname in _EXCLUDED_LOGS: if fname in _EXCLUDED_LOGS:
continue continue
log.info("--- %s ---", fname) log.info("--- %s ---\n%s", fname, content)
log.info("%s", content)
raw_logs[fname] = content raw_logs[fname] = content
pytest_fallback.update(_parse_pytest_output(content)) pytest_fallback.update(_parse_pytest_output(content))
return (test_results if test_results else pytest_fallback), raw_logs, failure_details return (
(test_results if test_results else pytest_fallback),
raw_logs,
failure_details,
)
def write_summary(result: JobResult, title: str = "QDC Test Results") -> None: def write_summary(result: JobResult, title: str = "QDC Test Results") -> None:
@@ -289,30 +512,106 @@ def write_summary(result: JobResult, title: str = "QDC Test Results") -> None:
f.write("\n".join(lines) + "\n") f.write("\n".join(lines) + "\n")
# =============================================================================
# CLI + main
# =============================================================================
def parse_args() -> argparse.Namespace:
    """Parse and validate the command-line arguments.

    Validation beyond what argparse does on its own:
      * ``--model-url`` must be given for ``--test bench`` and ``--test all``.
      * ``--retries`` and ``--retry-delay`` must not be negative. A negative
        retry count would make the attempt loop in ``main`` run zero times,
        and a negative delay is rejected by ``time.sleep``.

    Returns:
        The parsed argument namespace.

    Raises:
        SystemExit: Raised by argparse (exit status 2) on invalid arguments.
    """
    p = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    p.add_argument("--pkg-dir", required=True, type=Path,
                   help="Installed llama.cpp package directory (contains bin/ and lib/)")
    p.add_argument("--model-url",
                   help="Direct URL to the GGUF model file "
                        "(required for --test bench and --test all)")
    p.add_argument("--device", required=True,
                   help="QDC chipset name, e.g. SM8750")
    p.add_argument("--test", choices=["bench", "backend-ops", "all"], default="bench",
                   help="Test suite to run (default: bench)")
    p.add_argument("--job-timeout", type=int, default=JOB_TIMEOUT, metavar="SECONDS",
                   help=f"Max seconds to wait for job completion (default: {JOB_TIMEOUT})")
    p.add_argument("--retries", type=int, default=DEFAULT_RETRIES, metavar="N",
                   help="Number of retries when device is unavailable "
                        f"(default: {DEFAULT_RETRIES})")
    p.add_argument("--retry-delay", type=int, default=RETRY_DELAY, metavar="SECONDS",
                   help=f"Seconds to wait between retries (default: {RETRY_DELAY})")
    args = p.parse_args()

    if args.test in ("bench", "all") and not args.model_url:
        p.error("--model-url is required when --test bench or --test all")
    if args.retries < 0:
        p.error("--retries must be >= 0")
    if args.retry_delay < 0:
        p.error("--retry-delay must be >= 0")
    return args
def _submit_and_run_job(client, args, spec, target_id, artifact_id) -> JobResult:
    """Submit one QDC job, wait for it, and collect its test results.

    Args:
        client: QDC API client handle.
        args: Parsed CLI namespace; ``job_timeout`` and ``device`` are read.
        spec: PlatformSpec selecting the test framework, entry script and
            job-name format.
        target_id: QDC target to run on.
        artifact_id: Identifier of the uploaded artifact zip.

    Returns:
        JobResult for definitive outcomes (pass or test failure).

    Raises:
        DeviceUnavailableError: For conditions worth retrying: capacity wait
            timed out, submission returned no job id, the job timed out, or
            the job did not complete and produced no test results. When
            triggered by a TimeoutError, that error is chained as the cause.
    """
    try:
        wait_for_capacity(client)
    except TimeoutError as e:
        raise DeviceUnavailableError("Capacity wait timed out — device busy") from e

    job_name = spec.job_name_fmt.format(base="llama.cpp Hexagon tests")
    job_id = qdc_api.submit_job(
        public_api_client=client,
        target_id=target_id,
        job_name=job_name,
        external_job_id=None,
        job_type=JobType.AUTOMATED,
        job_mode=JobMode.APPLICATION,
        # args.job_timeout is in seconds; this converts to whole minutes (min 1).
        timeout=max(1, args.job_timeout // 60),
        test_framework=spec.test_framework,
        entry_script=spec.entry_script,
        job_artifacts=[artifact_id],
        monkey_events=None,
        monkey_session_timeout=None,
        job_parameters=[JobSubmissionParameter.WIFIENABLED],
    )
    if job_id is None:
        raise DeviceUnavailableError("Job submission failed — device may be unavailable")
    log.info("Job submitted: %s (device=%s)", job_id, args.device)

    try:
        job_status = wait_for_job(client, job_id, timeout=args.job_timeout)
    except TimeoutError as e:
        raise DeviceUnavailableError(str(e)) from e
    log.info("Job %s finished: %s", job_id, job_status)

    wait_for_log_upload(client, job_id)
    tests, raw_logs, failure_details = fetch_logs_and_parse_tests(client, job_id)

    job_ok = job_status == JobState.COMPLETED.value.lower()
    if not job_ok and not tests:
        raise DeviceUnavailableError(
            f"Job did not complete (status={job_status}) and produced no test results"
        )

    # all() over an empty mapping is True, so with no parsed tests this
    # reduces to job_ok.
    passed = job_ok and all(tests.values())
    if spec.test_framework == TestFramework.BASH and not tests:
        # A BASH job with no recovered results is a failure even if the job
        # itself reported completion.
        log.error("No test results recovered (state=%s). Script likely never ran.", job_status)
        passed = False
    if not passed:
        log.error("Job did not complete successfully or tests failed (status=%s)", job_status)

    return JobResult(passed=passed, tests=tests, raw_logs=raw_logs, failure_details=failure_details)
def main() -> int: def main() -> int:
args = parse_args() args = parse_args()
platform = DEVICE_PLATFORM.get(args.device)
if platform is None:
log.error(
"Unknown device %r. Known: %s",
args.device, ", ".join(sorted(DEVICE_PLATFORM.keys())),
)
return 1
spec = PLATFORM_SPECS[platform]
api_key = os.environ.get("QDC_API_KEY") api_key = os.environ.get("QDC_API_KEY")
if not api_key: if not api_key:
log.error("QDC_API_KEY environment variable must be set") log.error("QDC_API_KEY environment variable must be set")
@@ -334,10 +633,9 @@ def main() -> int:
return 1 return 1
with tempfile.TemporaryDirectory() as tmpdir: with tempfile.TemporaryDirectory() as tmpdir:
log.info("Building artifact ...") log.info("Building %s artifact (test=%s) ...", platform.value, args.test)
zip_path = build_artifact_zip( zip_path = spec.build_artifact(
args.pkg_dir, Path(tmpdir), args.pkg_dir, Path(tmpdir), args.test, args.model_url
test_mode=args.test, model_url=args.model_url,
) )
log.info("Uploading artifact (%d MB) ...", zip_path.stat().st_size // 1_000_000) log.info("Uploading artifact (%d MB) ...", zip_path.stat().st_size // 1_000_000)
artifact_id = qdc_api.upload_file(client, str(zip_path), ArtifactType.TESTSCRIPT) artifact_id = qdc_api.upload_file(client, str(zip_path), ArtifactType.TESTSCRIPT)
@@ -346,46 +644,31 @@ def main() -> int:
log.error("Artifact upload failed") log.error("Artifact upload failed")
return 1 return 1
wait_for_capacity(client) max_attempts = 1 + args.retries
for attempt in range(1, max_attempts + 1):
job_id = qdc_api.submit_job( try:
public_api_client=client, result = _submit_and_run_job(client, args, spec, target_id, artifact_id)
target_id=target_id, break
job_name="llama.cpp Hexagon tests", except DeviceUnavailableError as e:
external_job_id=None, if attempt < max_attempts:
job_type=JobType.AUTOMATED, log.warning(
job_mode=JobMode.APPLICATION, "Attempt %d/%d failed (device unavailable): %s — retrying in %ds",
timeout=max(1, args.job_timeout // 60), attempt, max_attempts, e, args.retry_delay,
test_framework=TestFramework.APPIUM, )
entry_script=None, time.sleep(args.retry_delay)
job_artifacts=[artifact_id], else:
monkey_events=None, log.error(
monkey_session_timeout=None, "Attempt %d/%d failed (device unavailable): %s — no retries left",
job_parameters=[JobSubmissionParameter.WIFIENABLED], attempt, max_attempts, e,
) )
if job_id is None: write_summary(
log.error("Job submission failed") JobResult(passed=False, tests={}),
title=f"QDC Device Unavailable ({args.device})",
)
return 1
else:
return 1 return 1
log.info("Job submitted: %s (device=%s)", job_id, args.device)
try:
job_status = wait_for_job(client, job_id, timeout=args.job_timeout)
except TimeoutError as e:
log.error("%s", e)
write_summary(JobResult(passed=False, tests={}), title=f"QDC Job Timed Out ({args.device})")
return 1
log.info("Job %s finished: %s", job_id, job_status)
wait_for_log_upload(client, job_id)
tests, raw_logs, failure_details = fetch_logs_and_parse_tests(client, job_id)
passed = job_status == JobState.COMPLETED.value.lower()
if tests:
passed = passed and all(tests.values())
if not passed:
log.error("Job did not complete successfully or tests failed (status=%s)", job_status)
result = JobResult(passed=passed, tests=tests, raw_logs=raw_logs, failure_details=failure_details)
if args.test == "backend-ops": if args.test == "backend-ops":
title = f"Backend Ops — HTP0 ({args.device})" title = f"Backend Ops — HTP0 ({args.device})"
elif args.test == "all": elif args.test == "all":
@@ -394,7 +677,7 @@ def main() -> int:
title = f"QDC Test Results ({args.device})" title = f"QDC Test Results ({args.device})"
write_summary(result, title=title) write_summary(result, title=title)
return 0 if passed else 1 return 0 if result.passed else 1
if __name__ == "__main__": if __name__ == "__main__":
@@ -0,0 +1,232 @@
#!/bin/bash
# llama.cpp Hexagon test entry script for QDC Linux IoT (BASH framework).
#
# Placeholders substituted by run_qdc_jobs.py (Linux platform, inferred from --device) before upload:
# {MODEL_URL} direct URL to a .gguf model file
# {TEST_MODE} bench | backend-ops | all
#
# QDC extracts the artifact zip to /data/local/tmp/TestContent/ and invokes
# this script via: /bin/bash /data/local/tmp/TestContent/run_linux.sh
# Any files written under /data/local/tmp/QDC_logs/ are auto-uploaded.
# errexit stays off: failures are checked explicitly where they matter.
set +e
umask 022

LOG_DIR=/data/local/tmp/QDC_logs
BUNDLE_DIR=/data/local/tmp/TestContent/llama_cpp_bundle
MODEL_DIR=/data/local/tmp/gguf
MODEL_PATH="$MODEL_DIR/model.gguf"
RESULTS_XML="$LOG_DIR/results.xml"

mkdir -p "$LOG_DIR" "$MODEL_DIR"

# Redirect all parent-shell output to script.log so QDC auto-uploads it;
# per-case runs still capture their own stdout/stderr into dedicated logs.
exec > "$LOG_DIR/script.log" 2>&1

echo "=== env ==="
date -u
uname -a
pwd

# Best effort: try to remount / read-write; failure is ignored.
mount -o rw,remount / 2>/dev/null || true

cd "$BUNDLE_DIR" || { echo "FATAL: bundle missing at $BUNDLE_DIR"; exit 1; }
chmod +x bin/* 2>/dev/null

# Append the previous LD_LIBRARY_PATH only when it is non-empty. A trailing
# ':' would add an empty entry, which the dynamic loader treats as the
# current working directory.
export LD_LIBRARY_PATH="$BUNDLE_DIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export ADSP_LIBRARY_PATH="$BUNDLE_DIR/lib"
export GGML_HEXAGON_EXPERIMENTAL=1

echo "=== download model ==="
MODEL_URL="{MODEL_URL}"
if [ -z "$MODEL_URL" ]; then
    echo "No model URL provided, skipping download"
elif [ ! -f "$MODEL_PATH" ]; then
    # Download to a temporary name and rename only on success, so a failed
    # or partial transfer never leaves a file at $MODEL_PATH that a later
    # run would mistake for a complete model.
    curl -L -fS --retry 3 --retry-delay 5 -o "$MODEL_PATH.part" "$MODEL_URL"
    curl_rc=$?
    if [ $curl_rc -ne 0 ]; then
        echo "FATAL: model download failed (rc=$curl_rc)"
        rm -f "$MODEL_PATH.part"
        exit 1
    fi
    mv -f "$MODEL_PATH.part" "$MODEL_PATH" || {
        echo "FATAL: could not move downloaded model into place"
        exit 1
    }
    ls -la "$MODEL_PATH"
fi
# ---------------------------------------------------------------------------
# JUnit XML helpers
# ---------------------------------------------------------------------------
# Start a fresh results file: XML declaration followed by the opening
# <testsuites> and <testsuite> tags. Any existing "$RESULTS_XML" is truncated.
xml_open() {
    {
        echo '<?xml version="1.0" encoding="utf-8"?>'
        echo '<testsuites>'
        echo '<testsuite name="llama_cpp_linux">'
    } > "$RESULTS_XML"
}
# Append the closing </testsuite> and </testsuites> tags to "$RESULTS_XML".
xml_close() {
    {
        echo '</testsuite>'
        echo '</testsuites>'
    } >> "$RESULTS_XML"
}
# Append a passing (self-closed) <testcase> element to "$RESULTS_XML".
#   $1  value for the classname attribute
#   $2  value for the name attribute
xml_case_pass() {
    local suite="$1"
    local case_name="$2"
    printf '<testcase classname="%s" name="%s"/>\n' \
        "$suite" "$case_name" >> "$RESULTS_XML"
}
# Append a failing <testcase> element to "$RESULTS_XML".
#   $1  value for the classname attribute
#   $2  value for the name attribute
#   $3  exit code, reported in the failure message
#   $4  log file whose last 4096 bytes become the failure body
# The log tail is wrapped in CDATA; any "]]>" inside it is rewritten to
# "]] >" so it cannot terminate the CDATA section early. A missing or
# unreadable log simply yields an empty body.
xml_case_fail() {
    local suite="$1" case_name="$2" exit_code="$3" log_path="$4"
    {
        printf '<testcase classname="%s" name="%s">\n<failure message="exit %s"><![CDATA[\n' \
            "$suite" "$case_name" "$exit_code"
        tail -c 4096 "$log_path" 2>/dev/null | sed 's/]]>/]] >/g'
        printf '\n%s\n%s\n' ']]></failure>' '</testcase>'
    } >> "$RESULTS_XML"
}
# Translate a backend name (cpu | gpu | npu) into the pair
# "<GGML_HEXAGON_NDEV value> <--device value>", printed on one line.
# The device "none" means no offload (CPU). Unknown names print nothing.
backend_env() {
    local backend="$1"
    if [ "$backend" = cpu ]; then
        echo "0 none"
    elif [ "$backend" = gpu ]; then
        echo "0 GPUOpenCL"
    elif [ "$backend" = npu ]; then
        echo "1 HTP0"
    fi
}
# Print the log-file suffix used for a backend: cpu -> cpu, gpu -> gpu,
# npu -> htp. Unknown names print nothing.
backend_log_name() {
    local suffix
    case "$1" in
        cpu|gpu) suffix="$1" ;;
        npu)     suffix="htp" ;;
        *)       return 0 ;;
    esac
    echo "$suffix"
}
# Print the --device value for a backend: cpu -> none, gpu -> GPUOpenCL,
# npu -> HTP0. Unknown names print nothing.
backend_device_name() {
    local backend="$1"
    case "$backend" in
        cpu) printf '%s\n' "none" ;;
        gpu) printf '%s\n' "GPUOpenCL" ;;
        npu) printf '%s\n' "HTP0" ;;
    esac
}
# When a per-case `timeout N` fired (exit code 124), append a diagnostic
# block to the case log. Without it the log usually just stops mid-init
# with no stderr, which is hard to read in CI summaries.
#   $1  exit code of the timed command
#   $2  timeout budget in seconds (only used in the banner)
#   $3  log file to append to
# Any other exit code is a no-op that returns 0.
note_timeout_if_triggered() {
    local exit_code="$1" limit_s="$2" log_path="$3"
    if ! [ "$exit_code" -eq 124 ]; then
        return 0
    fi
    {
        printf '\n=== TIMEOUT after %ss ===\n' "$limit_s"
        printf 'uptime: '
        uptime 2>/dev/null
        printf 'free -m:\n'
        free -m 2>/dev/null
        printf 'loadavg: '
        cat /proc/loadavg 2>/dev/null
    } >> "$log_path"
}
# Print the llama-completion argument string for a backend (cpu | gpu | npu).
# All backends share the context size, -no-cnv, token count and seed; they
# differ in --device and in the batch / ubatch size flag. Unknown names print
# nothing.
completion_extra_args() {
    local common="--ctx-size 128 -no-cnv -n 32 --seed 42"
    case "$1" in
        cpu) echo "--device none $common --batch-size 128" ;;
        gpu) echo "--device GPUOpenCL $common --ubatch-size 512" ;;
        npu) echo "--device HTP0 $common --ubatch-size 1024" ;;
    esac
}
# Run one llama-completion case for a backend (cpu | gpu | npu) and record
# the outcome in "$RESULTS_XML".
#   $1  backend name
# The prompt file is (re)written on every call, the run is capped at 600 s,
# stdin is /dev/null, and stdout/stderr go to a per-device log under $LOG_DIR.
run_completion_case() {
    local name="$1"
    local budget=600
    local case_id="test_llama_completion[$name]"
    local suite="tests.test_bench_posix"

    # backend_env prints "<ndev> <device>" on one line.
    local ndev device
    read -r ndev device <<< "$(backend_env "$name")"

    local log="$LOG_DIR/llama_completion_$(backend_device_name "$name").log"

    local prompt="$LOG_DIR/bench_prompt.txt"
    echo 'What is the capital of France?' > "$prompt"

    # Split the backend-specific argument string into separate words.
    local -a extra_args
    read -r -a extra_args <<< "$(completion_extra_args "$name")"

    echo "=== [completion:$name] llama-completion --device $device (NDEV=$ndev) ==="
    local rc
    timeout "$budget" env "GGML_HEXAGON_NDEV=$ndev" ./bin/llama-completion \
        -m "$MODEL_PATH" \
        -f "$prompt" \
        "${extra_args[@]}" \
        > "$log" 2>&1 < /dev/null
    rc=$?

    note_timeout_if_triggered "$rc" "$budget" "$log"
    if [ "$rc" -eq 0 ]; then
        xml_case_pass "$suite" "$case_id"
    else
        xml_case_fail "$suite" "$case_id" "$rc" "$log"
    fi
}
# Run one llama-bench case for a backend (cpu | gpu | npu) and record the
# outcome in "$RESULTS_XML".
#   $1  backend name
# The run is capped at 600 s; stdout/stderr go to a per-backend log under
# $LOG_DIR.
run_bench_case() {
    local name="$1"
    local budget=600
    local case_id="test_llama_bench[$name]"
    local suite="tests.test_bench_posix"

    # backend_env prints "<ndev> <device>" on one line.
    local ndev device
    read -r ndev device <<< "$(backend_env "$name")"

    local log="$LOG_DIR/llama_bench_$(backend_log_name "$name").log"

    echo "=== [bench:$name] llama-bench --device $device (NDEV=$ndev) ==="
    local rc
    timeout "$budget" env "GGML_HEXAGON_NDEV=$ndev" ./bin/llama-bench \
        -m "$MODEL_PATH" \
        --device "$device" \
        -ngl 99 \
        --batch-size 128 \
        -t 4 \
        -p 128 \
        -n 32 \
        > "$log" 2>&1
    rc=$?

    note_timeout_if_triggered "$rc" "$budget" "$log"
    if [ "$rc" -eq 0 ]; then
        xml_case_pass "$suite" "$case_id"
    else
        xml_case_fail "$suite" "$case_id" "$rc" "$log"
    fi
}
# Run test-backend-ops (MUL_MAT on HTP0) for one type_a value and record the
# outcome in "$RESULTS_XML".
#   $1  data type used to build the -p filter (e.g. mxfp4, fp16, q4_0)
# The run is capped at 600 s; stdout/stderr go to backend_ops_<dtype>.log
# under $LOG_DIR.
run_backend_ops_case() {
    local dtype="$1"
    local budget=600
    local case_id="test_backend_ops_htp0[$dtype]"
    local suite="tests.test_backend_ops_posix"
    local log="$LOG_DIR/backend_ops_${dtype}.log"

    local pattern
    if [ "$dtype" = q4_0 ]; then
        # Matches Android: exclude a known-broken shape on NPU.
        pattern='^(?=.*type_a=q4_0)(?!.*type_b=f32,m=576,n=512,k=576).*$'
    else
        pattern="type_a=${dtype}"
    fi

    echo "=== [backend-ops:$dtype] test-backend-ops -b HTP0 -o MUL_MAT ==="
    local rc
    timeout "$budget" env GGML_HEXAGON_NDEV=1 GGML_HEXAGON_HOSTBUF=0 ./bin/test-backend-ops \
        -b HTP0 -o MUL_MAT -p "$pattern" \
        > "$log" 2>&1
    rc=$?

    note_timeout_if_triggered "$rc" "$budget" "$log"
    if [ "$rc" -eq 0 ]; then
        xml_case_pass "$suite" "$case_id"
    else
        xml_case_fail "$suite" "$case_id" "$rc" "$log"
    fi
}
# Completion and bench cases for every backend.
run_bench_suite() {
    local b
    for b in cpu gpu npu; do run_completion_case "$b"; done
    for b in cpu gpu npu; do run_bench_case "$b"; done
}

# test-backend-ops cases for every covered data type.
run_backend_ops_suite() {
    local d
    for d in mxfp4 fp16 q4_0; do run_backend_ops_case "$d"; done
}

xml_open

# The placeholder below is substituted before this script is uploaded.
case "{TEST_MODE}" in
    bench)
        run_bench_suite
        ;;
    backend-ops)
        run_backend_ops_suite
        ;;
    all)
        run_bench_suite
        run_backend_ops_suite
        ;;
    *)
        echo "FATAL: unsupported TEST_MODE={TEST_MODE}"
        # Record an explicit failing case. Otherwise results.xml would hold an
        # empty test suite and, since this script always exits 0, nothing
        # would tell the host that no test actually ran. The tail of
        # script.log (which contains the FATAL line above) is attached.
        xml_case_fail "tests.run_linux" "test_mode_supported" 2 "$LOG_DIR/script.log"
        ;;
esac

xml_close

echo "=== done ==="
# Host parses results.xml to decide pass/fail.
exit 0
@@ -1,8 +1,9 @@
""" """
On-device test-backend-ops runner for llama.cpp (HTP0 backend). On-device test-backend-ops runner for llama.cpp (HTP0 backend).
Executed by QDC's Appium test framework on the QDC runner. On Android: executed by QDC's Appium test framework on the QDC runner.
The runner has ADB access to the allocated device. The runner has ADB access to the allocated device.
On Linux: runs test-backend-ops directly via run_linux.sh (BASH framework).
""" """
import os import os
@@ -10,7 +11,12 @@ import sys
import pytest import pytest
from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_adb_command, write_qdc_log from utils import (
BIN_PATH,
push_bundle_if_needed,
run_script,
write_qdc_log,
)
@pytest.fixture(scope="session", autouse=True) @pytest.fixture(scope="session", autouse=True)
@@ -20,17 +26,21 @@ def install(driver):
@pytest.mark.parametrize("type_a", ["mxfp4", "fp16", "q4_0"]) @pytest.mark.parametrize("type_a", ["mxfp4", "fp16", "q4_0"])
def test_backend_ops_htp0(type_a): def test_backend_ops_htp0(type_a):
cmd = f"{CMD_PREFIX} GGML_HEXAGON_HOSTBUF=0 GGML_HEXAGON_EXPERIMENTAL=1 {BIN_PATH}/test-backend-ops -b HTP0 -o MUL_MAT"
if type_a == "q4_0": if type_a == "q4_0":
cmd += r' -p "^(?=.*type_a=q4_0)(?!.*type_b=f32,m=576,n=512,k=576).*$"' pattern = r'^(?=.*type_a=q4_0)(?!.*type_b=f32,m=576,n=512,k=576).*$'
else: else:
cmd += f" -p type_a={type_a}" pattern = f"type_a={type_a}"
result = run_adb_command(
cmd, quoted_pattern = f'"{pattern}"' if type_a == "q4_0" else pattern
check=False, result = run_script(
"run-tool.sh",
extra_env={"HB": "0"},
extra_args=["test-backend-ops", "-b", "HTP0", "-o", "MUL_MAT", "-p", quoted_pattern],
) )
write_qdc_log(f"backend_ops_{type_a}.log", result.stdout or "") write_qdc_log(f"backend_ops_{type_a}.log", result.stdout or "")
assert result.returncode == 0, f"test-backend-ops type_a={type_a} failed (exit {result.returncode})" assert result.returncode == 0, (
f"test-backend-ops type_a={type_a} failed (exit {result.returncode})"
)
if __name__ == "__main__": if __name__ == "__main__":
@@ -1,11 +1,13 @@
""" """
On-device bench and completion test runner for llama.cpp (CPU, GPU, NPU backends). On-device bench and completion test runner for llama.cpp (CPU, GPU, NPU backends).
Executed by QDC's Appium test framework on the QDC runner. On Android: calls upstream run-*.sh scripts from llama.cpp/scripts/snapdragon/adb/
The runner has ADB access to the allocated device. on the QDC runner host (scripts wrap commands in ``adb shell`` internally).
On Linux: runs llama-bench directly via run_linux.sh (BASH framework).
Placeholders replaced at artifact creation time by run_qdc_jobs.py: Placeholders replaced at artifact creation time by run_qdc_jobs.py:
<<MODEL_URL>> Direct URL to the GGUF model file (downloaded on-device via curl) <<MODEL_URL>> Direct URL to the GGUF model file (downloaded on-device)
""" """
import os import os
@@ -14,58 +16,75 @@ import sys
import pytest import pytest
from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_adb_command, write_qdc_log from utils import (
BIN_PATH,
MODEL_DEVICE_PATH,
MODEL_NAME,
PROMPT_DIR,
push_bundle_if_needed,
run_adb_command,
run_script,
write_qdc_log,
)
MODEL_PATH = "/data/local/tmp/model.gguf" MODEL_URL = "<<MODEL_URL>>"
PROMPT = "What is the capital of France?"
CLI_OPTS = "--batch-size 128 -n 128 -no-cnv --seed 42"
@pytest.fixture(scope="session", autouse=True) @pytest.fixture(scope="session", autouse=True)
def install(driver): def install(driver):
push_bundle_if_needed(f"{BIN_PATH}/llama-cli") push_bundle_if_needed(f"{BIN_PATH}/llama-cli")
run_adb_command(f"mkdir -p /data/local/tmp/gguf {PROMPT_DIR}")
# Skip model download if already present run_adb_command(f"echo 'What is the capital of France?' > {PROMPT_DIR}/bench_prompt.txt")
check = subprocess.run( check = subprocess.run(
["adb", "shell", f"ls {MODEL_PATH}"], ["adb", "shell", f"ls {MODEL_DEVICE_PATH}"],
text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
) )
if check.returncode != 0: if check.returncode != 0:
run_adb_command(f'curl -L -J --output {MODEL_PATH} "<<MODEL_URL>>"') run_adb_command(f'curl -L -J --output {MODEL_DEVICE_PATH} "{MODEL_URL}"')
@pytest.mark.parametrize("device,extra_flags", [ @pytest.mark.parametrize(
pytest.param("none", "-ctk q8_0 -ctv q8_0", id="cpu"), "device",
pytest.param("GPUOpenCL", "", id="gpu"), [
pytest.param("HTP0", "-ctk q8_0 -ctv q8_0", id="npu"), pytest.param("none", id="cpu"),
]) pytest.param("GPUOpenCL", id="gpu"),
def test_llama_completion(device, extra_flags): pytest.param("HTP0", id="npu"),
result = run_adb_command( ],
f'{CMD_PREFIX} {BIN_PATH}/llama-completion' )
f' -m {MODEL_PATH} --device {device} -ngl 99 -t 4 {CLI_OPTS} {extra_flags} -fa on' def test_llama_completion(device):
f' -p "{PROMPT}"', result = run_script(
check=False, "run-completion.sh",
extra_env={"D": device, "M": MODEL_NAME},
extra_args=["--batch-size", "128", "-n", "128", "--seed", "42",
"-f", f"{PROMPT_DIR}/bench_prompt.txt"],
) )
write_qdc_log(f"llama_completion_{device}.log", result.stdout or "") write_qdc_log(f"llama_completion_{device}.log", result.stdout or "")
assert result.returncode == 0, f"llama-completion {device} failed (exit {result.returncode})" assert result.returncode == 0, (
f"llama-completion {device} failed (exit {result.returncode})"
)
_DEVICE_LOG_NAME = {"none": "cpu", "GPUOpenCL": "gpu", "HTP0": "htp"} _DEVICE_LOG_NAME = {"none": "cpu", "GPUOpenCL": "gpu", "HTP0": "htp"}
@pytest.mark.parametrize("device", [ @pytest.mark.parametrize(
pytest.param("none", id="cpu"), "device",
pytest.param("GPUOpenCL", id="gpu"), [
pytest.param("HTP0", id="npu"), pytest.param("none", id="cpu"),
]) pytest.param("GPUOpenCL", id="gpu"),
pytest.param("HTP0", id="npu"),
],
)
def test_llama_bench(device): def test_llama_bench(device):
result = run_adb_command( result = run_script(
f"{CMD_PREFIX} {BIN_PATH}/llama-bench" "run-bench.sh",
f" -m {MODEL_PATH} --device {device} -ngl 99 --batch-size 128 -t 4 -p 128 -n 32", extra_env={"D": device, "M": MODEL_NAME},
check=False, extra_args=["--batch-size", "128", "-p", "128", "-n", "32"],
) )
write_qdc_log(f"llama_bench_{_DEVICE_LOG_NAME[device]}.log", result.stdout or "") write_qdc_log(f"llama_bench_{_DEVICE_LOG_NAME[device]}.log", result.stdout or "")
assert result.returncode == 0, f"llama-bench {device} failed (exit {result.returncode})" assert result.returncode == 0, (
f"llama-bench {device} failed (exit {result.returncode})"
)
if __name__ == "__main__": if __name__ == "__main__":
+82 -32
View File
@@ -1,5 +1,7 @@
"""Shared helpers for QDC on-device test runners.""" """Shared helpers for QDC on-device test runners."""
from __future__ import annotations
import logging import logging
import os import os
import subprocess import subprocess
@@ -13,16 +15,14 @@ log = logging.getLogger(__name__)
# On-device paths # On-device paths
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
BUNDLE_PATH = "/data/local/tmp/llama_cpp_bundle" BUNDLE_PATH = "/data/local/tmp/llama.cpp"
BIN_PATH = f"{BUNDLE_PATH}/bin"
LIB_PATH = f"{BUNDLE_PATH}/lib"
QDC_LOGS_PATH = "/data/local/tmp/QDC_logs" QDC_LOGS_PATH = "/data/local/tmp/QDC_logs"
LIB_PATH = f"{BUNDLE_PATH}/lib" SCRIPTS_DIR = "/qdc/appium"
BIN_PATH = f"{BUNDLE_PATH}/bin" MODEL_NAME = "model.gguf"
ENV_PREFIX = ( MODEL_DEVICE_PATH = "/data/local/tmp/gguf/model.gguf"
f"export LD_LIBRARY_PATH={LIB_PATH} && " PROMPT_DIR = "/data/local/tmp/scorecard_prompts"
f"export ADSP_LIBRARY_PATH={LIB_PATH} && "
f"chmod +x {BIN_PATH}/* &&"
)
CMD_PREFIX = f"cd {BUNDLE_PATH} && {ENV_PREFIX}"
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Appium session options # Appium session options
@@ -34,16 +34,47 @@ options.set_capability("platformName", "Android")
options.set_capability("deviceName", os.getenv("ANDROID_DEVICE_VERSION")) options.set_capability("deviceName", os.getenv("ANDROID_DEVICE_VERSION"))
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# ADB helpers # Shell / process helpers
# ---------------------------------------------------------------------------
def write_qdc_log(filename: str, content: str) -> None:
    """Store *content* as ``QDC_LOGS_PATH/filename`` on the device.

    The log directory is created on the device first, then the text is
    staged in a local temporary file and copied over with ``adb push``.
    The temporary file is removed even if the push raises. The exit status
    of the adb commands is not checked.
    """
    capture = {"stdout": subprocess.PIPE, "stderr": subprocess.STDOUT}

    subprocess.run(["adb", "shell", f"mkdir -p {QDC_LOGS_PATH}"], **capture)

    # delete=False: the file must outlive the ``with`` block so adb can read it.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False) as staged:
        staged.write(content)
        local_path = staged.name

    remote_path = f"{QDC_LOGS_PATH}/{filename}"
    try:
        subprocess.run(["adb", "push", local_path, remote_path], **capture)
    finally:
        os.unlink(local_path)
def ensure_bundle(check_binary: str | None = None) -> None:
    """Make sure the llama_cpp_bundle is present on the target device.

    Delegates to ``push_bundle_if_needed``. When *check_binary* is omitted
    (or empty), ``{BIN_PATH}/llama-cli`` is used as the presence probe.
    """
    probe = check_binary if check_binary else f"{BIN_PATH}/llama-cli"
    push_bundle_if_needed(probe)
# ---------------------------------------------------------------------------
# Android / Linux host helpers
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def run_adb_command(cmd: str, *, check: bool = True) -> subprocess.CompletedProcess: def run_adb_command(cmd: str, *, check: bool = True) -> subprocess.CompletedProcess:
# Append exit-code sentinel because `adb shell` doesn't reliably propagate """Run a command on-device via ``adb shell`` with exit-code sentinel."""
# the on-device exit code (older ADB versions always return 0).
raw = subprocess.run( raw = subprocess.run(
["adb", "shell", f"{cmd}; echo __RC__:$?"], ["adb", "shell", f"{cmd}; echo __RC__:$?"],
text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
) )
stdout = raw.stdout stdout = raw.stdout
returncode = raw.returncode returncode = raw.returncode
@@ -55,39 +86,58 @@ def run_adb_command(cmd: str, *, check: bool = True) -> subprocess.CompletedProc
stdout = "\n".join(lines[:-1]) + "\n" stdout = "\n".join(lines[:-1]) + "\n"
except ValueError: except ValueError:
pass pass
log.info("%s", stdout) log.info(stdout)
result = subprocess.CompletedProcess(raw.args, returncode, stdout=stdout) result = subprocess.CompletedProcess(raw.args, returncode, stdout=stdout)
if check: if check:
assert returncode == 0, f"Command failed (exit {returncode})" assert returncode == 0, f"Command failed (exit {returncode})"
return result return result
def write_qdc_log(filename: str, content: str) -> None: def run_script(
"""Push content as a log file to QDC_LOGS_PATH on the device for QDC log collection.""" script: str,
subprocess.run( extra_env: dict[str, str] | None = None,
["adb", "shell", f"mkdir -p {QDC_LOGS_PATH}"], extra_args: list[str] | None = None,
stdout=subprocess.PIPE, stderr=subprocess.STDOUT, ) -> subprocess.CompletedProcess:
"""Run an upstream shell script from /qdc/appium/ on the QDC runner host."""
env = os.environ.copy()
env["GGML_HEXAGON_EXPERIMENTAL"] = "1"
if extra_env:
env.update(extra_env)
cmd = [f"{SCRIPTS_DIR}/{script}"] + (extra_args or [])
result = subprocess.run(
cmd, env=env,
text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
)
log.info(result.stdout)
return result
def adb_shell(cmd: str) -> None:
    """Run *cmd* on the device via ``adb shell``, ignoring the outcome.

    Fire-and-forget: output is captured and discarded, and a non-zero exit
    status is not treated as an error (``check=False``).
    """
    # adb joins every argument after ``shell`` with spaces and hands the
    # resulting line to the device shell. Passing ["sh", "-c", cmd] therefore
    # reaches the device as `sh -c <first word of cmd> <rest...>`, which runs
    # only the first word. Passing cmd as the single shell argument, as
    # run_adb_command in this module does, executes the whole command line.
    subprocess.run(
        ["adb", "shell", cmd],
        capture_output=True, encoding="utf-8", errors="replace", check=False,
    )
with tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False) as f:
f.write(content)
tmp_path = f.name
try:
subprocess.run(
["adb", "push", tmp_path, f"{QDC_LOGS_PATH}/{filename}"],
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
)
finally:
os.unlink(tmp_path)
def push_bundle_if_needed(check_binary: str) -> None: def push_bundle_if_needed(check_binary: str) -> None:
"""Push llama_cpp_bundle to the device if check_binary is not already present.""" """Push llama_cpp_bundle to the device if check_binary is not already present."""
result = subprocess.run( result = subprocess.run(
["adb", "shell", f"ls {check_binary}"], ["adb", "shell", f"ls {check_binary}"],
text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
) )
if result.returncode != 0: if result.returncode != 0:
subprocess.run( subprocess.run(
["adb", "push", "/qdc/appium/llama_cpp_bundle/", "/data/local/tmp"], ["adb", "push", "/qdc/appium/llama_cpp_bundle/", BUNDLE_PATH],
text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
)
subprocess.run(
["adb", "shell", f"find {BUNDLE_PATH}/bin -type f -exec chmod 755 {{}} +"],
text=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
) )
+1 -1
View File
@@ -13,7 +13,7 @@ exclude = [
[[overrides]] [[overrides]]
include = [ include = [
"./tools/server/tests/**", "./tools/server/tests/**",
"./scripts/snapdragon/qdc/tests/**", "./scripts/snapdragon/qdc/**",
] ]
[overrides.rules] [overrides.rules]