CI : support IOT device (IQ9) (#22987)

* update test scripts
* align CI behavior between linux and android
* remove automatic cancellation after 15min
* enable cancel-in-progress
* fix ty check issue
* update and fix pylint issue
* update runner so that we are not restricted by the 15min limit rule
* fix flake8 lint issue
* update runner according to review feedback
* code update according to review feedback
* switch from llama-cli to llama-completion binary with -no-cnv flag
2026-05-14 13:58:34 -07:00
parent 834a243664
commit d81e63dcfd
7 changed files with 793 additions and 167 deletions
@@ -58,14 +58,45 @@ jobs:
          name: llama-cpp-android-arm64-snapdragon
          path: pkg-snapdragon/llama.cpp
  # Builds llama.cpp for Snapdragon Linux IoT targets and publishes the install
  # tree as the `llama-cpp-linux-arm64-snapdragon` artifact.
  linux-iot-snapdragon:
    runs-on: ubuntu-latest
    # All steps run inside this container image.
    container:
      image: 'ghcr.io/snapdragon-toolchain/arm64-linux:v0.1'
    defaults:
      run:
        shell: bash
    steps:
      - name: Clone
        uses: actions/checkout@v6
        with:
          # Full history, no Git LFS objects.
          fetch-depth: 0
          lfs: false
      - name: Build Llama.CPP for Snapdragon Linux IoT
        id: build_llama_cpp_snapdragon_linux
        # Copy the Snapdragon CMake presets to the repo root, configure the
        # release preset with OpenCL enabled, build, then install into
        # pkg-snapdragon/llama.cpp.
        run: |
          cp docs/backend/snapdragon/CMakeUserPresets.json .
          cmake --preset arm64-linux-snapdragon-release -B build-snapdragon -DGGML_OPENCL=ON
          cmake --build build-snapdragon -j $(nproc)
          cmake --install build-snapdragon --prefix pkg-snapdragon/llama.cpp
      - name: Upload Llama.CPP Snapdragon Linux IoT Build Artifact
        # Upload only when the build step itself succeeded; always() keeps the
        # step eligible regardless of the overall job status.
        if: ${{ always() && steps.build_llama_cpp_snapdragon_linux.outcome == 'success' }}
        uses: actions/upload-artifact@v6
        with:
          name: llama-cpp-linux-arm64-snapdragon
          path: pkg-snapdragon/llama.cpp
  test-snapdragon-qdc:
-    name: Test on QDC Android Device (${{ matrix.device }})
+    name: Test on QDC Device (${{ matrix.device }})
-    needs: [android-ndk-snapdragon]
+    needs: [android-ndk-snapdragon, linux-iot-snapdragon]
-    runs-on: ubuntu-slim
+    runs-on: ubuntu-24.04-arm
    timeout-minutes: 90
    strategy:
      fail-fast: false
      matrix:
-        device: [SM8750, SM8650, SM8850]
+        device: [SM8750, SM8850, QCS9075M]
    steps:
      - name: Checkout
@@ -74,11 +105,11 @@ jobs:
      - name: Download build artifact
        uses: actions/download-artifact@v7
        with:
-          name: llama-cpp-android-arm64-snapdragon
+          name: ${{ startsWith(matrix.device, 'QCS') && 'llama-cpp-linux-arm64-snapdragon' || 'llama-cpp-android-arm64-snapdragon' }}
          path: pkg-snapdragon/llama.cpp
      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
        with:
          python-version: '3.x'
          cache: pip
@@ -107,7 +138,8 @@ jobs:
              --test       all \
              --pkg-dir    pkg-snapdragon/llama.cpp \
              --model-url  "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q4_0.gguf" \
-              --device     ${{ matrix.device }}
+              --device     ${{ matrix.device }} \
              ${{ startsWith(matrix.device, 'QCS') && '--retries 2 --retry-delay 300' || '' }}
        env:
          QDC_API_KEY: ${{ secrets.QDC_API_KEY }}
@@ -1,4 +1,4 @@
-"""Run llama.cpp Hexagon Android tests in a single QDC Appium job.
+"""Run llama.cpp Hexagon tests in a single QDC job.
 Bundles test scripts into one artifact and submits a single QDC job:
@@ -10,6 +10,10 @@ Results are written to $GITHUB_STEP_SUMMARY when set (GitHub Actions).
 Prerequisites:
  pip install /path/to/qualcomm_device_cloud_sdk*.whl
 Platform is inferred from --device:
  android  Appium + pytest (Android phones: SM8750 / SM8650 / SM8850)
  linux    BASH (Linux IoT: QCS9075M)
 Required environment variables:
  QDC_API_KEY   API key from QDC UI -> Users -> Settings -> API Keys
@@ -23,6 +27,7 @@ Usage:
 from __future__ import annotations
 import argparse
 import enum
 import logging
 import os
 import re
@@ -30,15 +35,35 @@ import shutil
 import sys
 import tempfile
 import time
 import urllib.request
 import xml.etree.ElementTree as ET
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Callable
-from qualcomm_device_cloud_sdk.api import qdc_api  # ty: ignore[unresolved-import]
+from qualcomm_device_cloud_sdk.api import qdc_api
-from qualcomm_device_cloud_sdk.logging import configure_logging  # ty: ignore[unresolved-import]
+from qualcomm_device_cloud_sdk.logging import configure_logging
-from qualcomm_device_cloud_sdk.models import ArtifactType, JobMode, JobState, JobSubmissionParameter, JobType, TestFramework  # ty: ignore[unresolved-import]
+from qualcomm_device_cloud_sdk.models import (
    ArtifactType,
    JobMode,
    JobState,
    JobSubmissionParameter,
    JobType,
    TestFramework,
 )
 # configure_logging only sets up the SDK logger; basicConfig is needed for
 # our own log.info to reach stdout.
 logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(name)s %(levelname)s - %(message)s",
    handlers=[logging.StreamHandler()],
 )
 configure_logging(level=logging.INFO, handlers=[logging.StreamHandler()])
 # Silence per-poll GET/status spam from the SDK and its HTTP client.
 logging.getLogger("qualcomm_device_cloud").setLevel(logging.WARNING)
 logging.getLogger("httpx").setLevel(logging.WARNING)
 logging.getLogger("httpcore").setLevel(logging.WARNING)
 log = logging.getLogger(__name__)
 POLL_INTERVAL        = 30
@@ -47,23 +72,56 @@ LOG_UPLOAD_TIMEOUT   = 600
 CAPACITY_TIMEOUT     = 1800
 CAPACITY_POLL        = 60
 MAX_CONCURRENT_JOBS  = 5
 DEFAULT_RETRIES      = 0
 RETRY_DELAY          = 300
 TERMINAL_STATES     = {JobState.COMPLETED, JobState.CANCELED}
 NON_TERMINAL_STATES = {JobState.DISPATCHED, JobState.RUNNING, JobState.SETUP, JobState.SUBMITTED}
 _SCRIPTS_DIR      = Path(__file__).parent
 _TESTS_DIR        = _SCRIPTS_DIR / "tests"
 _RUN_BENCH        = _TESTS_DIR / "run_bench_tests_posix.py"
 _RUN_BACKEND_OPS  = _TESTS_DIR / "run_backend_ops_posix.py"
 _UTILS            = _TESTS_DIR / "utils.py"
 _CONFTEST         = _TESTS_DIR / "conftest.py"
 _REQUIREMENTS     = _SCRIPTS_DIR / "requirements.txt"
 class DeviceUnavailableError(Exception):
    """Raised when the QDC device resource is not available (retryable).

    _submit_and_run_job raises it for capacity-wait timeouts, failed job
    submissions, job timeouts, and jobs that neither complete nor produce
    any test results. main() catches it to drive the --retries loop.
    """
 _SCRIPTS_DIR = Path(__file__).parent
 _TESTS_DIR = _SCRIPTS_DIR / "tests"
 # --- Shared test assets -------------------------------------------------------
 _UTILS = _TESTS_DIR / "utils.py"
 _CONFTEST = _TESTS_DIR / "conftest.py"
 _PYTEST_LINE_RE = re.compile(
    r"(?:[\w/]+\.py::)?(?:\w+::)?([\w\[\].-]+)\s+(PASSED|FAILED|ERROR|SKIPPED)"
 )
-_EXCLUDED_LOGS = {"qdc_android_whole_host-000.log", "qdc_kernel_host-000.log"}
+_EXCLUDED_LOGS = {
    "qdc_android_whole_host-000.log",
    "qdc_kernel_host-000.log",
    "qdc_LE_whole_host-000.log",
    "qdc_LE_kernel_host-000.log",
    "script.log",
 }
 _NON_TERMINAL_STATE_VALUES = {s.value for s in NON_TERMINAL_STATES}
 # --- Android (Appium + pytest) assets ----------------------------------------
 _RUN_BENCH = _TESTS_DIR / "run_bench_tests_posix.py"
 _RUN_BACKEND_OPS = _TESTS_DIR / "run_backend_ops_posix.py"
 _REQUIREMENTS = _SCRIPTS_DIR / "requirements.txt"
 _UPSTREAM_ADB_SCRIPTS = (
    "https://raw.githubusercontent.com/ggml-org/llama.cpp/master/scripts/snapdragon/adb"
 )
 _ADB_SCRIPT_NAMES = [
    "run-bench.sh",
    "run-cli.sh",
    "run-completion.sh",
    "run-tool.sh",
 ]
 # --- Linux (BASH) assets ------------------------------------------------------
 _RUN_LINUX_TEMPLATE = _TESTS_DIR / "linux" / "run_linux.sh"
 _LINUX_ENTRY_SCRIPT = "/bin/bash /data/local/tmp/TestContent/run_linux.sh"
 # =============================================================================
 # Artifact builders (per platform)
 # =============================================================================
@dataclass
 class JobResult:
@@ -73,35 +131,58 @@ class JobResult:
    failure_details: dict[str, str] = field(default_factory=dict)
-def build_artifact_zip(
+def _write_lf(path: Path, content: str) -> None:
    """Write text with LF line endings (required by /bin/bash on Linux)."""
    with open(path, "w", encoding="utf-8", newline="\n") as f:
        f.write(content)
 def _build_android_artifact(
    pkg_dir: Path,
    stage_dir: Path,
-    *,
+    test_mode: str,
-    test_mode: str = "bench",
+    model_url: str | None,
    model_url: str | None = None,
 ) -> Path:
-    """Bundle everything into a single QDC artifact zip.
+    """Android zip (Appium/pytest). Extracted by QDC under /qdc/appium/.
-    Zip structure (extracted by QDC to /qdc/appium/ on the runner):
+    Zip structure:
      llama_cpp_bundle/            installed package (adb pushed to /data/local/tmp/)
      run-{bench,cli,completion,tool}.sh  upstream adb wrappers (patched)
      tests/
-        utils.py                   shared helpers (paths, run_adb_command, …)
+        utils.py                   shared adb helpers
-        conftest.py                shared pytest fixtures (driver)
+        conftest.py                Appium pytest fixtures
-        test_bench_posix.py        bench + cli tests (<<MODEL_URL>> substituted)
+        test_bench_posix.py        bench + cli tests (for --test bench or all)
-          AND/OR
+        test_backend_ops_posix.py  test-backend-ops on HTP0
        test_backend_ops_posix.py  test-backend-ops -b HTP0
      requirements.txt
      pytest.ini                   addopts = --junitxml=results.xml
    """
-    shutil.copytree(pkg_dir, stage_dir / "llama_cpp_bundle")
+    bundle_dir = stage_dir / "llama_cpp_bundle"
    shutil.copytree(pkg_dir, bundle_dir)
    # Download upstream adb scripts so they land at /qdc/appium/ on the QDC
    # runner. They wrap `adb shell` internally. Patch in `chmod +x bin/* lib/*`
    # right after `cd $basedir` so device binaries are executable.
    for name in _ADB_SCRIPT_NAMES:
        url = f"{_UPSTREAM_ADB_SCRIPTS}/{name}"
        dest = stage_dir / name
        log.info("Downloading %s", url)
        urllib.request.urlretrieve(url, str(dest))
        content = dest.read_text()
        content = content.replace(
            "cd $basedir;",
            "cd $basedir; chmod +x bin/* lib/* 2>/dev/null;",
        )
        dest.write_text(content)
        dest.chmod(0o755)
    tests_dir = stage_dir / "tests"
    tests_dir.mkdir()
-    shutil.copy(_UTILS,    tests_dir / "utils.py")
+    shutil.copy(_UTILS, tests_dir / "utils.py")
    shutil.copy(_CONFTEST, tests_dir / "conftest.py")
    if test_mode in ("bench", "all"):
-        assert model_url is not None, "--model-url is required for bench/all test modes"
+        assert model_url is not None
        (tests_dir / "test_bench_posix.py").write_text(
            _RUN_BENCH.read_text().replace("<<MODEL_URL>>", model_url)
        )
@@ -109,33 +190,140 @@ def build_artifact_zip(
        shutil.copy(_RUN_BACKEND_OPS, tests_dir / "test_backend_ops_posix.py")
    shutil.copy(_REQUIREMENTS, stage_dir / "requirements.txt")
-    (stage_dir / "pytest.ini").write_text("[pytest]\naddopts = --junitxml=results.xml\n")
+    (stage_dir / "pytest.ini").write_text(
        "[pytest]\naddopts = --junitxml=results.xml\n"
    )
    zip_base = str(stage_dir / "artifact")
    shutil.make_archive(zip_base, "zip", stage_dir)
    return Path(f"{zip_base}.zip")
 def _build_linux_artifact(
    pkg_dir: Path,
    stage_dir: Path,
    test_mode: str,
    model_url: str | None,
 ) -> Path:
    """Stage and zip the Linux IoT artifact (QDC BASH framework).

    QDC extracts the zip to /data/local/tmp/TestContent/.

    Zip structure:
      run_linux.sh               entry script (placeholder-substituted, LF line endings)
      llama_cpp_bundle/          installed package

    Args:
        pkg_dir: Installed llama.cpp package directory to bundle.
        stage_dir: Empty staging directory; the zip is created inside it.
        test_mode: Value substituted for the {TEST_MODE} placeholder.
        model_url: Value substituted for the {MODEL_URL} placeholder; None
            is rendered as an empty string.

    Returns:
        Path of the created ``artifact.zip`` inside ``stage_dir``.
    """
    shutil.copytree(pkg_dir, stage_dir / "llama_cpp_bundle")

    # Substitute the placeholders in the same order as they are listed here.
    script_text = _RUN_LINUX_TEMPLATE.read_text(encoding="utf-8")
    substitutions = (
        ("{MODEL_URL}", model_url or ""),
        ("{TEST_MODE}", test_mode),
    )
    for placeholder, value in substitutions:
        script_text = script_text.replace(placeholder, value)

    entry_script = stage_dir / "run_linux.sh"
    _write_lf(entry_script, script_text)
    entry_script.chmod(0o755)

    zip_base = str(stage_dir / "artifact")
    shutil.make_archive(zip_base, "zip", stage_dir)
    return Path(f"{zip_base}.zip")
 # =============================================================================
 # Platform enum + strategy table
 # =============================================================================
 class Platform(enum.Enum):
    """Device platform family; used as the key into PLATFORM_SPECS."""
    # Android phones (mapped from device names in DEVICE_PLATFORM).
    ANDROID = "android"
    # Linux IoT devices (mapped from device names in DEVICE_PLATFORM).
    LINUX = "linux"
@dataclass(frozen=True)
 class PlatformSpec:
    """Per-platform settings used to build the artifact and submit the QDC job."""
    # QDC test framework passed to submit_job.
    test_framework: TestFramework
    # Entry command passed to submit_job; None when the platform needs none.
    entry_script: str | None
    # Artifact builder, called as
    # build_artifact(pkg_dir, stage_dir, test_mode, model_url) -> zip path.
    build_artifact: Callable[[Path, Path, str, str | None], Path]
    # str.format template for the job name; formatted with a ``base`` field.
    job_name_fmt: str
 # Strategy table: how to build and submit a job for each platform.
 PLATFORM_SPECS: dict[Platform, PlatformSpec] = {
    # Android: Appium framework, no entry script, job name used as-is.
    Platform.ANDROID: PlatformSpec(
        test_framework=TestFramework.APPIUM,
        entry_script=None,
        build_artifact=_build_android_artifact,
        job_name_fmt="{base}",
    ),
    # Linux IoT: BASH framework with an explicit entry script; the job name
    # gets a " (Linux)" suffix.
    Platform.LINUX: PlatformSpec(
        test_framework=TestFramework.BASH,
        entry_script=_LINUX_ENTRY_SCRIPT,
        build_artifact=_build_linux_artifact,
        job_name_fmt="{base} (Linux)",
    ),
 }
 # Maps the --device value to its platform; main() rejects devices that are
 # not listed here with an "Unknown device" error.
 DEVICE_PLATFORM: dict[str, Platform] = {
    "SM8750": Platform.ANDROID,
    "SM8650": Platform.ANDROID,
    "SM8850": Platform.ANDROID,
    "QCS9075M": Platform.LINUX,
 }
 # =============================================================================
 # Shared QDC job plumbing
 # =============================================================================
 def wait_for_job(client, job_id: str, timeout: int) -> str:
    elapsed = 0
    last_state = None
    consecutive_errors = 0
    max_consecutive_errors = 5
    while elapsed < timeout:
-        raw = qdc_api.get_job_status(client, job_id)
+        try:
            raw = qdc_api.get_job_status(client, job_id)
            consecutive_errors = 0
        except Exception as e:
            consecutive_errors += 1
            log.warning(
                "Transient error polling job %s (%d/%d): %s",
                job_id,
                consecutive_errors,
                max_consecutive_errors,
                e,
            )
            if consecutive_errors >= max_consecutive_errors:
                raise
            time.sleep(POLL_INTERVAL)
            elapsed += POLL_INTERVAL
            continue
        try:
            status = JobState(raw)
        except ValueError:
            status = raw
        if status in TERMINAL_STATES:
            return raw.lower()
-        log.info("Job %s: %s", job_id, raw)
+        if raw != last_state:
            log.info("Job %s: %s", job_id, raw)
            last_state = raw
        time.sleep(POLL_INTERVAL)
        elapsed += POLL_INTERVAL
    # Abort to free the QDC concurrency slot instead of leaking it.
    try:
        qdc_api.abort_job(client, job_id)
        log.warning("Aborted job %s after timeout to free concurrency slot", job_id)
    except Exception as e:
        log.warning("Failed to abort job %s: %s", job_id, e)
    raise TimeoutError(f"Job {job_id} did not finish within {timeout}s")
 def wait_for_log_upload(client, job_id: str) -> None:
    elapsed = 0
    while elapsed <= LOG_UPLOAD_TIMEOUT:
-        status = (qdc_api.get_job_log_upload_status(client, job_id) or "").lower()
+        try:
            status = (qdc_api.get_job_log_upload_status(client, job_id) or "").lower()
        except Exception as e:
            log.warning("get_job_log_upload_status failed: %s — will retry", e)
            status = ""
        if status in {"completed", "failed"}:
            return
        log.info("Waiting for log upload (status=%s) ...", status)
@@ -150,17 +338,33 @@ def wait_for_capacity(client, max_jobs: int = MAX_CONCURRENT_JOBS) -> None:
    while elapsed < CAPACITY_TIMEOUT:
        jobs_page = qdc_api.get_jobs_list(client, page_number=0, page_size=50)
        if jobs_page is None:
-            log.warning("Could not retrieve job list; proceeding without capacity check")
+            log.warning(
                "Could not retrieve job list; proceeding without capacity check"
            )
            return
        items = getattr(jobs_page, "data", []) or []
-        active = sum(1 for j in items if getattr(j, "state", None) in _NON_TERMINAL_STATE_VALUES)
+        active = sum(
            1 for j in items if getattr(j, "state", None) in _NON_TERMINAL_STATE_VALUES
        )
        if active < max_jobs:
            log.info("Active QDC jobs: %d / %d — proceeding", active, max_jobs)
            return
-        log.info("Active QDC jobs: %d / %d — waiting %ds ...", active, max_jobs, CAPACITY_POLL)
+        log.info(
            "Active QDC jobs: %d / %d — waiting %ds ...",
            active,
            max_jobs,
            CAPACITY_POLL,
        )
        time.sleep(CAPACITY_POLL)
        elapsed += CAPACITY_POLL
-    log.warning("Capacity wait timed out after %ds; proceeding anyway", CAPACITY_TIMEOUT)
+    raise TimeoutError(
        f"Capacity wait timed out after {CAPACITY_TIMEOUT}s"
    )
 # ---------------------------------------------------------------------------
 # Log parsing helpers
 # ---------------------------------------------------------------------------
 def _parse_junit_xml(content: str) -> tuple[dict[str, bool], dict[str, str]]:
@@ -192,10 +396,26 @@ def _parse_pytest_output(content: str) -> dict[str, bool]:
 def fetch_logs_and_parse_tests(
-    client, job_id: str
+    client, job_id: str, max_retries: int = 5, retry_delay: int = 30
 ) -> tuple[dict[str, bool], dict[str, str], dict[str, str]]:
    """Returns (test_results, raw_logs, failure_details)."""
-    log_files = qdc_api.get_job_log_files(client, job_id)
+    log_files = None
    for attempt in range(1, max_retries + 1):
        try:
            log_files = qdc_api.get_job_log_files(client, job_id)
            break
        except Exception as e:
            if attempt < max_retries:
                log.warning(
                    "get_job_log_files failed (attempt %d/%d): %s — retrying in %ds",
                    attempt, max_retries, e, retry_delay,
                )
                time.sleep(retry_delay)
            else:
                log.error(
                    "get_job_log_files failed after %d attempts: %s", max_retries, e
                )
                return {}, {}, {}
    if not log_files:
        log.warning("No log files returned for job %s", job_id)
        return {}, {}, {}
@@ -207,8 +427,8 @@ def fetch_logs_and_parse_tests(
    with tempfile.TemporaryDirectory() as tmpdir:
        for lf in log_files:
            log.info("Downloading log file: %s", lf.filename)
            zip_path = os.path.join(tmpdir, "log.zip")
            log.info("Downloading log file: %s", lf.filename)
            qdc_api.download_job_log_files(client, lf.filename, zip_path)
            try:
                shutil.unpack_archive(zip_path, tmpdir, "zip")
@@ -226,12 +446,15 @@ def fetch_logs_and_parse_tests(
                elif fname.endswith(".log"):
                    if fname in _EXCLUDED_LOGS:
                        continue
-                    log.info("--- %s ---", fname)
+                    log.info("--- %s ---\n%s", fname, content)
                    log.info("%s", content)
                    raw_logs[fname] = content
                    pytest_fallback.update(_parse_pytest_output(content))
-    return (test_results if test_results else pytest_fallback), raw_logs, failure_details
+    return (
        (test_results if test_results else pytest_fallback),
        raw_logs,
        failure_details,
    )
 def write_summary(result: JobResult, title: str = "QDC Test Results") -> None:
@@ -289,30 +512,106 @@ def write_summary(result: JobResult, title: str = "QDC Test Results") -> None:
        f.write("\n".join(lines) + "\n")
 # =============================================================================
 # CLI + main
 # =============================================================================
 def parse_args() -> argparse.Namespace:
    p = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
-    p.add_argument("--pkg-dir",   required=True, type=Path,
+    p.add_argument("--pkg-dir", required=True, type=Path,
                   help="Installed llama.cpp package directory (contains bin/ and lib/)")
    p.add_argument("--model-url",
                   help="Direct URL to the GGUF model file (required for --test bench)")
-    p.add_argument("--device",    required=True,
+    p.add_argument("--device", required=True,
                   help="QDC chipset name, e.g. SM8750")
    p.add_argument("--test", choices=["bench", "backend-ops", "all"], default="bench",
                   help="Test suite to run (default: bench)")
    p.add_argument("--job-timeout", type=int, default=JOB_TIMEOUT, metavar="SECONDS",
                   help=f"Max seconds to wait for job completion (default: {JOB_TIMEOUT})")
    p.add_argument("--retries", type=int, default=DEFAULT_RETRIES, metavar="N",
                   help="Number of retries when device is unavailable (default: 0)")
    p.add_argument("--retry-delay", type=int, default=RETRY_DELAY, metavar="SECONDS",
                   help=f"Seconds to wait between retries (default: {RETRY_DELAY})")
    args = p.parse_args()
    if args.test in ("bench", "all") and not args.model_url:
        p.error("--model-url is required when --test bench or --test all")
    return args
 def _submit_and_run_job(
    client,
    args: argparse.Namespace,
    spec: PlatformSpec,
    target_id,
    artifact_id,
 ) -> JobResult:
    """Submit a QDC job and wait for results.

    Args:
        client: QDC API client passed through to the ``qdc_api`` calls.
        args: Parsed CLI arguments; ``job_timeout`` and ``device`` are read.
        spec: Platform settings (test framework, entry script, job name).
        target_id: QDC target identifier for the requested device.
        artifact_id: Identifier of the already-uploaded test artifact.

    Returns:
        JobResult for definitive outcomes (pass or test failure).

    Raises:
        DeviceUnavailableError: for transient device/resource issues that are
            worth retrying (capacity timeout, failed submission, job timeout,
            or a job that neither completed nor produced test results).
    """
    try:
        wait_for_capacity(client)
    except TimeoutError as e:
        # Chain explicitly so the original timeout stays attached as the cause.
        raise DeviceUnavailableError("Capacity wait timed out — device busy") from e

    job_name = spec.job_name_fmt.format(base="llama.cpp Hexagon tests")
    job_id = qdc_api.submit_job(
        public_api_client=client,
        target_id=target_id,
        job_name=job_name,
        external_job_id=None,
        job_type=JobType.AUTOMATED,
        job_mode=JobMode.APPLICATION,
        # args.job_timeout is in seconds; submit_job receives whole minutes
        # (at least 1).
        timeout=max(1, args.job_timeout // 60),
        test_framework=spec.test_framework,
        entry_script=spec.entry_script,
        job_artifacts=[artifact_id],
        monkey_events=None,
        monkey_session_timeout=None,
        job_parameters=[JobSubmissionParameter.WIFIENABLED],
    )
    if job_id is None:
        raise DeviceUnavailableError("Job submission failed — device may be unavailable")
    log.info("Job submitted: %s  (device=%s)", job_id, args.device)

    try:
        job_status = wait_for_job(client, job_id, timeout=args.job_timeout)
    except TimeoutError as e:
        raise DeviceUnavailableError(str(e)) from e
    log.info("Job %s finished: %s", job_id, job_status)

    wait_for_log_upload(client, job_id)
    tests, raw_logs, failure_details = fetch_logs_and_parse_tests(client, job_id)

    job_ok = job_status == JobState.COMPLETED.value.lower()
    if not job_ok and not tests:
        # Nothing ran and nothing was reported: treat as a retryable
        # infrastructure problem rather than a test failure.
        raise DeviceUnavailableError(
            f"Job did not complete (status={job_status}) and produced no test results"
        )

    # all() of an empty mapping is True, so this equals job_ok when no test
    # results were recovered.
    passed = job_ok and all(tests.values())
    if spec.test_framework == TestFramework.BASH and not tests:
        log.error("No test results recovered (state=%s). Script likely never ran.", job_status)
        passed = False
    if not passed:
        log.error("Job did not complete successfully or tests failed (status=%s)", job_status)
    return JobResult(passed=passed, tests=tests, raw_logs=raw_logs, failure_details=failure_details)
 def main() -> int:
    args = parse_args()
    platform = DEVICE_PLATFORM.get(args.device)
    if platform is None:
        log.error(
            "Unknown device %r. Known: %s",
            args.device, ", ".join(sorted(DEVICE_PLATFORM.keys())),
        )
        return 1
    spec = PLATFORM_SPECS[platform]
    api_key = os.environ.get("QDC_API_KEY")
    if not api_key:
        log.error("QDC_API_KEY environment variable must be set")
@@ -334,10 +633,9 @@ def main() -> int:
        return 1
    with tempfile.TemporaryDirectory() as tmpdir:
-        log.info("Building artifact ...")
+        log.info("Building %s artifact (test=%s) ...", platform.value, args.test)
-        zip_path = build_artifact_zip(
+        zip_path = spec.build_artifact(
-            args.pkg_dir, Path(tmpdir),
+            args.pkg_dir, Path(tmpdir), args.test, args.model_url
            test_mode=args.test, model_url=args.model_url,
        )
        log.info("Uploading artifact (%d MB) ...", zip_path.stat().st_size // 1_000_000)
        artifact_id = qdc_api.upload_file(client, str(zip_path), ArtifactType.TESTSCRIPT)
@@ -346,46 +644,31 @@ def main() -> int:
        log.error("Artifact upload failed")
        return 1
-    wait_for_capacity(client)
+    max_attempts = 1 + args.retries
-
+    for attempt in range(1, max_attempts + 1):
-    job_id = qdc_api.submit_job(
+        try:
-        public_api_client=client,
+            result = _submit_and_run_job(client, args, spec, target_id, artifact_id)
-        target_id=target_id,
+            break
-        job_name="llama.cpp Hexagon tests",
+        except DeviceUnavailableError as e:
-        external_job_id=None,
+            if attempt < max_attempts:
-        job_type=JobType.AUTOMATED,
+                log.warning(
-        job_mode=JobMode.APPLICATION,
+                    "Attempt %d/%d failed (device unavailable): %s — retrying in %ds",
-        timeout=max(1, args.job_timeout // 60),
+                    attempt, max_attempts, e, args.retry_delay,
-        test_framework=TestFramework.APPIUM,
+                )
-        entry_script=None,
+                time.sleep(args.retry_delay)
-        job_artifacts=[artifact_id],
+            else:
-        monkey_events=None,
+                log.error(
-        monkey_session_timeout=None,
+                    "Attempt %d/%d failed (device unavailable): %s — no retries left",
-        job_parameters=[JobSubmissionParameter.WIFIENABLED],
+                    attempt, max_attempts, e,
-    )
+                )
-    if job_id is None:
+                write_summary(
-        log.error("Job submission failed")
+                    JobResult(passed=False, tests={}),
                    title=f"QDC Device Unavailable ({args.device})",
                )
                return 1
    else:
        return 1
    log.info("Job submitted: %s  (device=%s)", job_id, args.device)
    try:
        job_status = wait_for_job(client, job_id, timeout=args.job_timeout)
    except TimeoutError as e:
        log.error("%s", e)
        write_summary(JobResult(passed=False, tests={}), title=f"QDC Job Timed Out ({args.device})")
        return 1
    log.info("Job %s finished: %s", job_id, job_status)
    wait_for_log_upload(client, job_id)
    tests, raw_logs, failure_details = fetch_logs_and_parse_tests(client, job_id)
    passed = job_status == JobState.COMPLETED.value.lower()
    if tests:
        passed = passed and all(tests.values())
    if not passed:
        log.error("Job did not complete successfully or tests failed (status=%s)", job_status)
    result = JobResult(passed=passed, tests=tests, raw_logs=raw_logs, failure_details=failure_details)
    if args.test == "backend-ops":
        title = f"Backend Ops — HTP0 ({args.device})"
    elif args.test == "all":
@@ -394,7 +677,7 @@ def main() -> int:
        title = f"QDC Test Results ({args.device})"
    write_summary(result, title=title)
-    return 0 if passed else 1
+    return 0 if result.passed else 1
 if __name__ == "__main__":
@@ -0,0 +1,232 @@
 #!/bin/bash
 # llama.cpp Hexagon test entry script for QDC Linux IoT (BASH framework).
 #
 # Placeholders substituted by run_qdc_jobs.py (Linux platform, inferred from --device) before upload:
 #   {MODEL_URL}   direct URL to a .gguf model file
 #   {TEST_MODE}   bench | backend-ops | all
 #
 # QDC extracts the artifact zip to /data/local/tmp/TestContent/ and invokes
 # this script via: /bin/bash /data/local/tmp/TestContent/run_linux.sh
 # Any files written under /data/local/tmp/QDC_logs/ are auto-uploaded.
 # Keep going after failing commands: each test case records its own exit
 # code in results.xml instead of aborting the whole script.
 set +e
 umask 022
 LOG_DIR=/data/local/tmp/QDC_logs
 BUNDLE_DIR=/data/local/tmp/TestContent/llama_cpp_bundle
 MODEL_DIR=/data/local/tmp/gguf
 MODEL_PATH="$MODEL_DIR/model.gguf"
 RESULTS_XML="$LOG_DIR/results.xml"
 mkdir -p "$LOG_DIR" "$MODEL_DIR"
 # Redirect all parent-shell output to script.log so QDC auto-uploads it;
 # per-case runs still capture their own stdout/stderr into dedicated logs.
 exec > "$LOG_DIR/script.log" 2>&1
 echo "=== env ==="
 date -u
 uname -a
 pwd
 # Best effort: try to remount / read-write; failure is ignored.
 mount -o rw,remount / 2>/dev/null || true
 cd "$BUNDLE_DIR" || { echo "FATAL: bundle missing at $BUNDLE_DIR"; exit 1; }
 chmod +x bin/* 2>/dev/null
 # Append the previous LD_LIBRARY_PATH only when it is non-empty, so no
 # trailing ':' (which the loader treats as the current directory) is added.
 export LD_LIBRARY_PATH="$BUNDLE_DIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
 export ADSP_LIBRARY_PATH="$BUNDLE_DIR/lib"
 export GGML_HEXAGON_EXPERIMENTAL=1
 echo "=== download model ==="
 MODEL_URL="{MODEL_URL}"
 if [ -z "$MODEL_URL" ]; then
  echo "No model URL provided, skipping download"
 elif [ ! -f "$MODEL_PATH" ]; then
  # Download to a temporary name and rename only on success, so an
  # interrupted transfer never leaves a truncated model.gguf that a later
  # run would pick up as already downloaded.
  MODEL_TMP="$MODEL_PATH.part"
  rm -f "$MODEL_TMP"
  curl -L -fS --retry 3 --retry-delay 5 -o "$MODEL_TMP" "$MODEL_URL"
  curl_rc=$?
  if [ $curl_rc -ne 0 ]; then
    echo "FATAL: model download failed (rc=$curl_rc)"
    rm -f "$MODEL_TMP"
    exit 1
  fi
  mv -f "$MODEL_TMP" "$MODEL_PATH" || { echo "FATAL: could not move model into place"; exit 1; }
  ls -la "$MODEL_PATH"
 fi
 # ---------------------------------------------------------------------------
 # JUnit XML helpers
 # ---------------------------------------------------------------------------
 xml_open() {
  # Start a fresh results file (truncating any previous one) with the XML
  # declaration and the opening <testsuites>/<testsuite> tags.
  {
    echo '<?xml version="1.0" encoding="utf-8"?>'
    echo '<testsuites>'
    echo '<testsuite name="llama_cpp_linux">'
  } > "$RESULTS_XML"
 }
 xml_close() {
  # Append the closing tags matching the ones written by xml_open.
  {
    echo '</testsuite>'
    echo '</testsuites>'
  } >> "$RESULTS_XML"
 }
 xml_case_pass() {
  # Record a passing test case: $1 = classname attribute, $2 = name attribute.
  local cls=$1 test_name=$2
  printf '<testcase classname="%s" name="%s"/>\n' "$cls" "$test_name" \
    >> "$RESULTS_XML"
 }
 xml_case_fail() {
  # Record a failing test case with the tail of its log as the failure body.
  #   $1 classname attribute   $2 name attribute
  #   $3 exit code (reported as message="exit N")
  #   $4 log file whose last 4096 bytes are embedded in a CDATA section
  local classname=$1 name=$2 rc=$3 logfile=$4
  {
    printf '<testcase classname="%s" name="%s">\n' "$classname" "$name"
    printf '<failure message="exit %s"><![CDATA[\n' "$rc"
    # XML 1.0 forbids control characters other than TAB/LF/CR even inside
    # CDATA (ANSI escape sequences would make the file unparsable), so strip
    # them; then break up any "]]>" that would end the CDATA section early.
    tail -c 4096 "$logfile" 2>/dev/null \
      | tr -d '\000-\010\013\014\016-\037' \
      | sed 's/]]>/]] >/g'
    printf '\n]]></failure>\n</testcase>\n'
  } >> "$RESULTS_XML"
 }
 # Print the "NDEV --device" pair for a backend name (cpu | gpu | npu).
 # "none" means no offload (CPU). Unknown names print nothing.
 backend_env() {
  local backend=$1
  if [ "$backend" = cpu ]; then
    echo "0 none"
  elif [ "$backend" = gpu ]; then
    echo "0 GPUOpenCL"
  elif [ "$backend" = npu ]; then
    echo "1 HTP0"
  fi
 }
 backend_log_name() {
  # Print the log-file suffix for a backend name; unknown names print nothing.
  local suffix=
  case "$1" in
    cpu|gpu) suffix=$1 ;;
    npu) suffix=htp ;;
  esac
  if [ -n "$suffix" ]; then
    echo "$suffix"
  fi
 }
 backend_device_name() {
  # Print the --device value for a backend name; unknown names print nothing.
  local backend=$1
  if [ "$backend" = cpu ]; then
    echo "none"
  elif [ "$backend" = gpu ]; then
    echo "GPUOpenCL"
  elif [ "$backend" = npu ]; then
    echo "HTP0"
  fi
 }
 # When a per-case `timeout N` fired (exit code 124), append a diagnostic
 # block to the case log. Without it the log usually just stops
 # mid-OpenCL-init with no stderr, which is hard to read in CI summaries.
 #   $1 exit code of the case   $2 timeout budget in seconds   $3 log file
 note_timeout_if_triggered() {
  local exit_code=$1 limit_s=$2 target_log=$3
  if ! [ "$exit_code" -eq 124 ]; then
    return 0
  fi
  {
    printf '\n=== TIMEOUT after %ss ===\n' "$limit_s"
    printf 'uptime: '
    uptime 2>/dev/null
    printf 'free -m:\n'
    free -m 2>/dev/null
    printf 'loadavg: '
    cat /proc/loadavg 2>/dev/null
  } >> "$target_log"
 }
 completion_extra_args() {
  # Print the llama-completion argument string for a backend name. All
  # backends share the same generation settings and differ only in --device
  # and the batch-size option. Unknown names print nothing.
  local common="--ctx-size 128 -no-cnv -n 32 --seed 42"
  case "$1" in
    cpu) echo "--device none $common --batch-size 128" ;;
    gpu) echo "--device GPUOpenCL $common --ubatch-size 512" ;;
    npu) echo "--device HTP0 $common --ubatch-size 1024" ;;
  esac
 }
 run_completion_case() {
  # Run llama-completion on one backend ($1 = cpu | gpu | npu) under a 600 s
  # timeout and record the outcome in results.xml.
  local backend=$1
  local ndev device extra rc
  read -r ndev device <<< "$(backend_env "$backend")"
  local log="$LOG_DIR/llama_completion_$(backend_device_name "$backend").log"
  local prompt="$LOG_DIR/bench_prompt.txt"
  local case_id="test_llama_completion[$backend]"
  extra=$(completion_extra_args "$backend")
  printf '%s\n' 'What is the capital of France?' > "$prompt"
  echo "=== [completion:$backend] llama-completion --device $device (NDEV=$ndev) ==="
  # $extra is intentionally unquoted so it splits into separate arguments.
  timeout 600 env GGML_HEXAGON_NDEV=$ndev ./bin/llama-completion \
      -m "$MODEL_PATH" -f "$prompt" $extra \
      > "$log" 2>&1 < /dev/null
  rc=$?
  note_timeout_if_triggered "$rc" 600 "$log"
  if [ "$rc" -ne 0 ]; then
    xml_case_fail "tests.test_bench_posix" "$case_id" "$rc" "$log"
  else
    xml_case_pass "tests.test_bench_posix" "$case_id"
  fi
 }
 run_bench_case() {
  # Run llama-bench on one backend ($1 = cpu | gpu | npu) under a 600 s
  # timeout and record the outcome in results.xml.
  local backend=$1
  local ndev device rc
  read -r ndev device <<< "$(backend_env "$backend")"
  local log="$LOG_DIR/llama_bench_$(backend_log_name "$backend").log"
  local case_id="test_llama_bench[$backend]"
  echo "=== [bench:$backend] llama-bench --device $device (NDEV=$ndev) ==="
  timeout 600 env GGML_HEXAGON_NDEV=$ndev ./bin/llama-bench \
      -m "$MODEL_PATH" --device "$device" -ngl 99 \
      --batch-size 128 -t 4 -p 128 -n 32 \
      > "$log" 2>&1
  rc=$?
  note_timeout_if_triggered "$rc" 600 "$log"
  if [ "$rc" -ne 0 ]; then
    xml_case_fail "tests.test_bench_posix" "$case_id" "$rc" "$log"
  else
    xml_case_pass "tests.test_bench_posix" "$case_id"
  fi
 }
 run_backend_ops_case() {
  # Run test-backend-ops (MUL_MAT on HTP0) for one type_a value ($1) under a
  # 600 s timeout and record the outcome in results.xml.
  local dtype=$1 rc
  local log="$LOG_DIR/backend_ops_${dtype}.log"
  local case_id="test_backend_ops_htp0[$dtype]"
  # Default filter: select by type_a only.
  local pattern="type_a=${dtype}"
  if [ "$dtype" = q4_0 ]; then
    # Matches Android: exclude a known-broken shape on NPU.
    pattern='^(?=.*type_a=q4_0)(?!.*type_b=f32,m=576,n=512,k=576).*$'
  fi
  echo "=== [backend-ops:$dtype] test-backend-ops -b HTP0 -o MUL_MAT ==="
  timeout 600 env GGML_HEXAGON_NDEV=1 GGML_HEXAGON_HOSTBUF=0 ./bin/test-backend-ops \
      -b HTP0 -o MUL_MAT -p "$pattern" \
      > "$log" 2>&1
  rc=$?
  note_timeout_if_triggered "$rc" 600 "$log"
  if [ "$rc" -ne 0 ]; then
    xml_case_fail "tests.test_backend_ops_posix" "$case_id" "$rc" "$log"
  else
    xml_case_pass "tests.test_backend_ops_posix" "$case_id"
  fi
 }
 # Run the suites selected by the substituted test mode, wrapped in one
 # JUnit <testsuite>. Order for "all": completion, bench, backend-ops.
 xml_open
 mode="{TEST_MODE}"
 case "$mode" in
  bench|all)
    for b in cpu gpu npu; do run_completion_case "$b"; done
    for b in cpu gpu npu; do run_bench_case "$b"; done
    ;;
 esac
 case "$mode" in
  backend-ops|all)
    for d in mxfp4 fp16 q4_0; do run_backend_ops_case "$d"; done
    ;;
  bench)
    ;;
  *)
    echo "FATAL: unsupported TEST_MODE=$mode"
    ;;
 esac
 xml_close
 echo "=== done ==="
 # Host parses results.xml to decide pass/fail.
 exit 0
@@ -1,8 +1,9 @@
 """
 On-device test-backend-ops runner for llama.cpp (HTP0 backend).
-Executed by QDC's Appium test framework on the QDC runner.
+On Android: executed by QDC's Appium test framework on the QDC runner.
 The runner has ADB access to the allocated device.
 On Linux: runs test-backend-ops directly via run_linux.sh (BASH framework).
 """
 import os
@@ -10,7 +11,12 @@ import sys
 import pytest
-from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_adb_command, write_qdc_log
+from utils import (
    BIN_PATH,
    push_bundle_if_needed,
    run_script,
    write_qdc_log,
 )
@pytest.fixture(scope="session", autouse=True)
@@ -20,17 +26,21 @@ def install(driver):
@pytest.mark.parametrize("type_a", ["mxfp4", "fp16", "q4_0"])
 def test_backend_ops_htp0(type_a):
    cmd = f"{CMD_PREFIX} GGML_HEXAGON_HOSTBUF=0 GGML_HEXAGON_EXPERIMENTAL=1 {BIN_PATH}/test-backend-ops -b HTP0 -o MUL_MAT"
    if type_a == "q4_0":
-        cmd += r' -p "^(?=.*type_a=q4_0)(?!.*type_b=f32,m=576,n=512,k=576).*$"'
+        pattern = r'^(?=.*type_a=q4_0)(?!.*type_b=f32,m=576,n=512,k=576).*$'
    else:
-        cmd += f" -p type_a={type_a}"
+        pattern = f"type_a={type_a}"
-    result = run_adb_command(
+
-        cmd,
+    quoted_pattern = f'"{pattern}"' if type_a == "q4_0" else pattern
-        check=False,
+    result = run_script(
        "run-tool.sh",
        extra_env={"HB": "0"},
        extra_args=["test-backend-ops", "-b", "HTP0", "-o", "MUL_MAT", "-p", quoted_pattern],
    )
    write_qdc_log(f"backend_ops_{type_a}.log", result.stdout or "")
-    assert result.returncode == 0, f"test-backend-ops type_a={type_a} failed (exit {result.returncode})"
+    assert result.returncode == 0, (
        f"test-backend-ops type_a={type_a} failed (exit {result.returncode})"
    )
 if __name__ == "__main__":
@@ -1,11 +1,13 @@
 """
 On-device bench and completion test runner for llama.cpp (CPU, GPU, NPU backends).
-Executed by QDC's Appium test framework on the QDC runner.
+On Android: calls upstream run-*.sh scripts from llama.cpp/scripts/snapdragon/adb/
-The runner has ADB access to the allocated device.
+on the QDC runner host (scripts wrap commands in ``adb shell`` internally).
 On Linux: runs llama-bench directly via run_linux.sh (BASH framework).
 Placeholders replaced at artifact creation time by run_qdc_jobs.py:
-  <<MODEL_URL>>  Direct URL to the GGUF model file (downloaded on-device via curl)
+  <<MODEL_URL>>  Direct URL to the GGUF model file (downloaded on-device)
 """
 import os
@@ -14,58 +16,75 @@ import sys
 import pytest
-from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_adb_command, write_qdc_log
+from utils import (
    BIN_PATH,
    MODEL_DEVICE_PATH,
    MODEL_NAME,
    PROMPT_DIR,
    push_bundle_if_needed,
    run_adb_command,
    run_script,
    write_qdc_log,
 )
-MODEL_PATH = "/data/local/tmp/model.gguf"
+MODEL_URL = "<<MODEL_URL>>"
 PROMPT     = "What is the capital of France?"
 CLI_OPTS   = "--batch-size 128 -n 128 -no-cnv --seed 42"
@pytest.fixture(scope="session", autouse=True)
 def install(driver):
    push_bundle_if_needed(f"{BIN_PATH}/llama-cli")
-
+    run_adb_command(f"mkdir -p /data/local/tmp/gguf {PROMPT_DIR}")
-    # Skip model download if already present
+    run_adb_command(f"echo 'What is the capital of France?' > {PROMPT_DIR}/bench_prompt.txt")
    check = subprocess.run(
-        ["adb", "shell", f"ls {MODEL_PATH}"],
+        ["adb", "shell", f"ls {MODEL_DEVICE_PATH}"],
        text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
    )
    if check.returncode != 0:
-        run_adb_command(f'curl -L -J --output {MODEL_PATH} "<<MODEL_URL>>"')
+        run_adb_command(f'curl -L -J --output {MODEL_DEVICE_PATH} "{MODEL_URL}"')
-@pytest.mark.parametrize("device,extra_flags", [
+@pytest.mark.parametrize(
-    pytest.param("none",      "-ctk q8_0 -ctv q8_0", id="cpu"),
+    "device",
-    pytest.param("GPUOpenCL", "",                     id="gpu"),
+    [
-    pytest.param("HTP0",      "-ctk q8_0 -ctv q8_0", id="npu"),
+        pytest.param("none", id="cpu"),
-])
+        pytest.param("GPUOpenCL", id="gpu"),
-def test_llama_completion(device, extra_flags):
+        pytest.param("HTP0", id="npu"),
-    result = run_adb_command(
+    ],
-        f'{CMD_PREFIX} {BIN_PATH}/llama-completion'
+)
-        f' -m {MODEL_PATH} --device {device} -ngl 99 -t 4 {CLI_OPTS} {extra_flags} -fa on'
+def test_llama_completion(device):
-        f' -p "{PROMPT}"',
+    result = run_script(
-        check=False,
+        "run-completion.sh",
        extra_env={"D": device, "M": MODEL_NAME},
        extra_args=["--batch-size", "128", "-n", "128", "--seed", "42",
                    "-f", f"{PROMPT_DIR}/bench_prompt.txt"],
    )
    write_qdc_log(f"llama_completion_{device}.log", result.stdout or "")
-    assert result.returncode == 0, f"llama-completion {device} failed (exit {result.returncode})"
+    assert result.returncode == 0, (
        f"llama-completion {device} failed (exit {result.returncode})"
    )
 _DEVICE_LOG_NAME = {"none": "cpu", "GPUOpenCL": "gpu", "HTP0": "htp"}
-@pytest.mark.parametrize("device", [
+@pytest.mark.parametrize(
-    pytest.param("none",      id="cpu"),
+    "device",
-    pytest.param("GPUOpenCL", id="gpu"),
+    [
-    pytest.param("HTP0",      id="npu"),
+        pytest.param("none", id="cpu"),
-])
+        pytest.param("GPUOpenCL", id="gpu"),
        pytest.param("HTP0", id="npu"),
    ],
 )
 def test_llama_bench(device):
-    result = run_adb_command(
+    result = run_script(
-        f"{CMD_PREFIX} {BIN_PATH}/llama-bench"
+        "run-bench.sh",
-        f" -m {MODEL_PATH} --device {device} -ngl 99 --batch-size 128 -t 4 -p 128 -n 32",
+        extra_env={"D": device, "M": MODEL_NAME},
-        check=False,
+        extra_args=["--batch-size", "128", "-p", "128", "-n", "32"],
    )
    write_qdc_log(f"llama_bench_{_DEVICE_LOG_NAME[device]}.log", result.stdout or "")
-    assert result.returncode == 0, f"llama-bench {device} failed (exit {result.returncode})"
+    assert result.returncode == 0, (
        f"llama-bench {device} failed (exit {result.returncode})"
    )
 if __name__ == "__main__":
@@ -1,5 +1,7 @@
 """Shared helpers for QDC on-device test runners."""
 from __future__ import annotations
 import logging
 import os
 import subprocess
@@ -13,16 +15,14 @@ log = logging.getLogger(__name__)
 # On-device paths
 # ---------------------------------------------------------------------------
-BUNDLE_PATH  = "/data/local/tmp/llama_cpp_bundle"
+BUNDLE_PATH = "/data/local/tmp/llama.cpp"
 BIN_PATH = f"{BUNDLE_PATH}/bin"
 LIB_PATH = f"{BUNDLE_PATH}/lib"
 QDC_LOGS_PATH = "/data/local/tmp/QDC_logs"
-LIB_PATH    = f"{BUNDLE_PATH}/lib"
+SCRIPTS_DIR = "/qdc/appium"
-BIN_PATH    = f"{BUNDLE_PATH}/bin"
+MODEL_NAME = "model.gguf"
-ENV_PREFIX  = (
+MODEL_DEVICE_PATH = "/data/local/tmp/gguf/model.gguf"
-    f"export LD_LIBRARY_PATH={LIB_PATH} && "
+PROMPT_DIR = "/data/local/tmp/scorecard_prompts"
    f"export ADSP_LIBRARY_PATH={LIB_PATH} && "
    f"chmod +x {BIN_PATH}/* &&"
 )
 CMD_PREFIX  = f"cd {BUNDLE_PATH} && {ENV_PREFIX}"
 # ---------------------------------------------------------------------------
 # Appium session options
@@ -34,16 +34,47 @@ options.set_capability("platformName", "Android")
 options.set_capability("deviceName", os.getenv("ANDROID_DEVICE_VERSION"))
 # ---------------------------------------------------------------------------
-# ADB helpers
+# Shell / process helpers
 # ---------------------------------------------------------------------------
 def write_qdc_log(filename: str, content: str) -> None:
    """Write *content* as a log file for QDC log collection.

    The text is staged in a temporary ``.log`` file on the host and copied
    to ``QDC_LOGS_PATH/filename`` with ``adb push``. The target directory is
    created first with ``mkdir -p``. Exit codes of the adb calls are not
    checked.

    Args:
        filename: Name of the log file under ``QDC_LOGS_PATH``.
        content: Text to store in the log file.
    """
    make_dir = ["adb", "shell", f"mkdir -p {QDC_LOGS_PATH}"]
    subprocess.run(make_dir, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

    # delete=False keeps the file on disk after it is closed so adb can read it.
    staged = tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False)
    with staged:
        staged.write(content)
    tmp_path = staged.name

    destination = f"{QDC_LOGS_PATH}/{filename}"
    try:
        subprocess.run(
            ["adb", "push", tmp_path, destination],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
    finally:
        # Remove the staging file whether or not the push call raised.
        os.unlink(tmp_path)
 def ensure_bundle(check_binary: str | None = None) -> None:
    """Make sure the llama_cpp_bundle is present on the target device.

    Delegates to ``push_bundle_if_needed``.

    Args:
        check_binary: Path passed on as the presence probe. When it is
            ``None`` or empty, ``{BIN_PATH}/llama-cli`` is used instead.
    """
    probe = check_binary if check_binary else f"{BIN_PATH}/llama-cli"
    push_bundle_if_needed(probe)
 # ---------------------------------------------------------------------------
 # Android / Linux host helpers
 # ---------------------------------------------------------------------------
 def run_adb_command(cmd: str, *, check: bool = True) -> subprocess.CompletedProcess:
-    # Append exit-code sentinel because `adb shell` doesn't reliably propagate
+    """Run a command on-device via ``adb shell`` with exit-code sentinel."""
    # `adb shell` doesn't reliably propagate the on-device exit code (older
    # ADB versions always return 0), hence the echoed __RC__ sentinel below.
    raw = subprocess.run(
        ["adb", "shell", f"{cmd}; echo __RC__:$?"],
-        text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    stdout = raw.stdout
    returncode = raw.returncode
@@ -55,39 +86,58 @@ def run_adb_command(cmd: str, *, check: bool = True) -> subprocess.CompletedProc
                stdout = "\n".join(lines[:-1]) + "\n"
            except ValueError:
                pass
-    log.info("%s", stdout)
+    log.info(stdout)
    result = subprocess.CompletedProcess(raw.args, returncode, stdout=stdout)
    if check:
        assert returncode == 0, f"Command failed (exit {returncode})"
    return result
-def write_qdc_log(filename: str, content: str) -> None:
+def run_script(
-    """Push content as a log file to QDC_LOGS_PATH on the device for QDC log collection."""
+    script: str,
-    subprocess.run(
+    extra_env: dict[str, str] | None = None,
-        ["adb", "shell", f"mkdir -p {QDC_LOGS_PATH}"],
+    extra_args: list[str] | None = None,
-        stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+) -> subprocess.CompletedProcess:
    """Run an upstream shell script from /qdc/appium/ on the QDC runner host."""
    env = os.environ.copy()
    env["GGML_HEXAGON_EXPERIMENTAL"] = "1"
    if extra_env:
        env.update(extra_env)
    cmd = [f"{SCRIPTS_DIR}/{script}"] + (extra_args or [])
    result = subprocess.run(
        cmd, env=env,
        text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
    )
    log.info(result.stdout)
    return result
 def adb_shell(cmd: str) -> None:
    """Run *cmd* via ``adb shell`` (fire-and-forget, no error check).

    Output is captured and discarded, and a non-zero exit status is ignored
    (``check=False``).

    Args:
        cmd: Command string passed after ``sh -c`` on the ``adb shell``
            command line.
    """
    # NOTE(review): adb is believed to join its arguments with spaces before
    # handing them to the device shell, so a multi-word cmd may not reach
    # `sh -c` as a single argument -- confirm against the adb documentation.
    subprocess.run(
        ["adb", "shell", "sh", "-c", cmd],
        capture_output=True, encoding="utf-8", errors="replace", check=False,
    )
 def push_bundle_if_needed(check_binary: str) -> None:
    """Push llama_cpp_bundle to the device if check_binary is not already present."""
    result = subprocess.run(
        ["adb", "shell", f"ls {check_binary}"],
-        text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    if result.returncode != 0:
        subprocess.run(
-            ["adb", "push", "/qdc/appium/llama_cpp_bundle/", "/data/local/tmp"],
+            ["adb", "push", "/qdc/appium/llama_cpp_bundle/", BUNDLE_PATH],
-            text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+            text=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
        subprocess.run(
            ["adb", "shell", f"find {BUNDLE_PATH}/bin -type f -exec chmod 755 {{}} +"],
            text=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
@@ -13,7 +13,7 @@ exclude = [
 [[overrides]]
 include = [
    "./tools/server/tests/**",
-    "./scripts/snapdragon/qdc/tests/**",
+    "./scripts/snapdragon/qdc/**",
 ]
 [overrides.rules]