Merge pull request 'feat: structured artifact passthrough for agent results' (#237) from feat/structured-artifact-passthrough into main

This commit was merged in pull request #237.
2026-03-12 23:14:28 +01:00
29 changed files with 966 additions and 198 deletions

View File

@@ -49,7 +49,7 @@ All services communicate via gRPC (Protocol Buffers). Internal services run in D
Ollama runs on the bare metal host (not in Docker) because GPU drivers — particularly ROCm for AMD and CUDA for NVIDIA — require direct hardware access that Docker GPU passthrough complicates.
The internal Docker network (`llm-internal`) is declared `internal: true`, which means containers on this network cannot reach the internet directly. Only Caddy (on the edge network) and SearXNG (which has its own upstream configuration) can make external connections.
The internal Docker network (`llm-internal`) is declared `internal: true`, which means containers on this network cannot reach the internet directly. SearXNG, the Search Service, the Model Gateway, and the orchestrator are also attached to `llm-external` so they can reach Ollama on the host and the internet for web searches and page content extraction.
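A quick way to see what `internal: true` means in practice is to probe outbound connectivity from inside a container. The sketch below is purely illustrative (it is not part of this PR, and the hostname is arbitrary):

```python
# Hypothetical connectivity probe for illustrating `internal: true`.
import socket

def can_reach(host: str = "example.com", port: int = 443, timeout: float = 3.0) -> bool:
    """Return True if an outbound TCP connection succeeds."""
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        return False

# Run inside a container: on `llm-internal` alone this prints False;
# on a container also attached to `llm-external` it prints True.
print(can_reach())
```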
## 3. Hardware Requirements
@@ -152,7 +152,7 @@ No values are strictly required — all services start with sensible defaults. H
|-------|---------|-------------|
| `decomposer.max_subtasks` | 10 | Maximum subtasks per request |
| `dispatcher.max_concurrent_subtasks` | 5 | Parallel agent limit |
| `dispatcher.subtask_timeout_seconds` | 120 | Per-subtask timeout |
| `dispatcher.subtask_timeout_seconds` | 300 | Per-subtask timeout |
| `dispatcher.overall_timeout_seconds` | 600 | Total dispatch timeout |
| `compaction.threshold_ratio` | 0.6 | Compact at 60% of context window |
| `memory_gating.confidence_threshold` | 0.7 | Minimum confidence to persist memory |
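The two dispatcher timeouts nest: each subtask is capped individually while the whole batch shares an overall budget, and a semaphore bounds concurrency. A minimal sketch of how those three limits compose with `asyncio` (illustrative only; the dispatcher's real implementation is not shown in this diff):

```python
# Sketch of nested per-subtask and overall timeouts, not the dispatcher itself.
import asyncio

SUBTASK_TIMEOUT = 300.0   # dispatcher.subtask_timeout_seconds
OVERALL_TIMEOUT = 600.0   # dispatcher.overall_timeout_seconds
MAX_CONCURRENT = 5        # dispatcher.max_concurrent_subtasks

async def run_subtask(i: int) -> str:
    await asyncio.sleep(0.1)  # stand-in for agent work
    return f"subtask-{i} done"

async def dispatch_all(n: int) -> list[str | BaseException]:
    sem = asyncio.Semaphore(MAX_CONCURRENT)

    async def bounded(i: int):
        async with sem:
            # Each subtask is individually capped...
            return await asyncio.wait_for(run_subtask(i), SUBTASK_TIMEOUT)

    # ...and the whole batch is capped as well.
    return await asyncio.wait_for(
        asyncio.gather(*(bounded(i) for i in range(n)), return_exceptions=True),
        OVERALL_TIMEOUT,
    )

print(asyncio.run(dispatch_all(3)))
```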

View File

@@ -21,6 +21,7 @@
versions h2c
}
header_up te trailers
}
log {
output stdout

View File

@@ -1,6 +1,7 @@
# Orchestrator configuration for Docker deployment
host: "0.0.0.0"
port: 50058
grpc_web_port: 8080
model_gateway_addr: "model-gateway:50055"
tool_broker_addr: "tool-broker:50057"
memory_addr: "memory:50054"
@@ -9,22 +10,22 @@ audit_addr: "audit:50052"
researcher:
max_iterations: 10
timeout_seconds: 120
timeout_seconds: 300
max_tokens: 4096
coder:
max_iterations: 10
timeout_seconds: 120
timeout_seconds: 300
max_tokens: 4096
assistant:
max_iterations: 10
timeout_seconds: 120
timeout_seconds: 300
max_tokens: 4096
sysadmin:
max_iterations: 10
timeout_seconds: 120
timeout_seconds: 300
max_tokens: 4096
decomposer:
@@ -32,7 +33,7 @@ decomposer:
max_subtasks: 10
dispatcher:
subtask_timeout_seconds: 120.0
subtask_timeout_seconds: 300.0
overall_timeout_seconds: 600.0
max_concurrent_subtasks: 5

View File

@@ -8,39 +8,12 @@
# - Build images first: docker compose -f docker/docker-compose.yml build
#
# Networks:
# llm-edge: Caddy <-> Orchestrator only (edge-facing)
# llm-internal: All inter-service communication (internal: true, no external routing)
#
# The orchestrator exposes a gRPC-Web HTTP gateway on port 8080 for
# direct browser access, eliminating the need for an edge proxy.
services:
# ---------- Edge Proxy ----------
caddy:
image: caddy:2-alpine
container_name: llm-caddy
ports:
- "${HTTPS_PORT:-443}:443"
- "${HTTP_PORT:-80}:80"
volumes:
- ./caddy/Caddyfile:/etc/caddy/Caddyfile:ro
- caddy-data:/data
- caddy-config:/config
environment:
- DOMAIN=${DOMAIN:-localhost}
- TLS_MODE=${TLS_MODE:-internal}
networks:
- llm-edge
- llm-internal
restart: unless-stopped
depends_on:
orchestrator:
condition: service_started
healthcheck:
test: ["CMD", "wget", "--spider", "--quiet", "http://localhost:80/healthz"]
interval: 30s
timeout: 5s
retries: 3
start_period: 10s
# ---------- Infrastructure ----------
searxng:
@@ -53,6 +26,7 @@ services:
- SEARXNG_SECRET=${SEARXNG_SECRET:-dev-secret-change-in-production}
networks:
- llm-internal
- llm-external
restart: unless-stopped
cap_drop:
- ALL
@@ -145,7 +119,7 @@ services:
target: /etc/llm-multiverse/model-gateway.toml
networks:
- llm-internal
- llm-edge # Needs external access to reach Ollama on host
- llm-external # Needs external access to reach Ollama on host
restart: unless-stopped
depends_on:
audit:
@@ -186,6 +160,7 @@ services:
SERVICE_DIR: tool-broker
SERVICE_PACKAGE: tool-broker
SERVICE_PORT: "50057"
EXTRA_RUNTIME_DEPS: "python3"
container_name: llm-tool-broker
volumes:
- ./manifests:/etc/llm-multiverse/manifests:ro
@@ -223,6 +198,7 @@ services:
target: /etc/llm-multiverse/search.yaml
networks:
- llm-internal
- llm-external
restart: unless-stopped
depends_on:
searxng:
@@ -237,14 +213,16 @@ services:
context: ..
dockerfile: docker/python/orchestrator.Dockerfile
container_name: llm-orchestrator
ports:
- "${GRPC_WEB_PORT:-8080}:8080"
environment:
- ORCHESTRATOR_CONFIG=/etc/llm-multiverse/orchestrator.yaml
configs:
- source: orchestrator-config
target: /etc/llm-multiverse/orchestrator.yaml
networks:
- llm-edge
- llm-internal
- llm-external
restart: unless-stopped
depends_on:
model-gateway:
@@ -283,15 +261,11 @@ volumes:
driver: local
memory-data:
driver: local
caddy-data:
driver: local
caddy-config:
driver: local
# ---------- Networks ----------
networks:
llm-edge:
llm-external:
driver: bridge
llm-internal:
driver: bridge

View File

@@ -2,7 +2,7 @@
agent_type = "coder"
agent_type_id = 3
allowed_tools = ["fs_read", "fs_write", "run_code", "web_search", "memory_read"]
path_allowlist = ["/home/**", "/tmp/**", "/workspace/**"]
path_allowlist = ["/home/**", "/tmp", "/tmp/**", "/workspace", "/workspace/**"]
can_spawn = []
max_spawn_depth = 0
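The extra bare `/tmp` and `/workspace` entries matter because, under common glob semantics, `dir/**` matches paths below a directory but not the directory itself. A small demonstration using Python's `fnmatch` (the tool broker's actual matcher may differ):

```python
# Why the allowlist needs both "/tmp" and "/tmp/**" under glob-style matching.
from fnmatch import fnmatch

print(fnmatch("/tmp", "/tmp/**"))       # False: nothing after "/tmp/" to match
print(fnmatch("/tmp/a.py", "/tmp/**"))  # True
print(fnmatch("/tmp", "/tmp"))          # True: the explicit entry covers it
```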

View File

@@ -80,6 +80,9 @@ RUN groupadd -r app && useradd -r -g app -d /nonexistent -s /usr/sbin/nologin ap
# Pre-create /data owned by app for services that mount volumes there
RUN mkdir -p /data && chown app:app /data
# Pre-create /workspace owned by app for tool execution (code writing, run_code working_dir)
RUN mkdir -p /workspace/output && chown -R app:app /workspace
COPY --from=builder /app/target/release/${SERVICE_PACKAGE} /usr/local/bin/service
USER app

View File

@@ -2,7 +2,7 @@
# Generated by the protocol buffer compiler. DO NOT EDIT!
# NO CHECKED-IN PROTOBUF GENCODE
# source: llm_multiverse/v1/common.proto
# Protobuf Python Version: 7.34.0
# Protobuf Python Version: 6.31.1
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
@@ -11,9 +11,9 @@ from google.protobuf import symbol_database as _symbol_database
from google.protobuf.internal import builder as _builder
_runtime_version.ValidateProtobufRuntimeVersion(
_runtime_version.Domain.PUBLIC,
7,
34,
0,
6,
31,
1,
'',
'llm_multiverse/v1/common.proto'
)
@@ -25,40 +25,47 @@ _sym_db = _symbol_database.Default()
from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1ellm_multiverse/v1/common.proto\x12\x11llm_multiverse.v1\x1a\x1fgoogle/protobuf/timestamp.proto\"\x8a\x01\n\x0f\x41gentIdentifier\x12\x19\n\x08\x61gent_id\x18\x01 \x01(\tR\x07\x61gentId\x12;\n\nagent_type\x18\x02 \x01(\x0e\x32\x1c.llm_multiverse.v1.AgentTypeR\tagentType\x12\x1f\n\x0bspawn_depth\x18\x03 \x01(\rR\nspawnDepth\"J\n\x0c\x41gentLineage\x12:\n\x06\x61gents\x18\x01 \x03(\x0b\x32\".llm_multiverse.v1.AgentIdentifierR\x06\x61gents\"\x92\x02\n\x0eSessionContext\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x17\n\x07user_id\x18\x02 \x01(\tR\x06userId\x12\x44\n\ragent_lineage\x18\x03 \x01(\x0b\x32\x1f.llm_multiverse.v1.AgentLineageR\x0c\x61gentLineage\x12G\n\x0eoverride_level\x18\x04 \x01(\x0e\x32 .llm_multiverse.v1.OverrideLevelR\roverrideLevel\x12\x39\n\ncreated_at\x18\x05 \x01(\x0b\x32\x1a.google.protobuf.TimestampR\tcreatedAt\"\xc2\x01\n\x0b\x45rrorDetail\x12\x12\n\x04\x63ode\x18\x01 \x01(\tR\x04\x63ode\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12H\n\x08metadata\x18\x03 \x03(\x0b\x32,.llm_multiverse.v1.ErrorDetail.MetadataEntryR\x08metadata\x1a;\n\rMetadataEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\"\x84\x01\n\x0fMemoryCandidate\x12\x18\n\x07\x63ontent\x18\x01 \x01(\tR\x07\x63ontent\x12\x37\n\x06source\x18\x02 \x01(\x0e\x32\x1f.llm_multiverse.v1.ResultSourceR\x06source\x12\x1e\n\nconfidence\x18\x03 \x01(\x02R\nconfidence\"\x9a\x03\n\x0eSubagentResult\x12\x37\n\x06status\x18\x01 \x01(\x0e\x32\x1f.llm_multiverse.v1.ResultStatusR\x06status\x12\x18\n\x07summary\x18\x02 \x01(\tR\x07summary\x12\x1c\n\tartifacts\x18\x03 \x03(\tR\tartifacts\x12G\n\x0eresult_quality\x18\x04 \x01(\x0e\x32 .llm_multiverse.v1.ResultQualityR\rresultQuality\x12\x37\n\x06source\x18\x05 \x01(\x0e\x32\x1f.llm_multiverse.v1.ResultSourceR\x06source\x12V\n\x15new_memory_candidates\x18\x06 \x03(\x0b\x32\".llm_multiverse.v1.MemoryCandidateR\x13newMemoryCandidates\x12*\n\x0e\x66\x61ilure_reason\x18\x07 
\x01(\tH\x00R\rfailureReason\x88\x01\x01\x42\x11\n\x0f_failure_reason*\xa8\x01\n\tAgentType\x12\x1a\n\x16\x41GENT_TYPE_UNSPECIFIED\x10\x00\x12\x1b\n\x17\x41GENT_TYPE_ORCHESTRATOR\x10\x01\x12\x19\n\x15\x41GENT_TYPE_RESEARCHER\x10\x02\x12\x14\n\x10\x41GENT_TYPE_CODER\x10\x03\x12\x17\n\x13\x41GENT_TYPE_SYSADMIN\x10\x04\x12\x18\n\x14\x41GENT_TYPE_ASSISTANT\x10\x05*\xf5\x01\n\x08ToolType\x12\x19\n\x15TOOL_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15TOOL_TYPE_MEMORY_READ\x10\x01\x12\x1a\n\x16TOOL_TYPE_MEMORY_WRITE\x10\x02\x12\x18\n\x14TOOL_TYPE_WEB_SEARCH\x10\x03\x12\x15\n\x11TOOL_TYPE_FS_READ\x10\x04\x12\x16\n\x12TOOL_TYPE_FS_WRITE\x10\x05\x12\x16\n\x12TOOL_TYPE_RUN_CODE\x10\x06\x12\x17\n\x13TOOL_TYPE_RUN_SHELL\x10\x07\x12\x1d\n\x19TOOL_TYPE_PACKAGE_INSTALL\x10\x08*z\n\rOverrideLevel\x12\x1e\n\x1aOVERRIDE_LEVEL_UNSPECIFIED\x10\x00\x12\x17\n\x13OVERRIDE_LEVEL_NONE\x10\x01\x12\x18\n\x14OVERRIDE_LEVEL_RELAX\x10\x02\x12\x16\n\x12OVERRIDE_LEVEL_ALL\x10\x03*}\n\x0cResultStatus\x12\x1d\n\x19RESULT_STATUS_UNSPECIFIED\x10\x00\x12\x19\n\x15RESULT_STATUS_SUCCESS\x10\x01\x12\x19\n\x15RESULT_STATUS_PARTIAL\x10\x02\x12\x18\n\x14RESULT_STATUS_FAILED\x10\x03*\x87\x01\n\rResultQuality\x12\x1e\n\x1aRESULT_QUALITY_UNSPECIFIED\x10\x00\x12\x1b\n\x17RESULT_QUALITY_VERIFIED\x10\x01\x12\x1b\n\x17RESULT_QUALITY_INFERRED\x10\x02\x12\x1c\n\x18RESULT_QUALITY_UNCERTAIN\x10\x03*\x86\x01\n\x0cResultSource\x12\x1d\n\x19RESULT_SOURCE_UNSPECIFIED\x10\x00\x12\x1d\n\x19RESULT_SOURCE_TOOL_OUTPUT\x10\x01\x12!\n\x1dRESULT_SOURCE_MODEL_KNOWLEDGE\x10\x02\x12\x15\n\x11RESULT_SOURCE_WEB\x10\x03\x42\x85\x01\n\x15\x63om.llm_multiverse.v1B\x0b\x43ommonProtoP\x01\xa2\x02\x03LXX\xaa\x02\x10LlmMultiverse.V1\xca\x02\x10LlmMultiverse\\V1\xe2\x02\x1cLlmMultiverse\\V1\\GPBMetadata\xea\x02\x11LlmMultiverse::V1b\x06proto3')
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1ellm_multiverse/v1/common.proto\x12\x11llm_multiverse.v1\x1a\x1fgoogle/protobuf/timestamp.proto\"\xd0\x01\n\x08\x41rtifact\x12\r\n\x05label\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t\x12\x36\n\rartifact_type\x18\x03 \x01(\x0e\x32\x1f.llm_multiverse.v1.ArtifactType\x12;\n\x08metadata\x18\x04 \x03(\x0b\x32).llm_multiverse.v1.Artifact.MetadataEntry\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"j\n\x0f\x41gentIdentifier\x12\x10\n\x08\x61gent_id\x18\x01 \x01(\t\x12\x30\n\nagent_type\x18\x02 \x01(\x0e\x32\x1c.llm_multiverse.v1.AgentType\x12\x13\n\x0bspawn_depth\x18\x03 \x01(\r\"B\n\x0c\x41gentLineage\x12\x32\n\x06\x61gents\x18\x01 \x03(\x0b\x32\".llm_multiverse.v1.AgentIdentifier\"\xd7\x01\n\x0eSessionContext\x12\x12\n\nsession_id\x18\x01 \x01(\t\x12\x0f\n\x07user_id\x18\x02 \x01(\t\x12\x36\n\ragent_lineage\x18\x03 \x01(\x0b\x32\x1f.llm_multiverse.v1.AgentLineage\x12\x38\n\x0eoverride_level\x18\x04 \x01(\x0e\x32 .llm_multiverse.v1.OverrideLevel\x12.\n\ncreated_at\x18\x05 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\"\x9d\x01\n\x0b\x45rrorDetail\x12\x0c\n\x04\x63ode\x18\x01 \x01(\t\x12\x0f\n\x07message\x18\x02 \x01(\t\x12>\n\x08metadata\x18\x03 \x03(\x0b\x32,.llm_multiverse.v1.ErrorDetail.MetadataEntry\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"g\n\x0fMemoryCandidate\x12\x0f\n\x07\x63ontent\x18\x01 \x01(\t\x12/\n\x06source\x18\x02 \x01(\x0e\x32\x1f.llm_multiverse.v1.ResultSource\x12\x12\n\nconfidence\x18\x03 \x01(\x02\"\xe0\x02\n\x0eSubagentResult\x12/\n\x06status\x18\x01 \x01(\x0e\x32\x1f.llm_multiverse.v1.ResultStatus\x12\x0f\n\x07summary\x18\x02 \x01(\t\x12.\n\tartifacts\x18\x03 \x03(\x0b\x32\x1b.llm_multiverse.v1.Artifact\x12\x38\n\x0eresult_quality\x18\x04 \x01(\x0e\x32 .llm_multiverse.v1.ResultQuality\x12/\n\x06source\x18\x05 \x01(\x0e\x32\x1f.llm_multiverse.v1.ResultSource\x12\x41\n\x15new_memory_candidates\x18\x06 \x03(\x0b\x32\".llm_multiverse.v1.MemoryCandidate\x12\x1b\n\x0e\x66\x61ilure_reason\x18\x07 
\x01(\tH\x00\x88\x01\x01\x42\x11\n\x0f_failure_reason*\xa8\x01\n\tAgentType\x12\x1a\n\x16\x41GENT_TYPE_UNSPECIFIED\x10\x00\x12\x1b\n\x17\x41GENT_TYPE_ORCHESTRATOR\x10\x01\x12\x19\n\x15\x41GENT_TYPE_RESEARCHER\x10\x02\x12\x14\n\x10\x41GENT_TYPE_CODER\x10\x03\x12\x17\n\x13\x41GENT_TYPE_SYSADMIN\x10\x04\x12\x18\n\x14\x41GENT_TYPE_ASSISTANT\x10\x05*\xf5\x01\n\x08ToolType\x12\x19\n\x15TOOL_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15TOOL_TYPE_MEMORY_READ\x10\x01\x12\x1a\n\x16TOOL_TYPE_MEMORY_WRITE\x10\x02\x12\x18\n\x14TOOL_TYPE_WEB_SEARCH\x10\x03\x12\x15\n\x11TOOL_TYPE_FS_READ\x10\x04\x12\x16\n\x12TOOL_TYPE_FS_WRITE\x10\x05\x12\x16\n\x12TOOL_TYPE_RUN_CODE\x10\x06\x12\x17\n\x13TOOL_TYPE_RUN_SHELL\x10\x07\x12\x1d\n\x19TOOL_TYPE_PACKAGE_INSTALL\x10\x08*z\n\rOverrideLevel\x12\x1e\n\x1aOVERRIDE_LEVEL_UNSPECIFIED\x10\x00\x12\x17\n\x13OVERRIDE_LEVEL_NONE\x10\x01\x12\x18\n\x14OVERRIDE_LEVEL_RELAX\x10\x02\x12\x16\n\x12OVERRIDE_LEVEL_ALL\x10\x03*}\n\x0cResultStatus\x12\x1d\n\x19RESULT_STATUS_UNSPECIFIED\x10\x00\x12\x19\n\x15RESULT_STATUS_SUCCESS\x10\x01\x12\x19\n\x15RESULT_STATUS_PARTIAL\x10\x02\x12\x18\n\x14RESULT_STATUS_FAILED\x10\x03*\x87\x01\n\rResultQuality\x12\x1e\n\x1aRESULT_QUALITY_UNSPECIFIED\x10\x00\x12\x1b\n\x17RESULT_QUALITY_VERIFIED\x10\x01\x12\x1b\n\x17RESULT_QUALITY_INFERRED\x10\x02\x12\x1c\n\x18RESULT_QUALITY_UNCERTAIN\x10\x03*\x86\x01\n\x0cResultSource\x12\x1d\n\x19RESULT_SOURCE_UNSPECIFIED\x10\x00\x12\x1d\n\x19RESULT_SOURCE_TOOL_OUTPUT\x10\x01\x12!\n\x1dRESULT_SOURCE_MODEL_KNOWLEDGE\x10\x02\x12\x15\n\x11RESULT_SOURCE_WEB\x10\x03*\xa0\x01\n\x0c\x41rtifactType\x12\x1d\n\x19\x41RTIFACT_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x41RTIFACT_TYPE_CODE\x10\x01\x12\x16\n\x12\x41RTIFACT_TYPE_TEXT\x10\x02\x12 \n\x1c\x41RTIFACT_TYPE_COMMAND_OUTPUT\x10\x03\x12\x1f\n\x1b\x41RTIFACT_TYPE_SEARCH_RESULT\x10\x04\x62\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'llm_multiverse.v1.common_pb2', _globals)
if not _descriptor._USE_C_DESCRIPTORS:
_globals['DESCRIPTOR']._loaded_options = None
_globals['DESCRIPTOR']._serialized_options = b'\n\025com.llm_multiverse.v1B\013CommonProtoP\001\242\002\003LXX\252\002\020LlmMultiverse.V1\312\002\020LlmMultiverse\\V1\342\002\034LlmMultiverse\\V1\\GPBMetadata\352\002\021LlmMultiverse::V1'
DESCRIPTOR._loaded_options = None
_globals['_ARTIFACT_METADATAENTRY']._loaded_options = None
_globals['_ARTIFACT_METADATAENTRY']._serialized_options = b'8\001'
_globals['_ERRORDETAIL_METADATAENTRY']._loaded_options = None
_globals['_ERRORDETAIL_METADATAENTRY']._serialized_options = b'8\001'
_globals['_AGENTTYPE']._serialized_start=1326
_globals['_AGENTTYPE']._serialized_end=1494
_globals['_TOOLTYPE']._serialized_start=1497
_globals['_TOOLTYPE']._serialized_end=1742
_globals['_OVERRIDELEVEL']._serialized_start=1744
_globals['_OVERRIDELEVEL']._serialized_end=1866
_globals['_RESULTSTATUS']._serialized_start=1868
_globals['_RESULTSTATUS']._serialized_end=1993
_globals['_RESULTQUALITY']._serialized_start=1996
_globals['_RESULTQUALITY']._serialized_end=2131
_globals['_RESULTSOURCE']._serialized_start=2134
_globals['_RESULTSOURCE']._serialized_end=2268
_globals['_AGENTIDENTIFIER']._serialized_start=87
_globals['_AGENTIDENTIFIER']._serialized_end=225
_globals['_AGENTLINEAGE']._serialized_start=227
_globals['_AGENTLINEAGE']._serialized_end=301
_globals['_SESSIONCONTEXT']._serialized_start=304
_globals['_SESSIONCONTEXT']._serialized_end=578
_globals['_ERRORDETAIL']._serialized_start=581
_globals['_ERRORDETAIL']._serialized_end=775
_globals['_ERRORDETAIL_METADATAENTRY']._serialized_start=716
_globals['_ERRORDETAIL_METADATAENTRY']._serialized_end=775
_globals['_MEMORYCANDIDATE']._serialized_start=778
_globals['_MEMORYCANDIDATE']._serialized_end=910
_globals['_SUBAGENTRESULT']._serialized_start=913
_globals['_SUBAGENTRESULT']._serialized_end=1323
_globals['_AGENTTYPE']._serialized_start=1312
_globals['_AGENTTYPE']._serialized_end=1480
_globals['_TOOLTYPE']._serialized_start=1483
_globals['_TOOLTYPE']._serialized_end=1728
_globals['_OVERRIDELEVEL']._serialized_start=1730
_globals['_OVERRIDELEVEL']._serialized_end=1852
_globals['_RESULTSTATUS']._serialized_start=1854
_globals['_RESULTSTATUS']._serialized_end=1979
_globals['_RESULTQUALITY']._serialized_start=1982
_globals['_RESULTQUALITY']._serialized_end=2117
_globals['_RESULTSOURCE']._serialized_start=2120
_globals['_RESULTSOURCE']._serialized_end=2254
_globals['_ARTIFACTTYPE']._serialized_start=2257
_globals['_ARTIFACTTYPE']._serialized_end=2417
_globals['_ARTIFACT']._serialized_start=87
_globals['_ARTIFACT']._serialized_end=295
_globals['_ARTIFACT_METADATAENTRY']._serialized_start=248
_globals['_ARTIFACT_METADATAENTRY']._serialized_end=295
_globals['_AGENTIDENTIFIER']._serialized_start=297
_globals['_AGENTIDENTIFIER']._serialized_end=403
_globals['_AGENTLINEAGE']._serialized_start=405
_globals['_AGENTLINEAGE']._serialized_end=471
_globals['_SESSIONCONTEXT']._serialized_start=474
_globals['_SESSIONCONTEXT']._serialized_end=689
_globals['_ERRORDETAIL']._serialized_start=692
_globals['_ERRORDETAIL']._serialized_end=849
_globals['_ERRORDETAIL_METADATAENTRY']._serialized_start=248
_globals['_ERRORDETAIL_METADATAENTRY']._serialized_end=295
_globals['_MEMORYCANDIDATE']._serialized_start=851
_globals['_MEMORYCANDIDATE']._serialized_end=954
_globals['_SUBAGENTRESULT']._serialized_start=957
_globals['_SUBAGENTRESULT']._serialized_end=1309
# @@protoc_insertion_point(module_scope)

View File

@@ -1,4 +1,24 @@
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
"""Client and server classes corresponding to protobuf-defined services."""
import grpc
import warnings
GRPC_GENERATED_VERSION = '1.78.0'
GRPC_VERSION = grpc.__version__
_version_not_supported = False
try:
from grpc._utilities import first_version_is_lower
_version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION)
except ImportError:
_version_not_supported = True
if _version_not_supported:
raise RuntimeError(
f'The grpc package installed is at version {GRPC_VERSION},'
+ ' but the generated code in llm_multiverse/v1/common_pb2_grpc.py depends on'
+ f' grpcio>={GRPC_GENERATED_VERSION}.'
+ f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
+ f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'
)

View File

@@ -64,8 +64,25 @@ enum ResultSource {
RESULT_SOURCE_WEB = 3;
}
// Type of artifact produced by an agent during its tool call loop.
enum ArtifactType {
ARTIFACT_TYPE_UNSPECIFIED = 0;
ARTIFACT_TYPE_CODE = 1; // Code written via fs_write
ARTIFACT_TYPE_TEXT = 2; // Plain text / file content from fs_read
ARTIFACT_TYPE_COMMAND_OUTPUT = 3; // Output from run_code / run_shell
ARTIFACT_TYPE_SEARCH_RESULT = 4; // Web search results
}
// --- Messages ---
// A concrete output produced by an agent (code, command output, etc.).
message Artifact {
string label = 1; // Display name (filename, query, etc.)
string content = 2; // Full content
ArtifactType artifact_type = 3;
map<string, string> metadata = 4; // language, path, tool_name, exit_code, etc.
}
// Identifies a single agent in the lineage chain.
message AgentIdentifier {
string agent_id = 1;
@@ -107,7 +124,8 @@ message SubagentResult {
ResultStatus status = 1;
// 3-sentence max summary of work performed.
string summary = 2;
repeated string artifacts = 3;
// Structured artifacts produced during the agent loop.
repeated Artifact artifacts = 3;
ResultQuality result_quality = 4;
ResultSource source = 5;
repeated MemoryCandidate new_memory_candidates = 6;
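For reference, building the new structured result from Python looks like this (a sketch using only the fields and enums declared above; the generated module path follows this repo's layout):

```python
# Sketch: constructing a SubagentResult with a structured Artifact.
from llm_multiverse.v1 import common_pb2

artifact = common_pb2.Artifact(
    label="/workspace/fibonacci.py",
    content="def fib(n): ...",
    artifact_type=common_pb2.ARTIFACT_TYPE_CODE,
    metadata={"path": "/workspace/fibonacci.py", "language": "python"},
)

result = common_pb2.SubagentResult(
    status=common_pb2.RESULT_STATUS_SUCCESS,
    summary="Wrote and verified fibonacci.py.",
    artifacts=[artifact],
    result_quality=common_pb2.RESULT_QUALITY_VERIFIED,
    source=common_pb2.RESULT_SOURCE_TOOL_OUTPUT,
)
print(result.artifacts[0].label)  # /workspace/fibonacci.py
```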

View File

@@ -11,6 +11,7 @@ dependencies = [
"orjson>=3.10",
"uvloop>=0.21",
"msgspec>=0.19",
"aiohttp>=3.10",
]
[project.scripts]

View File

@@ -11,6 +11,7 @@ from llm_multiverse.v1 import audit_pb2_grpc, orchestrator_pb2_grpc
from .clients import MemoryClient, ModelGatewayClient, ToolBrokerClient
from .config import Config
from .grpc_web_gateway import start_gateway
from .service import OrchestratorServiceImpl
logger = logging.getLogger("orchestrator")
@@ -50,6 +51,12 @@ async def serve(config: Config) -> None:
await server.start()
# Start gRPC-Web HTTP gateway if configured.
gateway_runner = None
if config.grpc_web_port:
grpc_addr = f"localhost:{config.port}"
gateway_runner = await start_gateway(grpc_addr, config.grpc_web_port)
# Graceful shutdown on SIGINT/SIGTERM
loop = asyncio.get_running_loop()
shutdown_event = asyncio.Event()
@@ -63,6 +70,8 @@ async def serve(config: Config) -> None:
await shutdown_event.wait()
logger.info("Shutting down gracefully...")
if gateway_runner:
await gateway_runner.cleanup()
await server.stop(grace=5)
logger.info("Orchestrator shut down")

View File

@@ -4,6 +4,7 @@ from __future__ import annotations
from llm_multiverse.v1 import common_pb2
from .output_collector import OutputCollector
from .parser import DoneSignalParsed
# Confidence string → proto quality enum.
@@ -13,6 +14,14 @@ CONFIDENCE_MAP: dict[str, int] = {
"UNCERTAIN": common_pb2.RESULT_QUALITY_UNCERTAIN,
}
# CollectedOutput.artifact_type → proto ArtifactType enum.
_ARTIFACT_TYPE_MAP: dict[str, int] = {
"code": common_pb2.ARTIFACT_TYPE_CODE,
"text": common_pb2.ARTIFACT_TYPE_TEXT,
"command_output": common_pb2.ARTIFACT_TYPE_COMMAND_OUTPUT,
"search_result": common_pb2.ARTIFACT_TYPE_SEARCH_RESULT,
}
def determine_source(used_tool_output: bool, used_web: bool) -> int:
"""Determine the result source based on tool usage."""
@@ -27,6 +36,7 @@ def build_success_result(
parsed: DoneSignalParsed,
used_tool_output: bool,
used_web: bool,
collector: OutputCollector | None = None,
) -> common_pb2.SubagentResult:
"""Build a success result from a parsed done signal."""
quality = CONFIDENCE_MAP.get(parsed.confidence, common_pb2.RESULT_QUALITY_UNCERTAIN)
@@ -46,10 +56,12 @@ def build_success_result(
for mc in parsed.memory_candidates
]
artifacts = _build_artifacts(parsed, collector)
return common_pb2.SubagentResult(
status=status,
summary=parsed.summary,
artifacts=parsed.findings,
artifacts=artifacts,
result_quality=quality,
source=source,
new_memory_candidates=candidates,
@@ -61,12 +73,15 @@ def build_partial_result(
summary_prefix: str,
used_tool_output: bool,
used_web: bool,
collector: OutputCollector | None = None,
) -> common_pb2.SubagentResult:
"""Build a partial result with agent-specific summary prefix."""
source = determine_source(used_tool_output, used_web)
artifacts = _build_artifacts(None, collector) if collector else []
return common_pb2.SubagentResult(
status=common_pb2.RESULT_STATUS_PARTIAL,
summary=f"{summary_prefix} incomplete: {reason}",
artifacts=artifacts,
result_quality=common_pb2.RESULT_QUALITY_UNCERTAIN,
source=source,
)
@@ -83,6 +98,40 @@ def build_failed_result(reason: str, summary_prefix: str) -> common_pb2.Subagent
)
def _build_artifacts(
parsed: DoneSignalParsed | None,
collector: OutputCollector | None,
) -> list[common_pb2.Artifact]:
"""Convert collected outputs and findings into proto Artifact messages."""
artifacts: list[common_pb2.Artifact] = []
if collector:
for entry in collector.entries:
artifacts.append(
common_pb2.Artifact(
label=entry.label,
content=entry.content,
artifact_type=_ARTIFACT_TYPE_MAP.get(
entry.artifact_type, common_pb2.ARTIFACT_TYPE_UNSPECIFIED
),
metadata=entry.metadata,
)
)
# Always include findings as text artifacts (model-curated summaries).
if parsed and parsed.findings:
for finding in parsed.findings:
artifacts.append(
common_pb2.Artifact(
label="Finding",
content=finding,
artifact_type=common_pb2.ARTIFACT_TYPE_TEXT,
)
)
return artifacts
def format_memory(mem_response) -> str:
"""Format a QueryMemoryResponse into a context string."""
entry = mem_response.entry

View File

@@ -17,6 +17,7 @@ from .agent_utils import (
)
from .clients import MemoryClient, ModelGatewayClient, ToolBrokerClient
from .config import AgentConfig
from .output_collector import OutputCollector
from .parser import DoneSignalParsed, ParseError, ToolCallParsed, parse_agent_response
from .prompt import PromptBuilder
@@ -84,8 +85,12 @@ class BaseAgent:
session_ctx, self.agent_type, request.task
)
except grpc.aio.AioRpcError as e:
self._logger.warning(
"Tool discovery failed: code=%s, details=%s", e.code(), e.details()
)
return build_failed_result(
f"Tool discovery failed: {e.code()}", self.summary_prefix
f"Tool discovery failed: {e.code()}, details={e.details()}",
self.summary_prefix,
)
if not tools:
@@ -120,126 +125,162 @@ class BaseAgent:
prompt_builder.set_memory_context(memory_context)
# Step 4: Agent loop
collector = OutputCollector()
iteration = 0
consecutive_failures = 0
used_tool_output = False
used_web_search = False
start_time = asyncio.get_event_loop().time()
while True:
# Termination checks
if iteration >= self._config.max_iterations:
return build_partial_result(
"Max iterations reached",
self.summary_prefix,
used_tool_output,
used_web_search,
)
elapsed = asyncio.get_event_loop().time() - start_time
if elapsed >= self._config.timeout_seconds:
return build_partial_result(
"Timeout",
self.summary_prefix,
used_tool_output,
used_web_search,
)
if prompt_builder.needs_compaction():
compacted = await prompt_builder.compact(self._gateway, session_ctx)
if not compacted:
try:
while True:
# Termination checks
if iteration >= self._config.max_iterations:
return build_partial_result(
"Context overflow after compaction",
"Max iterations reached",
self.summary_prefix,
used_tool_output,
used_web_search,
collector,
)
self._logger.info(
"Context compacted for agent %s (iteration %d)",
agent_id,
iteration,
)
if consecutive_failures >= _MAX_CONSECUTIVE_FAILURES:
return build_failed_result(
f"All tools failed after {_MAX_CONSECUTIVE_FAILURES} consecutive failures",
self.summary_prefix,
)
elapsed = asyncio.get_event_loop().time() - start_time
if elapsed >= self._config.timeout_seconds:
return build_partial_result(
"Timeout",
self.summary_prefix,
used_tool_output,
used_web_search,
collector,
)
# Build prompt and call model
prompt = prompt_builder.build()
try:
response_text = await self._gateway.stream_inference(
session_ctx, prompt, max_tokens=max_tokens
)
except grpc.aio.AioRpcError as e:
return build_failed_result(
f"Model gateway error: {e.code()}", self.summary_prefix
)
if prompt_builder.needs_compaction():
compacted = await prompt_builder.compact(
self._gateway, session_ctx
)
if not compacted:
return build_partial_result(
"Context overflow after compaction",
self.summary_prefix,
used_tool_output,
used_web_search,
collector,
)
self._logger.info(
"Context compacted for agent %s (iteration %d)",
agent_id,
iteration,
)
# Parse model output
try:
parsed = parse_agent_response(response_text)
except ParseError as e:
prompt_builder.add_reasoning(
f"[Parse error: {e}. Please output valid JSON.]"
)
iteration += 1
continue
if consecutive_failures >= _MAX_CONSECUTIVE_FAILURES:
return build_failed_result(
f"All tools failed after {_MAX_CONSECUTIVE_FAILURES} consecutive failures",
self.summary_prefix,
)
if parsed is None:
prompt_builder.add_reasoning(response_text)
iteration += 1
continue
# Build prompt and call model
prompt = prompt_builder.build()
try:
response_text = await self._gateway.stream_inference(
session_ctx, prompt, max_tokens=max_tokens
)
except grpc.aio.AioRpcError as e:
self._logger.warning(
"Model gateway error: code=%s, details=%s, iteration=%d",
e.code(),
e.details(),
iteration,
)
return build_failed_result(
f"Model gateway error: {e.code()}, details={e.details()}",
self.summary_prefix,
)
if isinstance(parsed, DoneSignalParsed):
return build_success_result(parsed, used_tool_output, used_web_search)
# Parse model output
try:
parsed = parse_agent_response(response_text)
except ParseError as e:
prompt_builder.add_reasoning(
f"[Parse error: {e}. Please output valid JSON.]"
)
iteration += 1
continue
if parsed is None:
prompt_builder.add_reasoning(response_text)
iteration += 1
continue
if isinstance(parsed, DoneSignalParsed):
return build_success_result(
parsed, used_tool_output, used_web_search, collector
)
if isinstance(parsed, ToolCallParsed):
if parsed.tool not in tool_names:
prompt_builder.add_tool_result(
parsed.tool,
f"Error: tool '{parsed.tool}' is not available. "
f"Available tools: {', '.join(sorted(tool_names))}",
False,
agent_id,
session_ctx.session_id,
)
iteration += 1
consecutive_failures += 1
continue
prompt_builder.add_tool_call(parsed.tool, parsed.parameters)
try:
output, success = await self._broker.execute_tool(
session_ctx,
self.agent_type,
parsed.tool,
parsed.parameters,
)
except grpc.aio.AioRpcError as e:
self._logger.warning(
"Tool execution error: tool=%s, code=%s, details=%s",
parsed.tool,
e.code(),
e.details(),
)
output = (
f"Tool execution error: {e.code()}, details={e.details()}"
)
success = False
if isinstance(parsed, ToolCallParsed):
if parsed.tool not in tool_names:
prompt_builder.add_tool_result(
parsed.tool,
f"Error: tool '{parsed.tool}' is not available. "
f"Available tools: {', '.join(sorted(tool_names))}",
False,
output,
success,
agent_id,
session_ctx.session_id,
)
iteration += 1
consecutive_failures += 1
continue
prompt_builder.add_tool_call(parsed.tool, parsed.parameters)
try:
output, success = await self._broker.execute_tool(
session_ctx,
self.agent_type,
parsed.tool,
parsed.parameters,
collector.collect(
parsed.tool, parsed.parameters, output, success
)
except grpc.aio.AioRpcError as e:
output = f"Tool execution error: {e.code()}"
success = False
prompt_builder.add_tool_result(
parsed.tool,
output,
success,
agent_id,
session_ctx.session_id,
)
if success:
if parsed.tool in self.tool_output_sources:
used_tool_output = True
if parsed.tool == "web_search":
used_web_search = True
consecutive_failures = 0
else:
consecutive_failures += 1
if success:
if parsed.tool in self.tool_output_sources:
used_tool_output = True
if parsed.tool == "web_search":
used_web_search = True
consecutive_failures = 0
else:
consecutive_failures += 1
iteration += 1
iteration += 1
return build_failed_result(
"Unexpected loop exit", self.summary_prefix
) # pragma: no cover
return build_failed_result(
"Unexpected loop exit", self.summary_prefix
) # pragma: no cover
finally:
elapsed = asyncio.get_event_loop().time() - start_time
self._logger.info(
"Agent run finished: agent_id=%s, iterations=%d, elapsed=%.2fs",
agent_id,
iteration,
elapsed,
)

View File

@@ -79,6 +79,7 @@ class Config(msgspec.Struct):
host: str = "[::]"
port: int = 50058
grpc_web_port: int | None = None
model_gateway_addr: str = "model-gateway:50055"
tool_broker_addr: str = "tool-broker:50057"
memory_addr: str = "memory-service:50054"
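`grpc_web_port` defaults to `None`, so the gateway only starts when a port is configured (the Docker config above sets it to 8080). A minimal sketch of decoding that YAML into this struct with msgspec (assumes PyYAML is installed for `msgspec.yaml`; only the fields visible in this diff are modeled):

```python
# Sketch: decoding the Docker config shown earlier into a msgspec Struct.
import msgspec

class Config(msgspec.Struct):
    host: str = "[::]"
    port: int = 50058
    grpc_web_port: int | None = None
    model_gateway_addr: str = "model-gateway:50055"

yaml_doc = b"""
host: "0.0.0.0"
port: 50058
grpc_web_port: 8080
model_gateway_addr: "model-gateway:50055"
"""

cfg = msgspec.yaml.decode(yaml_doc, type=Config)
print(cfg.grpc_web_port)  # 8080 -> gateway starts; None would skip it
```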

View File

@@ -93,6 +93,7 @@ class TaskDecomposer:
Returns a list of SubtaskDefinition protos. Falls back to a single-task
plan if decomposition fails for any reason.
"""
response_text = ""
try:
prompt = _build_prompt(user_message)
response_text = await self._gateway.inference(
@@ -106,10 +107,18 @@ class TaskDecomposer:
_validate_decomposition(subtasks, self._config.max_subtasks)
return _to_proto_subtasks(subtasks)
except grpc.aio.AioRpcError as e:
logger.warning("Gateway error during decomposition: %s", e.code())
logger.warning(
"Gateway error during decomposition: code=%s, details=%s",
e.code(),
e.details(),
)
return _build_fallback_plan(user_message)
except (DecompositionError, orjson.JSONDecodeError) as e:
logger.warning("Decomposition failed: %s", e)
logger.warning(
"Decomposition parsing failed: %s, response_preview=%.200s",
e,
response_text[:200],
)
return _build_fallback_plan(user_message)

View File

@@ -0,0 +1,121 @@
"""Minimal gRPC-Web HTTP gateway.
Translates gRPC-Web (HTTP/1.1) requests into native gRPC calls to the
local orchestrator server and streams back gRPC-Web framed responses.
This eliminates the need for an external proxy (Caddy/Envoy) to handle
the gRPC-Web protocol translation.
"""
from __future__ import annotations
import logging
import struct
import grpc
from aiohttp import web
from llm_multiverse.v1 import orchestrator_pb2_grpc
logger = logging.getLogger("orchestrator.grpc_web")
def _encode_frame(payload: bytes, *, trailer: bool = False) -> bytes:
"""Encode a gRPC-Web frame: [flags:1][length:4][payload:N]."""
flags = 0x80 if trailer else 0x00
return struct.pack(">BI", flags, len(payload)) + payload
def _decode_frame(data: bytes) -> bytes:
"""Decode a single gRPC-Web data frame and return the protobuf payload."""
if len(data) < 5:
raise ValueError("gRPC-Web frame too short")
_flags, length = struct.unpack(">BI", data[:5])
if len(data) < 5 + length:
raise ValueError("gRPC-Web frame payload truncated")
return data[5 : 5 + length]
def _status_code_number(code: grpc.StatusCode) -> int:
"""Extract the numeric gRPC status code."""
return code.value[0]
async def _handle_grpc_web(request: web.Request) -> web.StreamResponse:
"""Proxy a gRPC-Web request to the local gRPC server."""
channel: grpc.aio.Channel = request.app["grpc_channel"]
stub = orchestrator_pb2_grpc.OrchestratorServiceStub(channel)
body = await request.read()
proto_bytes = _decode_frame(body)
from llm_multiverse.v1 import orchestrator_pb2
req = orchestrator_pb2.ProcessRequestRequest()
req.ParseFromString(proto_bytes)
response = web.StreamResponse(
status=200,
headers={
"Content-Type": "application/grpc-web+proto",
"Access-Control-Allow-Origin": "*",
},
)
await response.prepare(request)
grpc_status = 0
grpc_message = ""
try:
async for resp in stub.ProcessRequest(req):
frame = _encode_frame(resp.SerializeToString())
await response.write(frame)
except grpc.aio.AioRpcError as e:
grpc_status = _status_code_number(e.code())
grpc_message = e.details() or ""
logger.warning(
"gRPC-Web proxy error: code=%s, details=%s", e.code(), e.details()
)
trailer_text = f"grpc-status: {grpc_status}\r\n"
if grpc_message:
trailer_text += f"grpc-message: {grpc_message}\r\n"
await response.write(_encode_frame(trailer_text.encode(), trailer=True))
await response.write_eof()
return response
async def _handle_cors(request: web.Request) -> web.Response:
"""Handle CORS preflight for gRPC-Web requests."""
return web.Response(
headers={
"Access-Control-Allow-Origin": "*",
"Access-Control-Allow-Methods": "POST, OPTIONS",
"Access-Control-Allow-Headers": (
"Content-Type, X-Grpc-Web, X-User-Agent"
),
"Access-Control-Max-Age": "86400",
}
)
async def start_gateway(grpc_addr: str, port: int) -> web.AppRunner:
"""Start the gRPC-Web HTTP gateway on the given port.
Connects to the native gRPC server at *grpc_addr* and exposes an
HTTP/1.1 endpoint that speaks gRPC-Web.
"""
app = web.Application()
app["grpc_channel"] = grpc.aio.insecure_channel(grpc_addr)
app.router.add_post(
"/llm_multiverse.v1.OrchestratorService/{method}", _handle_grpc_web
)
app.router.add_options(
"/llm_multiverse.v1.OrchestratorService/{method}", _handle_cors
)
runner = web.AppRunner(app, access_log=None)
await runner.setup()
site = web.TCPSite(runner, "0.0.0.0", port)
await site.start()
logger.info("gRPC-Web gateway listening on 0.0.0.0:%d -> %s", port, grpc_addr)
return runner
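A quick sanity check of the framing helpers above (assuming `_encode_frame` and `_decode_frame` are imported from this module): a gRPC-Web frame is `[flags:1][length:4][payload:N]`, with the `0x80` flag bit marking the trailer frame.

```python
payload = b"\x08\x01"  # arbitrary protobuf bytes, for illustration only
frame = _encode_frame(payload)
assert frame[0] == 0x00 and len(frame) == 5 + len(payload)
assert _decode_frame(frame) == payload

trailer = _encode_frame(b"grpc-status: 0\r\n", trailer=True)
assert trailer[0] == 0x80  # high bit set on trailer frames
```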

View File

@@ -0,0 +1,144 @@
"""Lightweight collector for concrete tool outputs during the agent loop."""
from __future__ import annotations
from dataclasses import dataclass, field
# Maximum content length to store per artifact (256 KB).
_MAX_CONTENT_LEN = 256 * 1024
# Tools whose outputs are context, not user-facing artifacts.
_SKIP_TOOLS = frozenset({"memory_read", "memory_write"})
# Tool name → artifact type string (matches ArtifactType enum names).
_TOOL_TYPE_MAP: dict[str, str] = {
"fs_write": "code",
"fs_read": "text",
"run_code": "command_output",
"run_shell": "command_output",
"web_search": "search_result",
}
@dataclass(frozen=True)
class CollectedOutput:
"""A single artifact captured from a tool execution."""
tool_name: str
label: str
content: str
artifact_type: str
metadata: dict[str, str]
class OutputCollector:
"""Collects concrete outputs during the agent tool call loop.
Populated after each successful tool execution. Maps tool names to
artifact types so downstream code can build structured Artifact protos.
"""
def __init__(self) -> None:
self._entries: list[CollectedOutput] = []
def collect(
self,
tool_name: str,
parameters: dict,
output: str,
success: bool,
) -> None:
"""Collect output from a tool execution.
Only successful calls for known, user-facing tools are collected.
"""
if not success:
return
if tool_name in _SKIP_TOOLS:
return
artifact_type = _TOOL_TYPE_MAP.get(tool_name)
if artifact_type is None:
return
if tool_name == "fs_write":
label = parameters.get("path", parameters.get("file_path", "file"))
content = parameters.get("content", "")
metadata = {"path": label}
lang = _guess_language(label)
if lang:
metadata["language"] = lang
elif tool_name == "fs_read":
label = parameters.get("path", parameters.get("file_path", "file"))
content = output
metadata = {"path": label}
lang = _guess_language(label)
if lang:
metadata["language"] = lang
elif tool_name == "run_code":
label = "Code execution"
content = output
lang = parameters.get("language", "")
metadata = {"language": lang} if lang else {}
elif tool_name == "run_shell":
cmd = parameters.get("command", "")
label = cmd[:80] if cmd else "Shell command"
content = output
metadata = {"command": cmd} if cmd else {}
elif tool_name == "web_search":
label = parameters.get("query", "Search")
content = output
metadata = {}
else:
return # pragma: no cover
# Truncate oversized content.
if len(content) > _MAX_CONTENT_LEN:
content = content[:_MAX_CONTENT_LEN] + "\n... [truncated]"
self._entries.append(
CollectedOutput(
tool_name=tool_name,
label=label,
content=content,
artifact_type=artifact_type,
metadata=metadata,
)
)
@property
def entries(self) -> list[CollectedOutput]:
"""Return a copy of all collected outputs."""
return list(self._entries)
def _guess_language(path: str) -> str:
"""Guess programming language from file extension."""
ext_map = {
".py": "python",
".js": "javascript",
".ts": "typescript",
".tsx": "typescript",
".jsx": "javascript",
".rs": "rust",
".go": "go",
".java": "java",
".rb": "ruby",
".sh": "bash",
".yaml": "yaml",
".yml": "yaml",
".json": "json",
".toml": "toml",
".html": "html",
".css": "css",
".sql": "sql",
".md": "markdown",
".c": "c",
".cpp": "cpp",
".h": "c",
".hpp": "cpp",
}
for ext, lang in ext_map.items():
if path.endswith(ext):
return lang
return ""

View File

@@ -125,6 +125,13 @@ When done, respond with a JSON result block:
"confidence": "VERIFIED|INFERRED|UNCERTAIN", \
"memory_candidates": [{"content": "<fact>", "confidence": 0.0-1.0}]}
## IMPORTANT: Always Use Tools for Code
- When asked to write or generate code, you MUST use `fs_write` to write the \
code to a file and `run_code` to verify it. Do NOT just describe the code in \
your summary or findings — the user needs the actual code as a tool artifact.
- For new code requests, write to `/workspace/` (e.g. `/workspace/fibonacci.py`).
- When using `run_code`, set working_dir to `/workspace/output`.
## Constraints
- Do NOT fabricate file contents. Always read files before modifying them.
- Do NOT attempt to use tools not listed in your capabilities.
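For reference, a well-formed done-signal matching the schema above looks like the following (illustrative values; `parse_agent_response` in `parser.py` is the authoritative consumer):

```python
import json

done_signal = {
    "done": True,
    "summary": "Wrote /workspace/fibonacci.py and verified it runs.",
    "findings": ["fib(10) == 55"],
    "confidence": "VERIFIED",
    "memory_candidates": [
        {"content": "User prefers iterative implementations.", "confidence": 0.8}
    ],
}
print(json.dumps(done_signal, indent=2))
```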

View File

@@ -3,6 +3,7 @@
from __future__ import annotations
import logging
import time
import grpc
from google.protobuf import timestamp_pb2
@@ -135,9 +136,12 @@ class OrchestratorServiceImpl(orchestrator_pb2_grpc.OrchestratorServiceServicer)
Pipeline: decompose → filter → dispatch → confidence → memory gate → respond.
"""
start_time = time.monotonic()
logger.info(
"ProcessRequest: session=%s, message=%s",
"ProcessRequest: session=%s, message_len=%d, has_config=%s, preview=%.100s",
request.session_id,
len(request.user_message),
request.HasField("session_config"),
request.user_message[:100],
)
@@ -179,10 +183,17 @@ class OrchestratorServiceImpl(orchestrator_pb2_grpc.OrchestratorServiceServicer)
try:
subtasks = await decomposer.decompose(session_ctx, request.user_message)
except Exception as e:
logger.error("Decomposition failed: %s", e)
elapsed = time.monotonic() - start_time
logger.error(
"Decomposition failed: session=%s, error=%s, elapsed=%.2fs",
request.session_id,
e,
elapsed,
exc_info=True,
)
yield orchestrator_pb2.ProcessRequestResponse(
state=orchestrator_pb2.ORCHESTRATION_STATE_COMPLETE,
message="Failed to decompose request. Please try again.",
message=f"Failed to decompose request: {type(e).__name__}. Please try again.",
)
return
@@ -260,6 +271,15 @@ class OrchestratorServiceImpl(orchestrator_pb2_grpc.OrchestratorServiceServicer)
outcomes = await dispatcher.dispatch_all()
for outcome in outcomes:
logger.info(
"Subtask outcome: session=%s, subtask=%s, status=%s%s",
request.session_id,
outcome.subtask_id,
outcome.status,
f", error={outcome.error}" if outcome.error else "",
)
# Step 5: Evaluate confidence signals.
# TODO: Wire ConfidenceReplanner to re-dispatch low-confidence subtasks.
evaluator = ConfidenceEvaluator(self._config.confidence)
@@ -289,14 +309,16 @@ class OrchestratorServiceImpl(orchestrator_pb2_grpc.OrchestratorServiceServicer)
# Build final SubagentResult summary.
final_result = _build_final_result(outcomes, confidence_report)
elapsed = time.monotonic() - start_time
logger.info(
"ProcessRequest complete: session=%s, subtasks=%d, "
"confidence=%.2f, memory_accepted=%d/%d",
"confidence=%.2f, memory_accepted=%d/%d, elapsed=%.2fs",
request.session_id,
len(subtasks),
confidence_report.overall_confidence,
gating_report.accepted_count,
gating_report.total_candidates,
elapsed,
)
yield orchestrator_pb2.ProcessRequestResponse(
@@ -341,7 +363,7 @@ def _synthesize_response(
def _build_final_result(outcomes: list, confidence_report) -> common_pb2.SubagentResult:
"""Build the final SubagentResult from all outcomes."""
summaries: list[str] = []
artifacts: list[str] = []
artifacts: list[common_pb2.Artifact] = []
all_candidates: list[common_pb2.MemoryCandidate] = []
has_failure = False
has_success = False

View File

@@ -0,0 +1,126 @@
"""Tests for _build_artifacts in agent_utils."""
from __future__ import annotations
from llm_multiverse.v1 import common_pb2
from orchestrator.agent_utils import _build_artifacts, build_success_result
from orchestrator.output_collector import OutputCollector
from orchestrator.parser import DoneSignalParsed
def _make_parsed(
summary: str = "Done.",
findings: list[str] | None = None,
confidence: str = "VERIFIED",
) -> DoneSignalParsed:
return DoneSignalParsed(
summary=summary,
findings=findings or [],
confidence=confidence,
)
def test_build_artifacts_from_collector():
"""Collector entries are converted to Artifact protos."""
collector = OutputCollector()
collector.collect("fs_write", {"path": "/main.py", "content": "print(1)"}, "ok", True)
collector.collect("run_code", {"language": "python"}, "1\n", True)
parsed = _make_parsed()
artifacts = _build_artifacts(parsed, collector)
assert len(artifacts) == 2
assert artifacts[0].artifact_type == common_pb2.ARTIFACT_TYPE_CODE
assert artifacts[0].label == "/main.py"
assert artifacts[0].content == "print(1)"
assert artifacts[0].metadata["language"] == "python"
assert artifacts[1].artifact_type == common_pb2.ARTIFACT_TYPE_COMMAND_OUTPUT
assert artifacts[1].content == "1\n"
def test_build_artifacts_findings_as_text():
"""Findings are always included as TEXT artifacts."""
parsed = _make_parsed(findings=["Fixed null check", "Improved error handling"])
artifacts = _build_artifacts(parsed, None)
assert len(artifacts) == 2
assert all(a.artifact_type == common_pb2.ARTIFACT_TYPE_TEXT for a in artifacts)
assert artifacts[0].label == "Finding"
assert artifacts[0].content == "Fixed null check"
assert artifacts[1].content == "Improved error handling"
def test_build_artifacts_collector_plus_findings():
"""Both collector entries and findings are included."""
collector = OutputCollector()
collector.collect("web_search", {"query": "python docs"}, "results...", True)
parsed = _make_parsed(findings=["Found documentation"])
artifacts = _build_artifacts(parsed, collector)
assert len(artifacts) == 2
assert artifacts[0].artifact_type == common_pb2.ARTIFACT_TYPE_SEARCH_RESULT
assert artifacts[0].label == "python docs"
assert artifacts[1].artifact_type == common_pb2.ARTIFACT_TYPE_TEXT
assert artifacts[1].content == "Found documentation"
def test_build_artifacts_empty():
"""No collector and no findings produces empty list."""
parsed = _make_parsed()
artifacts = _build_artifacts(parsed, None)
assert artifacts == []
def test_build_artifacts_none_parsed():
"""None parsed (partial result path) still builds from collector."""
collector = OutputCollector()
collector.collect("fs_read", {"path": "/test.py"}, "content", True)
artifacts = _build_artifacts(None, collector)
assert len(artifacts) == 1
assert artifacts[0].artifact_type == common_pb2.ARTIFACT_TYPE_TEXT
def test_build_success_result_with_collector():
"""build_success_result includes structured artifacts from collector."""
collector = OutputCollector()
collector.collect("fs_write", {"path": "/app.ts", "content": "const x = 1;"}, "ok", True)
parsed = _make_parsed(findings=["Created app.ts"])
result = build_success_result(parsed, True, False, collector)
assert result.status == common_pb2.RESULT_STATUS_SUCCESS
assert len(result.artifacts) == 2
assert result.artifacts[0].artifact_type == common_pb2.ARTIFACT_TYPE_CODE
assert result.artifacts[0].label == "/app.ts"
assert result.artifacts[0].content == "const x = 1;"
assert result.artifacts[1].artifact_type == common_pb2.ARTIFACT_TYPE_TEXT
assert result.artifacts[1].content == "Created app.ts"
def test_build_success_result_backward_compat_no_collector():
"""build_success_result without collector still works (findings only)."""
parsed = _make_parsed(findings=["Some finding"])
result = build_success_result(parsed, False, False)
assert len(result.artifacts) == 1
assert result.artifacts[0].artifact_type == common_pb2.ARTIFACT_TYPE_TEXT
assert result.artifacts[0].content == "Some finding"
def test_artifact_metadata_map():
"""Metadata map is correctly populated on proto Artifact."""
collector = OutputCollector()
collector.collect(
"fs_write",
{"path": "/src/lib.rs", "content": "fn main() {}"},
"ok",
True,
)
parsed = _make_parsed()
artifacts = _build_artifacts(parsed, collector)
assert artifacts[0].metadata["path"] == "/src/lib.rs"
assert artifacts[0].metadata["language"] == "rust"

View File

@@ -96,7 +96,8 @@ async def test_simple_assistant_task():
result = await agent.run(_make_request())
assert result.status == common_pb2.RESULT_STATUS_SUCCESS
assert "Docker" in result.summary
assert result.artifacts == ["Uses namespaces"]
finding_contents = [a.content for a in result.artifacts if a.label == "Finding"]
assert "Uses namespaces" in finding_contents
assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED

View File

@@ -100,7 +100,8 @@ async def test_simple_coding_task():
result = await agent.run(_make_request())
assert result.status == common_pb2.RESULT_STATUS_SUCCESS
assert "Fixed" in result.summary
assert result.artifacts == ["Fixed null check"]
finding_contents = [a.content for a in result.artifacts if a.label == "Finding"]
assert "Fixed null check" in finding_contents
assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED
@@ -458,10 +459,10 @@ async def test_compaction_triggers_and_continues():
'{"done": true, "summary": "Done after compaction.", "confidence": "VERIFIED"}',
],
exec_output="A" * 600,
config=AgentConfig(max_iterations=20, max_tokens=1800),
config=AgentConfig(max_iterations=20, max_tokens=2400),
)
agent._gateway.inference = AsyncMock(return_value="- Short summary")
result = await agent.run(_make_request(max_tokens=1800))
result = await agent.run(_make_request(max_tokens=2400))
assert result.status == common_pb2.RESULT_STATUS_SUCCESS
assert "Done after compaction" in result.summary

View File

@@ -67,7 +67,8 @@ async def test_e2e_web_search_research_task():
result = await ctx.agent.run(make_request())
assert result.status == common_pb2.RESULT_STATUS_SUCCESS
assert "Paris" in result.summary
assert result.artifacts == ["Paris"]
finding_contents = [a.content for a in result.artifacts if a.label == "Finding"]
assert "Paris" in finding_contents
assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED
assert result.source == common_pb2.RESULT_SOURCE_WEB
@@ -306,9 +307,9 @@ async def test_e2e_subagent_result_schema():
assert isinstance(result.summary, str)
assert len(result.summary) > 0
# Artifacts is a list of strings
# Artifacts is a list of Artifact proto messages
assert len(result.artifacts) > 0
assert all(isinstance(a, str) for a in result.artifacts)
assert all(hasattr(a, "label") and hasattr(a, "content") for a in result.artifacts)
# Quality is valid enum
assert result.result_quality in (

View File

@@ -145,7 +145,7 @@ async def test_researcher_multi_step_web_search():
assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED
assert result.source == common_pb2.RESULT_SOURCE_WEB
assert "Quantum" in result.summary or "quantum" in result.summary
assert len(result.artifacts) == 3
assert len(result.artifacts) == 5
assert len(result.new_memory_candidates) == 1
assert agent._broker.execute_tool.call_count == 2
assert agent._broker.discover_tools.call_count == 1
@@ -443,7 +443,8 @@ async def test_researcher_produces_valid_subagent_result():
assert result.summary == "Validated result."
assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED
assert result.source == common_pb2.RESULT_SOURCE_WEB
assert result.artifacts == ["finding1"]
finding_contents = [a.content for a in result.artifacts if a.label == "Finding"]
assert "finding1" in finding_contents
assert result.failure_reason == ""
assert len(result.new_memory_candidates) == 1
assert result.new_memory_candidates[0].content == "fact1"

View File

@@ -0,0 +1,207 @@
"""Tests for the OutputCollector class."""
from __future__ import annotations
from orchestrator.output_collector import OutputCollector
def test_collect_fs_write():
"""fs_write produces a CODE artifact with file content from parameters."""
collector = OutputCollector()
collector.collect(
tool_name="fs_write",
parameters={"path": "/src/main.py", "content": "def hello():\n pass\n"},
output="File written successfully",
success=True,
)
entries = collector.entries
assert len(entries) == 1
e = entries[0]
assert e.tool_name == "fs_write"
assert e.artifact_type == "code"
assert e.label == "/src/main.py"
assert e.content == "def hello():\n pass\n"
assert e.metadata["path"] == "/src/main.py"
assert e.metadata["language"] == "python"
def test_collect_fs_write_file_path_param():
"""fs_write also works with 'file_path' parameter key."""
collector = OutputCollector()
collector.collect(
tool_name="fs_write",
parameters={"file_path": "/app.js", "content": "console.log('hi')"},
output="ok",
success=True,
)
assert collector.entries[0].label == "/app.js"
assert collector.entries[0].metadata["language"] == "javascript"
def test_collect_fs_read():
"""fs_read produces a TEXT artifact with tool output as content."""
collector = OutputCollector()
collector.collect(
tool_name="fs_read",
parameters={"path": "/config.yaml"},
output="key: value\nother: 42",
success=True,
)
entries = collector.entries
assert len(entries) == 1
e = entries[0]
assert e.artifact_type == "text"
assert e.label == "/config.yaml"
assert e.content == "key: value\nother: 42"
assert e.metadata["language"] == "yaml"
def test_collect_run_code():
"""run_code produces a COMMAND_OUTPUT artifact."""
collector = OutputCollector()
collector.collect(
tool_name="run_code",
parameters={"language": "python", "code": "print(42)"},
output="42\n",
success=True,
)
entries = collector.entries
assert len(entries) == 1
e = entries[0]
assert e.artifact_type == "command_output"
assert e.label == "Code execution"
assert e.content == "42\n"
assert e.metadata["language"] == "python"
def test_collect_run_shell():
"""run_shell produces a COMMAND_OUTPUT artifact with truncated command label."""
collector = OutputCollector()
collector.collect(
tool_name="run_shell",
parameters={"command": "ls -la /tmp"},
output="total 4\ndrwxrwxrwt 2 root root 40 Jan 1 00:00 .\n",
success=True,
)
entries = collector.entries
assert len(entries) == 1
e = entries[0]
assert e.artifact_type == "command_output"
assert e.label == "ls -la /tmp"
assert e.metadata["command"] == "ls -la /tmp"
def test_collect_web_search():
"""web_search produces a SEARCH_RESULT artifact."""
collector = OutputCollector()
collector.collect(
tool_name="web_search",
parameters={"query": "python async tutorial"},
output="Result 1: ...\nResult 2: ...",
success=True,
)
entries = collector.entries
assert len(entries) == 1
e = entries[0]
assert e.artifact_type == "search_result"
assert e.label == "python async tutorial"
assert "Result 1" in e.content
def test_skips_failed_tools():
"""Failed tool executions are not collected."""
collector = OutputCollector()
collector.collect(
tool_name="fs_write",
parameters={"path": "/test.py", "content": "code"},
output="Permission denied",
success=False,
)
assert len(collector.entries) == 0
def test_skips_memory_tools():
"""memory_read and memory_write are excluded."""
collector = OutputCollector()
collector.collect(
tool_name="memory_read",
parameters={"query": "something"},
output="memory content",
success=True,
)
collector.collect(
tool_name="memory_write",
parameters={"content": "save this"},
output="ok",
success=True,
)
assert len(collector.entries) == 0
def test_skips_unknown_tools():
"""Unknown tool names are ignored."""
collector = OutputCollector()
collector.collect(
tool_name="custom_tool",
parameters={"data": "stuff"},
output="result",
success=True,
)
assert len(collector.entries) == 0
def test_multiple_tools_collected():
"""Multiple tool outputs are collected in order."""
collector = OutputCollector()
collector.collect("fs_read", {"path": "/a.py"}, "content_a", True)
collector.collect("fs_write", {"path": "/b.py", "content": "code_b"}, "ok", True)
collector.collect("run_code", {"language": "python"}, "output_c", True)
entries = collector.entries
assert len(entries) == 3
assert entries[0].label == "/a.py"
assert entries[1].label == "/b.py"
assert entries[2].label == "Code execution"
def test_content_truncation():
"""Oversized content is truncated."""
collector = OutputCollector()
huge_content = "x" * (300 * 1024)
collector.collect(
tool_name="fs_read",
parameters={"path": "/big.txt"},
output=huge_content,
success=True,
)
assert len(collector.entries) == 1
assert len(collector.entries[0].content) < len(huge_content)
assert collector.entries[0].content.endswith("... [truncated]")
def test_entries_returns_copy():
"""entries property returns a copy, not the internal list."""
collector = OutputCollector()
collector.collect("fs_read", {"path": "/a.py"}, "content", True)
entries1 = collector.entries
entries2 = collector.entries
assert entries1 == entries2
assert entries1 is not entries2
def test_language_detection():
"""Language is correctly guessed from various file extensions."""
collector = OutputCollector()
test_cases = [
("/app.ts", "typescript"),
("/style.css", "css"),
("/main.go", "go"),
("/lib.rs", "rust"),
("/query.sql", "sql"),
("/unknown.xyz", ""),
]
for path, expected_lang in test_cases:
collector = OutputCollector()
collector.collect("fs_read", {"path": path}, "content", True)
actual = collector.entries[0].metadata.get("language", "")
assert actual == expected_lang, f"Expected '{expected_lang}' for {path}, got '{actual}'"

View File

@@ -93,7 +93,8 @@ async def test_simple_research_task():
result = await agent.run(_make_request())
assert result.status == common_pb2.RESULT_STATUS_SUCCESS
assert "Paris" in result.summary
assert result.artifacts == ["Paris"]
finding_contents = [a.content for a in result.artifacts if a.label == "Finding"]
assert "Paris" in finding_contents
assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED

View File

@@ -419,7 +419,7 @@ def test_build_final_result_all_success():
r1 = common_pb2.SubagentResult(
status=common_pb2.RESULT_STATUS_SUCCESS,
summary="Done A",
artifacts=["file1.txt"],
artifacts=[common_pb2.Artifact(label="Finding", content="file1.txt", artifact_type=common_pb2.ARTIFACT_TYPE_TEXT)],
)
outcomes = [SubtaskOutcome(subtask_id="t-1", status="success", result=r1)]
report = ConfidenceReport(
@@ -430,7 +430,7 @@ def test_build_final_result_all_success():
assert result.status == common_pb2.RESULT_STATUS_SUCCESS
assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED
assert "Done A" in result.summary
assert "file1.txt" in result.artifacts
assert any(a.content == "file1.txt" for a in result.artifacts)
def test_build_final_result_partial():

View File

@@ -99,7 +99,8 @@ async def test_simple_sysadmin_task():
result = await agent.run(_make_request())
assert result.status == common_pb2.RESULT_STATUS_SUCCESS
assert "nginx" in result.summary
assert result.artifacts == ["Config valid"]
finding_contents = [a.content for a in result.artifacts if a.label == "Finding"]
assert "Config valid" in finding_contents
assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED

View File

@@ -14,7 +14,8 @@ from readability import Document
logger = logging.getLogger(__name__)
DEFAULT_USER_AGENT = (
"Mozilla/5.0 (compatible; llm-multiverse-search/0.1; +https://localhost)"
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
)
DEFAULT_TIMEOUT = 10.0
DEFAULT_MAX_CONTENT_LENGTH = 8000 # characters