Merge pull request 'feat: structured artifact passthrough for agent results' (#237) from feat/structured-artifact-passthrough into main

This commit was merged in pull request #237.
2026-03-12 23:14:28 +01:00
29 changed files with 966 additions and 198 deletions

View File

@@ -49,7 +49,7 @@ All services communicate via gRPC (Protocol Buffers). Internal services run in D
Ollama runs on the bare metal host (not in Docker) because GPU drivers — particularly ROCm for AMD and CUDA for NVIDIA — require direct hardware access that Docker GPU passthrough complicates.
The internal Docker network (`llm-internal`) is declared `internal: true`, which means containers on this network cannot reach the internet directly. Only Caddy (on the edge network) and SearXNG (which has its own upstream configuration) can make external connections.
The internal Docker network (`llm-internal`) is declared `internal: true`, which means containers on this network cannot reach the internet directly. SearXNG, the Search Service, the Model Gateway, and the orchestrator are also attached to `llm-external` so they can reach Ollama on the host and the internet for web searches and page content extraction.
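A quick way to see what `internal: true` means in practice is to probe outbound connectivity from inside a container. The sketch below is purely illustrative (it is not part of this PR, and the hostname is arbitrary):

```python
# Hypothetical connectivity probe for illustrating `internal: true`.
import socket

def can_reach(host: str = "example.com", port: int = 443, timeout: float = 3.0) -> bool:
    """Return True if an outbound TCP connection succeeds."""
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        return False

# Run inside a container: on `llm-internal` alone this prints False;
# on a container also attached to `llm-external` it prints True.
print(can_reach())
```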
## 3. Hardware Requirements
@@ -152,7 +152,7 @@ No values are strictly required — all services start with sensible defaults. H
|-------|---------|-------------|
| `decomposer.max_subtasks` | 10 | Maximum subtasks per request |
| `dispatcher.max_concurrent_subtasks` | 5 | Parallel agent limit |
| `dispatcher.subtask_timeout_seconds` | 120 | Per-subtask timeout |
| `dispatcher.subtask_timeout_seconds` | 300 | Per-subtask timeout |
| `dispatcher.overall_timeout_seconds` | 600 | Total dispatch timeout |
| `compaction.threshold_ratio` | 0.6 | Compact at 60% of context window |
| `memory_gating.confidence_threshold` | 0.7 | Minimum confidence to persist memory |
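The two dispatcher timeouts nest: each subtask is capped individually while the whole batch shares an overall budget, and a semaphore bounds concurrency. A minimal sketch of how those three limits compose with `asyncio` (illustrative only; the dispatcher's real implementation is not shown in this diff):

```python
# Sketch of nested per-subtask and overall timeouts, not the dispatcher itself.
import asyncio

SUBTASK_TIMEOUT = 300.0   # dispatcher.subtask_timeout_seconds
OVERALL_TIMEOUT = 600.0   # dispatcher.overall_timeout_seconds
MAX_CONCURRENT = 5        # dispatcher.max_concurrent_subtasks

async def run_subtask(i: int) -> str:
    await asyncio.sleep(0.1)  # stand-in for agent work
    return f"subtask-{i} done"

async def dispatch_all(n: int) -> list[str | BaseException]:
    sem = asyncio.Semaphore(MAX_CONCURRENT)

    async def bounded(i: int):
        async with sem:
            # Each subtask is individually capped...
            return await asyncio.wait_for(run_subtask(i), SUBTASK_TIMEOUT)

    # ...and the whole batch is capped as well.
    return await asyncio.wait_for(
        asyncio.gather(*(bounded(i) for i in range(n)), return_exceptions=True),
        OVERALL_TIMEOUT,
    )

print(asyncio.run(dispatch_all(3)))
```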

View File

@@ -21,6 +21,7 @@
versions h2c
}
header_up te trailers
}
log {
output stdout

View File

@@ -1,6 +1,7 @@
# Orchestrator configuration for Docker deployment
host: "0.0.0.0"
port: 50058
grpc_web_port: 8080
model_gateway_addr: "model-gateway:50055"
tool_broker_addr: "tool-broker:50057"
memory_addr: "memory:50054"
@@ -9,22 +10,22 @@ audit_addr: "audit:50052"
researcher:
max_iterations: 10
timeout_seconds: 120
timeout_seconds: 300
max_tokens: 4096
coder:
max_iterations: 10
timeout_seconds: 120
timeout_seconds: 300
max_tokens: 4096
assistant:
max_iterations: 10
timeout_seconds: 120
timeout_seconds: 300
max_tokens: 4096
sysadmin:
max_iterations: 10
timeout_seconds: 120
timeout_seconds: 300
max_tokens: 4096
decomposer:
@@ -32,7 +33,7 @@ decomposer:
max_subtasks: 10
dispatcher:
subtask_timeout_seconds: 120.0
subtask_timeout_seconds: 300.0
overall_timeout_seconds: 600.0
max_concurrent_subtasks: 5

View File

@@ -8,39 +8,12 @@
# - Build images first: docker compose -f docker/docker-compose.yml build
#
# Networks:
# llm-edge: Caddy <-> Orchestrator only (edge-facing)
# llm-internal: All inter-service communication (internal: true, no external routing)
#
# The orchestrator exposes a gRPC-Web HTTP gateway on port 8080 for
# direct browser access, eliminating the need for an edge proxy.
services:
# ---------- Edge Proxy ----------
caddy:
image: caddy:2-alpine
container_name: llm-caddy
ports:
- "${HTTPS_PORT:-443}:443"
- "${HTTP_PORT:-80}:80"
volumes:
- ./caddy/Caddyfile:/etc/caddy/Caddyfile:ro
- caddy-data:/data
- caddy-config:/config
environment:
- DOMAIN=${DOMAIN:-localhost}
- TLS_MODE=${TLS_MODE:-internal}
networks:
- llm-edge
- llm-internal
restart: unless-stopped
depends_on:
orchestrator:
condition: service_started
healthcheck:
test: ["CMD", "wget", "--spider", "--quiet", "http://localhost:80/healthz"]
interval: 30s
timeout: 5s
retries: 3
start_period: 10s
# ---------- Infrastructure ----------
searxng:
@@ -53,6 +26,7 @@ services:
- SEARXNG_SECRET=${SEARXNG_SECRET:-dev-secret-change-in-production}
networks:
- llm-internal
- llm-external
restart: unless-stopped
cap_drop:
- ALL
@@ -145,7 +119,7 @@ services:
target: /etc/llm-multiverse/model-gateway.toml
networks:
- llm-internal
- llm-edge # Needs external access to reach Ollama on host
- llm-external # Needs external access to reach Ollama on host
restart: unless-stopped
depends_on:
audit:
@@ -186,6 +160,7 @@ services:
SERVICE_DIR: tool-broker
SERVICE_PACKAGE: tool-broker
SERVICE_PORT: "50057"
EXTRA_RUNTIME_DEPS: "python3"
container_name: llm-tool-broker
volumes:
- ./manifests:/etc/llm-multiverse/manifests:ro
@@ -223,6 +198,7 @@ services:
target: /etc/llm-multiverse/search.yaml
networks:
- llm-internal
- llm-external
restart: unless-stopped
depends_on:
searxng:
@@ -237,14 +213,16 @@ services:
context: ..
dockerfile: docker/python/orchestrator.Dockerfile
container_name: llm-orchestrator
ports:
- "${GRPC_WEB_PORT:-8080}:8080"
environment:
- ORCHESTRATOR_CONFIG=/etc/llm-multiverse/orchestrator.yaml
configs:
- source: orchestrator-config
target: /etc/llm-multiverse/orchestrator.yaml
networks:
- llm-edge
- llm-internal
- llm-external
restart: unless-stopped
depends_on:
model-gateway:
@@ -283,15 +261,11 @@ volumes:
driver: local
memory-data:
driver: local
caddy-data:
driver: local
caddy-config:
driver: local
# ---------- Networks ----------
networks:
llm-edge:
llm-external:
driver: bridge
llm-internal:
driver: bridge

View File

@@ -2,7 +2,7 @@
agent_type = "coder"
agent_type_id = 3
allowed_tools = ["fs_read", "fs_write", "run_code", "web_search", "memory_read"]
path_allowlist = ["/home/**", "/tmp/**", "/workspace/**"]
path_allowlist = ["/home/**", "/tmp", "/tmp/**", "/workspace", "/workspace/**"]
can_spawn = []
max_spawn_depth = 0
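The extra bare `/tmp` and `/workspace` entries matter because, under common glob semantics, `dir/**` matches paths below a directory but not the directory itself. A small demonstration using Python's `fnmatch` (the tool broker's actual matcher may differ):

```python
# Why the allowlist needs both "/tmp" and "/tmp/**" under glob-style matching.
from fnmatch import fnmatch

print(fnmatch("/tmp", "/tmp/**"))       # False: nothing after "/tmp/" to match
print(fnmatch("/tmp/a.py", "/tmp/**"))  # True
print(fnmatch("/tmp", "/tmp"))          # True: the explicit entry covers it
```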

View File

@@ -80,6 +80,9 @@ RUN groupadd -r app && useradd -r -g app -d /nonexistent -s /usr/sbin/nologin ap
# Pre-create /data owned by app for services that mount volumes there
RUN mkdir -p /data && chown app:app /data
# Pre-create /workspace owned by app for tool execution (code writing, run_code working_dir)
RUN mkdir -p /workspace/output && chown -R app:app /workspace
COPY --from=builder /app/target/release/${SERVICE_PACKAGE} /usr/local/bin/service
USER app

View File

@@ -2,7 +2,7 @@
# Generated by the protocol buffer compiler. DO NOT EDIT!
# NO CHECKED-IN PROTOBUF GENCODE
# source: llm_multiverse/v1/common.proto
# Protobuf Python Version: 7.34.0
# Protobuf Python Version: 6.31.1
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
@@ -11,9 +11,9 @@ from google.protobuf import symbol_database as _symbol_database
from google.protobuf.internal import builder as _builder
_runtime_version.ValidateProtobufRuntimeVersion(
_runtime_version.Domain.PUBLIC,
7,
34,
0,
6,
31,
1,
'',
'llm_multiverse/v1/common.proto'
)
@@ -25,40 +25,47 @@ _sym_db = _symbol_database.Default()
from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1ellm_multiverse/v1/common.proto\x12\x11llm_multiverse.v1\x1a\x1fgoogle/protobuf/timestamp.proto\"\x8a\x01\n\x0f\x41gentIdentifier\x12\x19\n\x08\x61gent_id\x18\x01 \x01(\tR\x07\x61gentId\x12;\n\nagent_type\x18\x02 \x01(\x0e\x32\x1c.llm_multiverse.v1.AgentTypeR\tagentType\x12\x1f\n\x0bspawn_depth\x18\x03 \x01(\rR\nspawnDepth\"J\n\x0c\x41gentLineage\x12:\n\x06\x61gents\x18\x01 \x03(\x0b\x32\".llm_multiverse.v1.AgentIdentifierR\x06\x61gents\"\x92\x02\n\x0eSessionContext\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x17\n\x07user_id\x18\x02 \x01(\tR\x06userId\x12\x44\n\ragent_lineage\x18\x03 \x01(\x0b\x32\x1f.llm_multiverse.v1.AgentLineageR\x0c\x61gentLineage\x12G\n\x0eoverride_level\x18\x04 \x01(\x0e\x32 .llm_multiverse.v1.OverrideLevelR\roverrideLevel\x12\x39\n\ncreated_at\x18\x05 \x01(\x0b\x32\x1a.google.protobuf.TimestampR\tcreatedAt\"\xc2\x01\n\x0b\x45rrorDetail\x12\x12\n\x04\x63ode\x18\x01 \x01(\tR\x04\x63ode\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12H\n\x08metadata\x18\x03 \x03(\x0b\x32,.llm_multiverse.v1.ErrorDetail.MetadataEntryR\x08metadata\x1a;\n\rMetadataEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\"\x84\x01\n\x0fMemoryCandidate\x12\x18\n\x07\x63ontent\x18\x01 \x01(\tR\x07\x63ontent\x12\x37\n\x06source\x18\x02 \x01(\x0e\x32\x1f.llm_multiverse.v1.ResultSourceR\x06source\x12\x1e\n\nconfidence\x18\x03 \x01(\x02R\nconfidence\"\x9a\x03\n\x0eSubagentResult\x12\x37\n\x06status\x18\x01 \x01(\x0e\x32\x1f.llm_multiverse.v1.ResultStatusR\x06status\x12\x18\n\x07summary\x18\x02 \x01(\tR\x07summary\x12\x1c\n\tartifacts\x18\x03 \x03(\tR\tartifacts\x12G\n\x0eresult_quality\x18\x04 \x01(\x0e\x32 .llm_multiverse.v1.ResultQualityR\rresultQuality\x12\x37\n\x06source\x18\x05 \x01(\x0e\x32\x1f.llm_multiverse.v1.ResultSourceR\x06source\x12V\n\x15new_memory_candidates\x18\x06 \x03(\x0b\x32\".llm_multiverse.v1.MemoryCandidateR\x13newMemoryCandidates\x12*\n\x0e\x66\x61ilure_reason\x18\x07 
\x01(\tH\x00R\rfailureReason\x88\x01\x01\x42\x11\n\x0f_failure_reason*\xa8\x01\n\tAgentType\x12\x1a\n\x16\x41GENT_TYPE_UNSPECIFIED\x10\x00\x12\x1b\n\x17\x41GENT_TYPE_ORCHESTRATOR\x10\x01\x12\x19\n\x15\x41GENT_TYPE_RESEARCHER\x10\x02\x12\x14\n\x10\x41GENT_TYPE_CODER\x10\x03\x12\x17\n\x13\x41GENT_TYPE_SYSADMIN\x10\x04\x12\x18\n\x14\x41GENT_TYPE_ASSISTANT\x10\x05*\xf5\x01\n\x08ToolType\x12\x19\n\x15TOOL_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15TOOL_TYPE_MEMORY_READ\x10\x01\x12\x1a\n\x16TOOL_TYPE_MEMORY_WRITE\x10\x02\x12\x18\n\x14TOOL_TYPE_WEB_SEARCH\x10\x03\x12\x15\n\x11TOOL_TYPE_FS_READ\x10\x04\x12\x16\n\x12TOOL_TYPE_FS_WRITE\x10\x05\x12\x16\n\x12TOOL_TYPE_RUN_CODE\x10\x06\x12\x17\n\x13TOOL_TYPE_RUN_SHELL\x10\x07\x12\x1d\n\x19TOOL_TYPE_PACKAGE_INSTALL\x10\x08*z\n\rOverrideLevel\x12\x1e\n\x1aOVERRIDE_LEVEL_UNSPECIFIED\x10\x00\x12\x17\n\x13OVERRIDE_LEVEL_NONE\x10\x01\x12\x18\n\x14OVERRIDE_LEVEL_RELAX\x10\x02\x12\x16\n\x12OVERRIDE_LEVEL_ALL\x10\x03*}\n\x0cResultStatus\x12\x1d\n\x19RESULT_STATUS_UNSPECIFIED\x10\x00\x12\x19\n\x15RESULT_STATUS_SUCCESS\x10\x01\x12\x19\n\x15RESULT_STATUS_PARTIAL\x10\x02\x12\x18\n\x14RESULT_STATUS_FAILED\x10\x03*\x87\x01\n\rResultQuality\x12\x1e\n\x1aRESULT_QUALITY_UNSPECIFIED\x10\x00\x12\x1b\n\x17RESULT_QUALITY_VERIFIED\x10\x01\x12\x1b\n\x17RESULT_QUALITY_INFERRED\x10\x02\x12\x1c\n\x18RESULT_QUALITY_UNCERTAIN\x10\x03*\x86\x01\n\x0cResultSource\x12\x1d\n\x19RESULT_SOURCE_UNSPECIFIED\x10\x00\x12\x1d\n\x19RESULT_SOURCE_TOOL_OUTPUT\x10\x01\x12!\n\x1dRESULT_SOURCE_MODEL_KNOWLEDGE\x10\x02\x12\x15\n\x11RESULT_SOURCE_WEB\x10\x03\x42\x85\x01\n\x15\x63om.llm_multiverse.v1B\x0b\x43ommonProtoP\x01\xa2\x02\x03LXX\xaa\x02\x10LlmMultiverse.V1\xca\x02\x10LlmMultiverse\\V1\xe2\x02\x1cLlmMultiverse\\V1\\GPBMetadata\xea\x02\x11LlmMultiverse::V1b\x06proto3')
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1ellm_multiverse/v1/common.proto\x12\x11llm_multiverse.v1\x1a\x1fgoogle/protobuf/timestamp.proto\"\xd0\x01\n\x08\x41rtifact\x12\r\n\x05label\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t\x12\x36\n\rartifact_type\x18\x03 \x01(\x0e\x32\x1f.llm_multiverse.v1.ArtifactType\x12;\n\x08metadata\x18\x04 \x03(\x0b\x32).llm_multiverse.v1.Artifact.MetadataEntry\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"j\n\x0f\x41gentIdentifier\x12\x10\n\x08\x61gent_id\x18\x01 \x01(\t\x12\x30\n\nagent_type\x18\x02 \x01(\x0e\x32\x1c.llm_multiverse.v1.AgentType\x12\x13\n\x0bspawn_depth\x18\x03 \x01(\r\"B\n\x0c\x41gentLineage\x12\x32\n\x06\x61gents\x18\x01 \x03(\x0b\x32\".llm_multiverse.v1.AgentIdentifier\"\xd7\x01\n\x0eSessionContext\x12\x12\n\nsession_id\x18\x01 \x01(\t\x12\x0f\n\x07user_id\x18\x02 \x01(\t\x12\x36\n\ragent_lineage\x18\x03 \x01(\x0b\x32\x1f.llm_multiverse.v1.AgentLineage\x12\x38\n\x0eoverride_level\x18\x04 \x01(\x0e\x32 .llm_multiverse.v1.OverrideLevel\x12.\n\ncreated_at\x18\x05 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\"\x9d\x01\n\x0b\x45rrorDetail\x12\x0c\n\x04\x63ode\x18\x01 \x01(\t\x12\x0f\n\x07message\x18\x02 \x01(\t\x12>\n\x08metadata\x18\x03 \x03(\x0b\x32,.llm_multiverse.v1.ErrorDetail.MetadataEntry\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"g\n\x0fMemoryCandidate\x12\x0f\n\x07\x63ontent\x18\x01 \x01(\t\x12/\n\x06source\x18\x02 \x01(\x0e\x32\x1f.llm_multiverse.v1.ResultSource\x12\x12\n\nconfidence\x18\x03 \x01(\x02\"\xe0\x02\n\x0eSubagentResult\x12/\n\x06status\x18\x01 \x01(\x0e\x32\x1f.llm_multiverse.v1.ResultStatus\x12\x0f\n\x07summary\x18\x02 \x01(\t\x12.\n\tartifacts\x18\x03 \x03(\x0b\x32\x1b.llm_multiverse.v1.Artifact\x12\x38\n\x0eresult_quality\x18\x04 \x01(\x0e\x32 .llm_multiverse.v1.ResultQuality\x12/\n\x06source\x18\x05 \x01(\x0e\x32\x1f.llm_multiverse.v1.ResultSource\x12\x41\n\x15new_memory_candidates\x18\x06 \x03(\x0b\x32\".llm_multiverse.v1.MemoryCandidate\x12\x1b\n\x0e\x66\x61ilure_reason\x18\x07 
\x01(\tH\x00\x88\x01\x01\x42\x11\n\x0f_failure_reason*\xa8\x01\n\tAgentType\x12\x1a\n\x16\x41GENT_TYPE_UNSPECIFIED\x10\x00\x12\x1b\n\x17\x41GENT_TYPE_ORCHESTRATOR\x10\x01\x12\x19\n\x15\x41GENT_TYPE_RESEARCHER\x10\x02\x12\x14\n\x10\x41GENT_TYPE_CODER\x10\x03\x12\x17\n\x13\x41GENT_TYPE_SYSADMIN\x10\x04\x12\x18\n\x14\x41GENT_TYPE_ASSISTANT\x10\x05*\xf5\x01\n\x08ToolType\x12\x19\n\x15TOOL_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15TOOL_TYPE_MEMORY_READ\x10\x01\x12\x1a\n\x16TOOL_TYPE_MEMORY_WRITE\x10\x02\x12\x18\n\x14TOOL_TYPE_WEB_SEARCH\x10\x03\x12\x15\n\x11TOOL_TYPE_FS_READ\x10\x04\x12\x16\n\x12TOOL_TYPE_FS_WRITE\x10\x05\x12\x16\n\x12TOOL_TYPE_RUN_CODE\x10\x06\x12\x17\n\x13TOOL_TYPE_RUN_SHELL\x10\x07\x12\x1d\n\x19TOOL_TYPE_PACKAGE_INSTALL\x10\x08*z\n\rOverrideLevel\x12\x1e\n\x1aOVERRIDE_LEVEL_UNSPECIFIED\x10\x00\x12\x17\n\x13OVERRIDE_LEVEL_NONE\x10\x01\x12\x18\n\x14OVERRIDE_LEVEL_RELAX\x10\x02\x12\x16\n\x12OVERRIDE_LEVEL_ALL\x10\x03*}\n\x0cResultStatus\x12\x1d\n\x19RESULT_STATUS_UNSPECIFIED\x10\x00\x12\x19\n\x15RESULT_STATUS_SUCCESS\x10\x01\x12\x19\n\x15RESULT_STATUS_PARTIAL\x10\x02\x12\x18\n\x14RESULT_STATUS_FAILED\x10\x03*\x87\x01\n\rResultQuality\x12\x1e\n\x1aRESULT_QUALITY_UNSPECIFIED\x10\x00\x12\x1b\n\x17RESULT_QUALITY_VERIFIED\x10\x01\x12\x1b\n\x17RESULT_QUALITY_INFERRED\x10\x02\x12\x1c\n\x18RESULT_QUALITY_UNCERTAIN\x10\x03*\x86\x01\n\x0cResultSource\x12\x1d\n\x19RESULT_SOURCE_UNSPECIFIED\x10\x00\x12\x1d\n\x19RESULT_SOURCE_TOOL_OUTPUT\x10\x01\x12!\n\x1dRESULT_SOURCE_MODEL_KNOWLEDGE\x10\x02\x12\x15\n\x11RESULT_SOURCE_WEB\x10\x03*\xa0\x01\n\x0c\x41rtifactType\x12\x1d\n\x19\x41RTIFACT_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x41RTIFACT_TYPE_CODE\x10\x01\x12\x16\n\x12\x41RTIFACT_TYPE_TEXT\x10\x02\x12 \n\x1c\x41RTIFACT_TYPE_COMMAND_OUTPUT\x10\x03\x12\x1f\n\x1b\x41RTIFACT_TYPE_SEARCH_RESULT\x10\x04\x62\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'llm_multiverse.v1.common_pb2', _globals)
if not _descriptor._USE_C_DESCRIPTORS:
_globals['DESCRIPTOR']._loaded_options = None
_globals['DESCRIPTOR']._serialized_options = b'\n\025com.llm_multiverse.v1B\013CommonProtoP\001\242\002\003LXX\252\002\020LlmMultiverse.V1\312\002\020LlmMultiverse\\V1\342\002\034LlmMultiverse\\V1\\GPBMetadata\352\002\021LlmMultiverse::V1'
DESCRIPTOR._loaded_options = None
_globals['_ARTIFACT_METADATAENTRY']._loaded_options = None
_globals['_ARTIFACT_METADATAENTRY']._serialized_options = b'8\001'
_globals['_ERRORDETAIL_METADATAENTRY']._loaded_options = None
_globals['_ERRORDETAIL_METADATAENTRY']._serialized_options = b'8\001'
_globals['_AGENTTYPE']._serialized_start=1326
_globals['_AGENTTYPE']._serialized_end=1494
_globals['_TOOLTYPE']._serialized_start=1497
_globals['_TOOLTYPE']._serialized_end=1742
_globals['_OVERRIDELEVEL']._serialized_start=1744
_globals['_OVERRIDELEVEL']._serialized_end=1866
_globals['_RESULTSTATUS']._serialized_start=1868
_globals['_RESULTSTATUS']._serialized_end=1993
_globals['_RESULTQUALITY']._serialized_start=1996
_globals['_RESULTQUALITY']._serialized_end=2131
_globals['_RESULTSOURCE']._serialized_start=2134
_globals['_RESULTSOURCE']._serialized_end=2268
_globals['_AGENTIDENTIFIER']._serialized_start=87
_globals['_AGENTIDENTIFIER']._serialized_end=225
_globals['_AGENTLINEAGE']._serialized_start=227
_globals['_AGENTLINEAGE']._serialized_end=301
_globals['_SESSIONCONTEXT']._serialized_start=304
_globals['_SESSIONCONTEXT']._serialized_end=578
_globals['_ERRORDETAIL']._serialized_start=581
_globals['_ERRORDETAIL']._serialized_end=775
_globals['_ERRORDETAIL_METADATAENTRY']._serialized_start=716
_globals['_ERRORDETAIL_METADATAENTRY']._serialized_end=775
_globals['_MEMORYCANDIDATE']._serialized_start=778
_globals['_MEMORYCANDIDATE']._serialized_end=910
_globals['_SUBAGENTRESULT']._serialized_start=913
_globals['_SUBAGENTRESULT']._serialized_end=1323
_globals['_AGENTTYPE']._serialized_start=1312
_globals['_AGENTTYPE']._serialized_end=1480
_globals['_TOOLTYPE']._serialized_start=1483
_globals['_TOOLTYPE']._serialized_end=1728
_globals['_OVERRIDELEVEL']._serialized_start=1730
_globals['_OVERRIDELEVEL']._serialized_end=1852
_globals['_RESULTSTATUS']._serialized_start=1854
_globals['_RESULTSTATUS']._serialized_end=1979
_globals['_RESULTQUALITY']._serialized_start=1982
_globals['_RESULTQUALITY']._serialized_end=2117
_globals['_RESULTSOURCE']._serialized_start=2120
_globals['_RESULTSOURCE']._serialized_end=2254
_globals['_ARTIFACTTYPE']._serialized_start=2257
_globals['_ARTIFACTTYPE']._serialized_end=2417
_globals['_ARTIFACT']._serialized_start=87
_globals['_ARTIFACT']._serialized_end=295
_globals['_ARTIFACT_METADATAENTRY']._serialized_start=248
_globals['_ARTIFACT_METADATAENTRY']._serialized_end=295
_globals['_AGENTIDENTIFIER']._serialized_start=297
_globals['_AGENTIDENTIFIER']._serialized_end=403
_globals['_AGENTLINEAGE']._serialized_start=405
_globals['_AGENTLINEAGE']._serialized_end=471
_globals['_SESSIONCONTEXT']._serialized_start=474
_globals['_SESSIONCONTEXT']._serialized_end=689
_globals['_ERRORDETAIL']._serialized_start=692
_globals['_ERRORDETAIL']._serialized_end=849
_globals['_ERRORDETAIL_METADATAENTRY']._serialized_start=248
_globals['_ERRORDETAIL_METADATAENTRY']._serialized_end=295
_globals['_MEMORYCANDIDATE']._serialized_start=851
_globals['_MEMORYCANDIDATE']._serialized_end=954
_globals['_SUBAGENTRESULT']._serialized_start=957
_globals['_SUBAGENTRESULT']._serialized_end=1309
# @@protoc_insertion_point(module_scope)

View File

@@ -1,4 +1,24 @@
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
"""Client and server classes corresponding to protobuf-defined services."""
import grpc
import warnings
GRPC_GENERATED_VERSION = '1.78.0'
GRPC_VERSION = grpc.__version__
_version_not_supported = False
try:
from grpc._utilities import first_version_is_lower
_version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION)
except ImportError:
_version_not_supported = True
if _version_not_supported:
raise RuntimeError(
f'The grpc package installed is at version {GRPC_VERSION},'
+ ' but the generated code in llm_multiverse/v1/common_pb2_grpc.py depends on'
+ f' grpcio>={GRPC_GENERATED_VERSION}.'
+ f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
+ f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'
)

View File

@@ -64,8 +64,25 @@ enum ResultSource {
RESULT_SOURCE_WEB = 3;
}
// Type of artifact produced by an agent during its tool call loop.
enum ArtifactType {
ARTIFACT_TYPE_UNSPECIFIED = 0;
ARTIFACT_TYPE_CODE = 1; // Code written via fs_write
ARTIFACT_TYPE_TEXT = 2; // Plain text / file content from fs_read
ARTIFACT_TYPE_COMMAND_OUTPUT = 3; // Output from run_code / run_shell
ARTIFACT_TYPE_SEARCH_RESULT = 4; // Web search results
}
// --- Messages ---
// A concrete output produced by an agent (code, command output, etc.).
message Artifact {
string label = 1; // Display name (filename, query, etc.)
string content = 2; // Full content
ArtifactType artifact_type = 3;
map<string, string> metadata = 4; // language, path, tool_name, exit_code, etc.
}
// Identifies a single agent in the lineage chain.
message AgentIdentifier {
string agent_id = 1;
@@ -107,7 +124,8 @@ message SubagentResult {
ResultStatus status = 1;
// 3-sentence max summary of work performed.
string summary = 2;
repeated string artifacts = 3;
// Structured artifacts produced during the agent loop.
repeated Artifact artifacts = 3;
ResultQuality result_quality = 4;
ResultSource source = 5;
repeated MemoryCandidate new_memory_candidates = 6;
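For reference, building the new structured result from Python looks like this (a sketch using only the fields and enums declared above; the generated module path follows this repo's layout):

```python
# Sketch: constructing a SubagentResult with a structured Artifact.
from llm_multiverse.v1 import common_pb2

artifact = common_pb2.Artifact(
    label="/workspace/fibonacci.py",
    content="def fib(n): ...",
    artifact_type=common_pb2.ARTIFACT_TYPE_CODE,
    metadata={"path": "/workspace/fibonacci.py", "language": "python"},
)

result = common_pb2.SubagentResult(
    status=common_pb2.RESULT_STATUS_SUCCESS,
    summary="Wrote and verified fibonacci.py.",
    artifacts=[artifact],
    result_quality=common_pb2.RESULT_QUALITY_VERIFIED,
    source=common_pb2.RESULT_SOURCE_TOOL_OUTPUT,
)
print(result.artifacts[0].label)  # /workspace/fibonacci.py
```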

View File

@@ -11,6 +11,7 @@ dependencies = [
"orjson>=3.10",
"uvloop>=0.21",
"msgspec>=0.19",
"aiohttp>=3.10",
]
[project.scripts]

View File

@@ -11,6 +11,7 @@ from llm_multiverse.v1 import audit_pb2_grpc, orchestrator_pb2_grpc
from .clients import MemoryClient, ModelGatewayClient, ToolBrokerClient
from .config import Config
from .grpc_web_gateway import start_gateway
from .service import OrchestratorServiceImpl
logger = logging.getLogger("orchestrator")
@@ -50,6 +51,12 @@ async def serve(config: Config) -> None:
await server.start()
# Start gRPC-Web HTTP gateway if configured.
gateway_runner = None
if config.grpc_web_port:
grpc_addr = f"localhost:{config.port}"
gateway_runner = await start_gateway(grpc_addr, config.grpc_web_port)
# Graceful shutdown on SIGINT/SIGTERM
loop = asyncio.get_running_loop()
shutdown_event = asyncio.Event()
@@ -63,6 +70,8 @@ async def serve(config: Config) -> None:
await shutdown_event.wait()
logger.info("Shutting down gracefully...")
if gateway_runner:
await gateway_runner.cleanup()
await server.stop(grace=5)
logger.info("Orchestrator shut down")

View File

@@ -4,6 +4,7 @@ from __future__ import annotations
from llm_multiverse.v1 import common_pb2
from .output_collector import OutputCollector
from .parser import DoneSignalParsed
# Confidence string → proto quality enum.
@@ -13,6 +14,14 @@ CONFIDENCE_MAP: dict[str, int] = {
"UNCERTAIN": common_pb2.RESULT_QUALITY_UNCERTAIN,
}
# CollectedOutput.artifact_type → proto ArtifactType enum.
_ARTIFACT_TYPE_MAP: dict[str, int] = {
"code": common_pb2.ARTIFACT_TYPE_CODE,
"text": common_pb2.ARTIFACT_TYPE_TEXT,
"command_output": common_pb2.ARTIFACT_TYPE_COMMAND_OUTPUT,
"search_result": common_pb2.ARTIFACT_TYPE_SEARCH_RESULT,
}
def determine_source(used_tool_output: bool, used_web: bool) -> int:
"""Determine the result source based on tool usage."""
@@ -27,6 +36,7 @@ def build_success_result(
parsed: DoneSignalParsed,
used_tool_output: bool,
used_web: bool,
collector: OutputCollector | None = None,
) -> common_pb2.SubagentResult:
"""Build a success result from a parsed done signal."""
quality = CONFIDENCE_MAP.get(parsed.confidence, common_pb2.RESULT_QUALITY_UNCERTAIN)
@@ -46,10 +56,12 @@ def build_success_result(
for mc in parsed.memory_candidates
]
artifacts = _build_artifacts(parsed, collector)
return common_pb2.SubagentResult(
status=status,
summary=parsed.summary,
artifacts=parsed.findings,
artifacts=artifacts,
result_quality=quality,
source=source,
new_memory_candidates=candidates,
@@ -61,12 +73,15 @@ def build_partial_result(
summary_prefix: str,
used_tool_output: bool,
used_web: bool,
collector: OutputCollector | None = None,
) -> common_pb2.SubagentResult:
"""Build a partial result with agent-specific summary prefix."""
source = determine_source(used_tool_output, used_web)
artifacts = _build_artifacts(None, collector) if collector else []
return common_pb2.SubagentResult(
status=common_pb2.RESULT_STATUS_PARTIAL,
summary=f"{summary_prefix} incomplete: {reason}",
artifacts=artifacts,
result_quality=common_pb2.RESULT_QUALITY_UNCERTAIN,
source=source,
)
@@ -83,6 +98,40 @@ def build_failed_result(reason: str, summary_prefix: str) -> common_pb2.Subagent
)
def _build_artifacts(
parsed: DoneSignalParsed | None,
collector: OutputCollector | None,
) -> list[common_pb2.Artifact]:
"""Convert collected outputs and findings into proto Artifact messages."""
artifacts: list[common_pb2.Artifact] = []
if collector:
for entry in collector.entries:
artifacts.append(
common_pb2.Artifact(
label=entry.label,
content=entry.content,
artifact_type=_ARTIFACT_TYPE_MAP.get(
entry.artifact_type, common_pb2.ARTIFACT_TYPE_UNSPECIFIED
),
metadata=entry.metadata,
)
)
# Always include findings as text artifacts (model-curated summaries).
if parsed and parsed.findings:
for finding in parsed.findings:
artifacts.append(
common_pb2.Artifact(
label="Finding",
content=finding,
artifact_type=common_pb2.ARTIFACT_TYPE_TEXT,
)
)
return artifacts
def format_memory(mem_response) -> str:
"""Format a QueryMemoryResponse into a context string."""
entry = mem_response.entry

View File

@@ -17,6 +17,7 @@ from .agent_utils import (
)
from .clients import MemoryClient, ModelGatewayClient, ToolBrokerClient
from .config import AgentConfig
from .output_collector import OutputCollector
from .parser import DoneSignalParsed, ParseError, ToolCallParsed, parse_agent_response
from .prompt import PromptBuilder
@@ -84,8 +85,12 @@ class BaseAgent:
session_ctx, self.agent_type, request.task
)
except grpc.aio.AioRpcError as e:
self._logger.warning(
"Tool discovery failed: code=%s, details=%s", e.code(), e.details()
)
return build_failed_result(
f"Tool discovery failed: {e.code()}", self.summary_prefix
f"Tool discovery failed: {e.code()}, details={e.details()}",
self.summary_prefix,
)
if not tools:
@@ -120,126 +125,162 @@ class BaseAgent:
prompt_builder.set_memory_context(memory_context)
# Step 4: Agent loop
collector = OutputCollector()
iteration = 0
consecutive_failures = 0
used_tool_output = False
used_web_search = False
start_time = asyncio.get_event_loop().time()
while True:
# Termination checks
if iteration >= self._config.max_iterations:
return build_partial_result(
"Max iterations reached",
self.summary_prefix,
used_tool_output,
used_web_search,
)
elapsed = asyncio.get_event_loop().time() - start_time
if elapsed >= self._config.timeout_seconds:
return build_partial_result(
"Timeout",
self.summary_prefix,
used_tool_output,
used_web_search,
)
if prompt_builder.needs_compaction():
compacted = await prompt_builder.compact(self._gateway, session_ctx)
if not compacted:
try:
while True:
# Termination checks
if iteration >= self._config.max_iterations:
return build_partial_result(
"Context overflow after compaction",
"Max iterations reached",
self.summary_prefix,
used_tool_output,
used_web_search,
collector,
)
self._logger.info(
"Context compacted for agent %s (iteration %d)",
agent_id,
iteration,
)
if consecutive_failures >= _MAX_CONSECUTIVE_FAILURES:
return build_failed_result(
f"All tools failed after {_MAX_CONSECUTIVE_FAILURES} consecutive failures",
self.summary_prefix,
)
elapsed = asyncio.get_event_loop().time() - start_time
if elapsed >= self._config.timeout_seconds:
return build_partial_result(
"Timeout",
self.summary_prefix,
used_tool_output,
used_web_search,
collector,
)
# Build prompt and call model
prompt = prompt_builder.build()
try:
response_text = await self._gateway.stream_inference(
session_ctx, prompt, max_tokens=max_tokens
)
except grpc.aio.AioRpcError as e:
return build_failed_result(
f"Model gateway error: {e.code()}", self.summary_prefix
)
if prompt_builder.needs_compaction():
compacted = await prompt_builder.compact(
self._gateway, session_ctx
)
if not compacted:
return build_partial_result(
"Context overflow after compaction",
self.summary_prefix,
used_tool_output,
used_web_search,
collector,
)
self._logger.info(
"Context compacted for agent %s (iteration %d)",
agent_id,
iteration,
)
# Parse model output
try:
parsed = parse_agent_response(response_text)
except ParseError as e:
prompt_builder.add_reasoning(
f"[Parse error: {e}. Please output valid JSON.]"
)
iteration += 1
continue
if consecutive_failures >= _MAX_CONSECUTIVE_FAILURES:
return build_failed_result(
f"All tools failed after {_MAX_CONSECUTIVE_FAILURES} consecutive failures",
self.summary_prefix,
)
if parsed is None:
prompt_builder.add_reasoning(response_text)
iteration += 1
continue
# Build prompt and call model
prompt = prompt_builder.build()
try:
response_text = await self._gateway.stream_inference(
session_ctx, prompt, max_tokens=max_tokens
)
except grpc.aio.AioRpcError as e:
self._logger.warning(
"Model gateway error: code=%s, details=%s, iteration=%d",
e.code(),
e.details(),
iteration,
)
return build_failed_result(
f"Model gateway error: {e.code()}, details={e.details()}",
self.summary_prefix,
)
if isinstance(parsed, DoneSignalParsed):
return build_success_result(parsed, used_tool_output, used_web_search)
# Parse model output
try:
parsed = parse_agent_response(response_text)
except ParseError as e:
prompt_builder.add_reasoning(
f"[Parse error: {e}. Please output valid JSON.]"
)
iteration += 1
continue
if parsed is None:
prompt_builder.add_reasoning(response_text)
iteration += 1
continue
if isinstance(parsed, DoneSignalParsed):
return build_success_result(
parsed, used_tool_output, used_web_search, collector
)
if isinstance(parsed, ToolCallParsed):
if parsed.tool not in tool_names:
prompt_builder.add_tool_result(
parsed.tool,
f"Error: tool '{parsed.tool}' is not available. "
f"Available tools: {', '.join(sorted(tool_names))}",
False,
agent_id,
session_ctx.session_id,
)
iteration += 1
consecutive_failures += 1
continue
prompt_builder.add_tool_call(parsed.tool, parsed.parameters)
try:
output, success = await self._broker.execute_tool(
session_ctx,
self.agent_type,
parsed.tool,
parsed.parameters,
)
except grpc.aio.AioRpcError as e:
self._logger.warning(
"Tool execution error: tool=%s, code=%s, details=%s",
parsed.tool,
e.code(),
e.details(),
)
output = (
f"Tool execution error: {e.code()}, details={e.details()}"
)
success = False
if isinstance(parsed, ToolCallParsed):
if parsed.tool not in tool_names:
prompt_builder.add_tool_result(
parsed.tool,
f"Error: tool '{parsed.tool}' is not available. "
f"Available tools: {', '.join(sorted(tool_names))}",
False,
output,
success,
agent_id,
session_ctx.session_id,
)
iteration += 1
consecutive_failures += 1
continue
prompt_builder.add_tool_call(parsed.tool, parsed.parameters)
try:
output, success = await self._broker.execute_tool(
session_ctx,
self.agent_type,
parsed.tool,
parsed.parameters,
collector.collect(
parsed.tool, parsed.parameters, output, success
)
except grpc.aio.AioRpcError as e:
output = f"Tool execution error: {e.code()}"
success = False
prompt_builder.add_tool_result(
parsed.tool,
output,
success,
agent_id,
session_ctx.session_id,
)
if success:
if parsed.tool in self.tool_output_sources:
used_tool_output = True
if parsed.tool == "web_search":
used_web_search = True
consecutive_failures = 0
else:
consecutive_failures += 1
if success:
if parsed.tool in self.tool_output_sources:
used_tool_output = True
if parsed.tool == "web_search":
used_web_search = True
consecutive_failures = 0
else:
consecutive_failures += 1
iteration += 1
iteration += 1
return build_failed_result(
"Unexpected loop exit", self.summary_prefix
) # pragma: no cover
return build_failed_result(
"Unexpected loop exit", self.summary_prefix
) # pragma: no cover
finally:
elapsed = asyncio.get_event_loop().time() - start_time
self._logger.info(
"Agent run finished: agent_id=%s, iterations=%d, elapsed=%.2fs",
agent_id,
iteration,
elapsed,
)

View File

@@ -79,6 +79,7 @@ class Config(msgspec.Struct):
host: str = "[::]"
port: int = 50058
grpc_web_port: int | None = None
model_gateway_addr: str = "model-gateway:50055"
tool_broker_addr: str = "tool-broker:50057"
memory_addr: str = "memory-service:50054"
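`grpc_web_port` defaults to `None`, so the gateway only starts when a port is configured (the Docker config above sets it to 8080). A minimal sketch of decoding that YAML into this struct with msgspec (assumes PyYAML is installed for `msgspec.yaml`; only the fields visible in this diff are modeled):

```python
# Sketch: decoding the Docker config shown earlier into a msgspec Struct.
import msgspec

class Config(msgspec.Struct):
    host: str = "[::]"
    port: int = 50058
    grpc_web_port: int | None = None
    model_gateway_addr: str = "model-gateway:50055"

yaml_doc = b"""
host: "0.0.0.0"
port: 50058
grpc_web_port: 8080
model_gateway_addr: "model-gateway:50055"
"""

cfg = msgspec.yaml.decode(yaml_doc, type=Config)
print(cfg.grpc_web_port)  # 8080 -> gateway starts; None would skip it
```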

View File

@@ -93,6 +93,7 @@ class TaskDecomposer:
Returns a list of SubtaskDefinition protos. Falls back to a single-task
plan if decomposition fails for any reason.
"""
response_text = ""
try:
prompt = _build_prompt(user_message)
response_text = await self._gateway.inference(
@@ -106,10 +107,18 @@ class TaskDecomposer:
_validate_decomposition(subtasks, self._config.max_subtasks)
return _to_proto_subtasks(subtasks)
except grpc.aio.AioRpcError as e:
logger.warning("Gateway error during decomposition: %s", e.code())
logger.warning(
"Gateway error during decomposition: code=%s, details=%s",
e.code(),
e.details(),
)
return _build_fallback_plan(user_message)
except (DecompositionError, orjson.JSONDecodeError) as e:
logger.warning("Decomposition failed: %s", e)
logger.warning(
"Decomposition parsing failed: %s, response_preview=%.200s",
e,
response_text[:200],
)
return _build_fallback_plan(user_message)

View File

@@ -0,0 +1,121 @@
"""Minimal gRPC-Web HTTP gateway.
Translates gRPC-Web (HTTP/1.1) requests into native gRPC calls to the
local orchestrator server and streams back gRPC-Web framed responses.
This eliminates the need for an external proxy (Caddy/Envoy) to handle
the gRPC-Web protocol translation.
"""
from __future__ import annotations
import logging
import struct
import grpc
from aiohttp import web
from llm_multiverse.v1 import orchestrator_pb2_grpc
logger = logging.getLogger("orchestrator.grpc_web")
def _encode_frame(payload: bytes, *, trailer: bool = False) -> bytes:
"""Encode a gRPC-Web frame: [flags:1][length:4][payload:N]."""
flags = 0x80 if trailer else 0x00
return struct.pack(">BI", flags, len(payload)) + payload
def _decode_frame(data: bytes) -> bytes:
"""Decode a single gRPC-Web data frame and return the protobuf payload."""
if len(data) < 5:
raise ValueError("gRPC-Web frame too short")
_flags, length = struct.unpack(">BI", data[:5])
if len(data) < 5 + length:
raise ValueError("gRPC-Web frame payload truncated")
return data[5 : 5 + length]
def _status_code_number(code: grpc.StatusCode) -> int:
"""Extract the numeric gRPC status code."""
return code.value[0]
async def _handle_grpc_web(request: web.Request) -> web.StreamResponse:
"""Proxy a gRPC-Web request to the local gRPC server."""
channel: grpc.aio.Channel = request.app["grpc_channel"]
stub = orchestrator_pb2_grpc.OrchestratorServiceStub(channel)
body = await request.read()
proto_bytes = _decode_frame(body)
from llm_multiverse.v1 import orchestrator_pb2
req = orchestrator_pb2.ProcessRequestRequest()
req.ParseFromString(proto_bytes)
response = web.StreamResponse(
status=200,
headers={
"Content-Type": "application/grpc-web+proto",
"Access-Control-Allow-Origin": "*",
},
)
await response.prepare(request)
grpc_status = 0
grpc_message = ""
try:
async for resp in stub.ProcessRequest(req):
frame = _encode_frame(resp.SerializeToString())
await response.write(frame)
except grpc.aio.AioRpcError as e:
grpc_status = _status_code_number(e.code())
grpc_message = e.details() or ""
logger.warning(
"gRPC-Web proxy error: code=%s, details=%s", e.code(), e.details()
)
trailer_text = f"grpc-status: {grpc_status}\r\n"
if grpc_message:
trailer_text += f"grpc-message: {grpc_message}\r\n"
await response.write(_encode_frame(trailer_text.encode(), trailer=True))
await response.write_eof()
return response
async def _handle_cors(request: web.Request) -> web.Response:
"""Handle CORS preflight for gRPC-Web requests."""
return web.Response(
headers={
"Access-Control-Allow-Origin": "*",
"Access-Control-Allow-Methods": "POST, OPTIONS",
"Access-Control-Allow-Headers": (
"Content-Type, X-Grpc-Web, X-User-Agent"
),
"Access-Control-Max-Age": "86400",
}
)
async def start_gateway(grpc_addr: str, port: int) -> web.AppRunner:
"""Start the gRPC-Web HTTP gateway on the given port.
Connects to the native gRPC server at *grpc_addr* and exposes an
HTTP/1.1 endpoint that speaks gRPC-Web.
"""
app = web.Application()
app["grpc_channel"] = grpc.aio.insecure_channel(grpc_addr)
app.router.add_post(
"/llm_multiverse.v1.OrchestratorService/{method}", _handle_grpc_web
)
app.router.add_options(
"/llm_multiverse.v1.OrchestratorService/{method}", _handle_cors
)
runner = web.AppRunner(app, access_log=None)
await runner.setup()
site = web.TCPSite(runner, "0.0.0.0", port)
await site.start()
logger.info("gRPC-Web gateway listening on 0.0.0.0:%d -> %s", port, grpc_addr)
return runner
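A quick sanity check of the framing helpers above (assuming `_encode_frame` and `_decode_frame` are imported from this module): a gRPC-Web frame is `[flags:1][length:4][payload:N]`, with the `0x80` flag bit marking the trailer frame.

```python
payload = b"\x08\x01"  # arbitrary protobuf bytes, for illustration only
frame = _encode_frame(payload)
assert frame[0] == 0x00 and len(frame) == 5 + len(payload)
assert _decode_frame(frame) == payload

trailer = _encode_frame(b"grpc-status: 0\r\n", trailer=True)
assert trailer[0] == 0x80  # high bit set on trailer frames
```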

View File

@@ -0,0 +1,144 @@
"""Lightweight collector for concrete tool outputs during the agent loop."""
from __future__ import annotations
from dataclasses import dataclass, field
# Maximum content length to store per artifact (256 KB).
_MAX_CONTENT_LEN = 256 * 1024
# Tools whose outputs are context, not user-facing artifacts.
_SKIP_TOOLS = frozenset({"memory_read", "memory_write"})
# Tool name → artifact type string (matches ArtifactType enum names).
_TOOL_TYPE_MAP: dict[str, str] = {
"fs_write": "code",
"fs_read": "text",
"run_code": "command_output",
"run_shell": "command_output",
"web_search": "search_result",
}
@dataclass(frozen=True)
class CollectedOutput:
"""A single artifact captured from a tool execution."""
tool_name: str
label: str
content: str
artifact_type: str
metadata: dict[str, str]
class OutputCollector:
"""Collects concrete outputs during the agent tool call loop.
Populated after each successful tool execution. Maps tool names to
artifact types so downstream code can build structured Artifact protos.
"""
def __init__(self) -> None:
self._entries: list[CollectedOutput] = []
def collect(
self,
tool_name: str,
parameters: dict,
output: str,
success: bool,
) -> None:
"""Collect output from a tool execution.
Only successful calls for known, user-facing tools are collected.
"""
if not success:
return
if tool_name in _SKIP_TOOLS:
return
artifact_type = _TOOL_TYPE_MAP.get(tool_name)
if artifact_type is None:
return
if tool_name == "fs_write":
label = parameters.get("path", parameters.get("file_path", "file"))
content = parameters.get("content", "")
metadata = {"path": label}
lang = _guess_language(label)
if lang:
metadata["language"] = lang
elif tool_name == "fs_read":
label = parameters.get("path", parameters.get("file_path", "file"))
content = output
metadata = {"path": label}
lang = _guess_language(label)
if lang:
metadata["language"] = lang
elif tool_name == "run_code":
label = "Code execution"
content = output
lang = parameters.get("language", "")
metadata = {"language": lang} if lang else {}
elif tool_name == "run_shell":
cmd = parameters.get("command", "")
label = cmd[:80] if cmd else "Shell command"
content = output
metadata = {"command": cmd} if cmd else {}
elif tool_name == "web_search":
label = parameters.get("query", "Search")
content = output
metadata = {}
else:
return # pragma: no cover
# Truncate oversized content.
if len(content) > _MAX_CONTENT_LEN:
content = content[:_MAX_CONTENT_LEN] + "\n... [truncated]"
self._entries.append(
CollectedOutput(
tool_name=tool_name,
label=label,
content=content,
artifact_type=artifact_type,
metadata=metadata,
)
)
@property
def entries(self) -> list[CollectedOutput]:
"""Return a copy of all collected outputs."""
return list(self._entries)
def _guess_language(path: str) -> str:
"""Guess programming language from file extension."""
ext_map = {
".py": "python",
".js": "javascript",
".ts": "typescript",
".tsx": "typescript",
".jsx": "javascript",
".rs": "rust",
".go": "go",
".java": "java",
".rb": "ruby",
".sh": "bash",
".yaml": "yaml",
".yml": "yaml",
".json": "json",
".toml": "toml",
".html": "html",
".css": "css",
".sql": "sql",
".md": "markdown",
".c": "c",
".cpp": "cpp",
".h": "c",
".hpp": "cpp",
}
for ext, lang in ext_map.items():
if path.endswith(ext):
return lang
return ""

View File

@@ -125,6 +125,13 @@ When done, respond with a JSON result block:
"confidence": "VERIFIED|INFERRED|UNCERTAIN", \
"memory_candidates": [{"content": "<fact>", "confidence": 0.0-1.0}]}
## IMPORTANT: Always Use Tools for Code
- When asked to write or generate code, you MUST use `fs_write` to write the \
code to a file and `run_code` to verify it. Do NOT just describe the code in \
your summary or findings — the user needs the actual code as a tool artifact.
- For new code requests, write to `/workspace/` (e.g. `/workspace/fibonacci.py`).
- When using `run_code`, set working_dir to `/workspace/output`.
## Constraints
- Do NOT fabricate file contents. Always read files before modifying them.
- Do NOT attempt to use tools not listed in your capabilities.
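For reference, a well-formed done-signal matching the schema above looks like the following (illustrative values; `parse_agent_response` in `parser.py` is the authoritative consumer):

```python
import json

done_signal = {
    "done": True,
    "summary": "Wrote /workspace/fibonacci.py and verified it runs.",
    "findings": ["fib(10) == 55"],
    "confidence": "VERIFIED",
    "memory_candidates": [
        {"content": "User prefers iterative implementations.", "confidence": 0.8}
    ],
}
print(json.dumps(done_signal, indent=2))
```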

View File

@@ -3,6 +3,7 @@
from __future__ import annotations
import logging
import time
import grpc
from google.protobuf import timestamp_pb2
@@ -135,9 +136,12 @@ class OrchestratorServiceImpl(orchestrator_pb2_grpc.OrchestratorServiceServicer)
Pipeline: decompose → filter → dispatch → confidence → memory gate → respond.
"""
start_time = time.monotonic()
logger.info(
"ProcessRequest: session=%s, message=%s",
"ProcessRequest: session=%s, message_len=%d, has_config=%s, preview=%.100s",
request.session_id,
len(request.user_message),
request.HasField("session_config"),
request.user_message[:100],
)
@@ -179,10 +183,17 @@ class OrchestratorServiceImpl(orchestrator_pb2_grpc.OrchestratorServiceServicer)
try:
subtasks = await decomposer.decompose(session_ctx, request.user_message)
except Exception as e:
logger.error("Decomposition failed: %s", e)
elapsed = time.monotonic() - start_time
logger.error(
"Decomposition failed: session=%s, error=%s, elapsed=%.2fs",
request.session_id,
e,
elapsed,
exc_info=True,
)
yield orchestrator_pb2.ProcessRequestResponse(
state=orchestrator_pb2.ORCHESTRATION_STATE_COMPLETE,
message="Failed to decompose request. Please try again.",
message=f"Failed to decompose request: {type(e).__name__}. Please try again.",
)
return
@@ -260,6 +271,15 @@ class OrchestratorServiceImpl(orchestrator_pb2_grpc.OrchestratorServiceServicer)
outcomes = await dispatcher.dispatch_all()
for outcome in outcomes:
logger.info(
"Subtask outcome: session=%s, subtask=%s, status=%s%s",
request.session_id,
outcome.subtask_id,
outcome.status,
f", error={outcome.error}" if outcome.error else "",
)
# Step 5: Evaluate confidence signals.
# TODO: Wire ConfidenceReplanner to re-dispatch low-confidence subtasks.
evaluator = ConfidenceEvaluator(self._config.confidence)
@@ -289,14 +309,16 @@ class OrchestratorServiceImpl(orchestrator_pb2_grpc.OrchestratorServiceServicer)
# Build final SubagentResult summary.
final_result = _build_final_result(outcomes, confidence_report)
elapsed = time.monotonic() - start_time
logger.info(
"ProcessRequest complete: session=%s, subtasks=%d, "
"confidence=%.2f, memory_accepted=%d/%d",
"confidence=%.2f, memory_accepted=%d/%d, elapsed=%.2fs",
request.session_id,
len(subtasks),
confidence_report.overall_confidence,
gating_report.accepted_count,
gating_report.total_candidates,
elapsed,
)
yield orchestrator_pb2.ProcessRequestResponse(
@@ -341,7 +363,7 @@ def _synthesize_response(
def _build_final_result(outcomes: list, confidence_report) -> common_pb2.SubagentResult:
"""Build the final SubagentResult from all outcomes."""
summaries: list[str] = []
artifacts: list[str] = []
artifacts: list[common_pb2.Artifact] = []
all_candidates: list[common_pb2.MemoryCandidate] = []
has_failure = False
has_success = False

View File

@@ -0,0 +1,126 @@
"""Tests for _build_artifacts in agent_utils."""
from __future__ import annotations
from llm_multiverse.v1 import common_pb2
from orchestrator.agent_utils import _build_artifacts, build_success_result
from orchestrator.output_collector import OutputCollector
from orchestrator.parser import DoneSignalParsed
def _make_parsed(
summary: str = "Done.",
findings: list[str] | None = None,
confidence: str = "VERIFIED",
) -> DoneSignalParsed:
return DoneSignalParsed(
summary=summary,
findings=findings or [],
confidence=confidence,
)
def test_build_artifacts_from_collector():
"""Collector entries are converted to Artifact protos."""
collector = OutputCollector()
collector.collect("fs_write", {"path": "/main.py", "content": "print(1)"}, "ok", True)
collector.collect("run_code", {"language": "python"}, "1\n", True)
parsed = _make_parsed()
artifacts = _build_artifacts(parsed, collector)
assert len(artifacts) == 2
assert artifacts[0].artifact_type == common_pb2.ARTIFACT_TYPE_CODE
assert artifacts[0].label == "/main.py"
assert artifacts[0].content == "print(1)"
assert artifacts[0].metadata["language"] == "python"
assert artifacts[1].artifact_type == common_pb2.ARTIFACT_TYPE_COMMAND_OUTPUT
assert artifacts[1].content == "1\n"
def test_build_artifacts_findings_as_text():
"""Findings are always included as TEXT artifacts."""
parsed = _make_parsed(findings=["Fixed null check", "Improved error handling"])
artifacts = _build_artifacts(parsed, None)
assert len(artifacts) == 2
assert all(a.artifact_type == common_pb2.ARTIFACT_TYPE_TEXT for a in artifacts)
assert artifacts[0].label == "Finding"
assert artifacts[0].content == "Fixed null check"
assert artifacts[1].content == "Improved error handling"
def test_build_artifacts_collector_plus_findings():
"""Both collector entries and findings are included."""
collector = OutputCollector()
collector.collect("web_search", {"query": "python docs"}, "results...", True)
parsed = _make_parsed(findings=["Found documentation"])
artifacts = _build_artifacts(parsed, collector)
assert len(artifacts) == 2
assert artifacts[0].artifact_type == common_pb2.ARTIFACT_TYPE_SEARCH_RESULT
assert artifacts[0].label == "python docs"
assert artifacts[1].artifact_type == common_pb2.ARTIFACT_TYPE_TEXT
assert artifacts[1].content == "Found documentation"
def test_build_artifacts_empty():
"""No collector and no findings produces empty list."""
parsed = _make_parsed()
artifacts = _build_artifacts(parsed, None)
assert artifacts == []
def test_build_artifacts_none_parsed():
"""None parsed (partial result path) still builds from collector."""
collector = OutputCollector()
collector.collect("fs_read", {"path": "/test.py"}, "content", True)
artifacts = _build_artifacts(None, collector)
assert len(artifacts) == 1
assert artifacts[0].artifact_type == common_pb2.ARTIFACT_TYPE_TEXT
def test_build_success_result_with_collector():
"""build_success_result includes structured artifacts from collector."""
collector = OutputCollector()
collector.collect("fs_write", {"path": "/app.ts", "content": "const x = 1;"}, "ok", True)
parsed = _make_parsed(findings=["Created app.ts"])
result = build_success_result(parsed, True, False, collector)
assert result.status == common_pb2.RESULT_STATUS_SUCCESS
assert len(result.artifacts) == 2
assert result.artifacts[0].artifact_type == common_pb2.ARTIFACT_TYPE_CODE
assert result.artifacts[0].label == "/app.ts"
assert result.artifacts[0].content == "const x = 1;"
assert result.artifacts[1].artifact_type == common_pb2.ARTIFACT_TYPE_TEXT
assert result.artifacts[1].content == "Created app.ts"
def test_build_success_result_backward_compat_no_collector():
"""build_success_result without collector still works (findings only)."""
parsed = _make_parsed(findings=["Some finding"])
result = build_success_result(parsed, False, False)
assert len(result.artifacts) == 1
assert result.artifacts[0].artifact_type == common_pb2.ARTIFACT_TYPE_TEXT
assert result.artifacts[0].content == "Some finding"
def test_artifact_metadata_map():
"""Metadata map is correctly populated on proto Artifact."""
collector = OutputCollector()
collector.collect(
"fs_write",
{"path": "/src/lib.rs", "content": "fn main() {}"},
"ok",
True,
)
parsed = _make_parsed()
artifacts = _build_artifacts(parsed, collector)
assert artifacts[0].metadata["path"] == "/src/lib.rs"
assert artifacts[0].metadata["language"] == "rust"

View File

@@ -96,7 +96,8 @@ async def test_simple_assistant_task():
result = await agent.run(_make_request())
assert result.status == common_pb2.RESULT_STATUS_SUCCESS
assert "Docker" in result.summary
assert result.artifacts == ["Uses namespaces"]
finding_contents = [a.content for a in result.artifacts if a.label == "Finding"]
assert "Uses namespaces" in finding_contents
assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED

View File

@@ -100,7 +100,8 @@ async def test_simple_coding_task():
result = await agent.run(_make_request())
assert result.status == common_pb2.RESULT_STATUS_SUCCESS
assert "Fixed" in result.summary
assert result.artifacts == ["Fixed null check"]
finding_contents = [a.content for a in result.artifacts if a.label == "Finding"]
assert "Fixed null check" in finding_contents
assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED
@@ -458,10 +459,10 @@ async def test_compaction_triggers_and_continues():
'{"done": true, "summary": "Done after compaction.", "confidence": "VERIFIED"}',
],
exec_output="A" * 600,
config=AgentConfig(max_iterations=20, max_tokens=1800),
config=AgentConfig(max_iterations=20, max_tokens=2400),
)
agent._gateway.inference = AsyncMock(return_value="- Short summary")
result = await agent.run(_make_request(max_tokens=1800))
result = await agent.run(_make_request(max_tokens=2400))
assert result.status == common_pb2.RESULT_STATUS_SUCCESS
assert "Done after compaction" in result.summary

View File

@@ -67,7 +67,8 @@ async def test_e2e_web_search_research_task():
result = await ctx.agent.run(make_request())
assert result.status == common_pb2.RESULT_STATUS_SUCCESS
assert "Paris" in result.summary
assert result.artifacts == ["Paris"]
finding_contents = [a.content for a in result.artifacts if a.label == "Finding"]
assert "Paris" in finding_contents
assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED
assert result.source == common_pb2.RESULT_SOURCE_WEB
@@ -306,9 +307,9 @@ async def test_e2e_subagent_result_schema():
assert isinstance(result.summary, str)
assert len(result.summary) > 0
# Artifacts is a list of strings
# Artifacts is a list of Artifact proto messages
assert len(result.artifacts) > 0
assert all(isinstance(a, str) for a in result.artifacts)
assert all(hasattr(a, "label") and hasattr(a, "content") for a in result.artifacts)
# Quality is valid enum
assert result.result_quality in (

View File

@@ -145,7 +145,7 @@ async def test_researcher_multi_step_web_search():
assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED
assert result.source == common_pb2.RESULT_SOURCE_WEB
assert "Quantum" in result.summary or "quantum" in result.summary
assert len(result.artifacts) == 3
assert len(result.artifacts) == 5
assert len(result.new_memory_candidates) == 1
assert agent._broker.execute_tool.call_count == 2
assert agent._broker.discover_tools.call_count == 1
@@ -443,7 +443,8 @@ async def test_researcher_produces_valid_subagent_result():
assert result.summary == "Validated result."
assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED
assert result.source == common_pb2.RESULT_SOURCE_WEB
assert result.artifacts == ["finding1"]
finding_contents = [a.content for a in result.artifacts if a.label == "Finding"]
assert "finding1" in finding_contents
assert result.failure_reason == ""
assert len(result.new_memory_candidates) == 1
assert result.new_memory_candidates[0].content == "fact1"

View File

@@ -0,0 +1,207 @@
"""Tests for the OutputCollector class."""
from __future__ import annotations
from orchestrator.output_collector import OutputCollector
def test_collect_fs_write():
"""fs_write produces a CODE artifact with file content from parameters."""
collector = OutputCollector()
collector.collect(
tool_name="fs_write",
parameters={"path": "/src/main.py", "content": "def hello():\n pass\n"},
output="File written successfully",
success=True,
)
entries = collector.entries
assert len(entries) == 1
e = entries[0]
assert e.tool_name == "fs_write"
assert e.artifact_type == "code"
assert e.label == "/src/main.py"
assert e.content == "def hello():\n pass\n"
assert e.metadata["path"] == "/src/main.py"
assert e.metadata["language"] == "python"
def test_collect_fs_write_file_path_param():
"""fs_write also works with 'file_path' parameter key."""
collector = OutputCollector()
collector.collect(
tool_name="fs_write",
parameters={"file_path": "/app.js", "content": "console.log('hi')"},
output="ok",
success=True,
)
assert collector.entries[0].label == "/app.js"
assert collector.entries[0].metadata["language"] == "javascript"
def test_collect_fs_read():
"""fs_read produces a TEXT artifact with tool output as content."""
collector = OutputCollector()
collector.collect(
tool_name="fs_read",
parameters={"path": "/config.yaml"},
output="key: value\nother: 42",
success=True,
)
entries = collector.entries
assert len(entries) == 1
e = entries[0]
assert e.artifact_type == "text"
assert e.label == "/config.yaml"
assert e.content == "key: value\nother: 42"
assert e.metadata["language"] == "yaml"
def test_collect_run_code():
"""run_code produces a COMMAND_OUTPUT artifact."""
collector = OutputCollector()
collector.collect(
tool_name="run_code",
parameters={"language": "python", "code": "print(42)"},
output="42\n",
success=True,
)
entries = collector.entries
assert len(entries) == 1
e = entries[0]
assert e.artifact_type == "command_output"
assert e.label == "Code execution"
assert e.content == "42\n"
assert e.metadata["language"] == "python"
def test_collect_run_shell():
"""run_shell produces a COMMAND_OUTPUT artifact with truncated command label."""
collector = OutputCollector()
collector.collect(
tool_name="run_shell",
parameters={"command": "ls -la /tmp"},
output="total 4\ndrwxrwxrwt 2 root root 40 Jan 1 00:00 .\n",
success=True,
)
entries = collector.entries
assert len(entries) == 1
e = entries[0]
assert e.artifact_type == "command_output"
assert e.label == "ls -la /tmp"
assert e.metadata["command"] == "ls -la /tmp"
def test_collect_web_search():
"""web_search produces a SEARCH_RESULT artifact."""
collector = OutputCollector()
collector.collect(
tool_name="web_search",
parameters={"query": "python async tutorial"},
output="Result 1: ...\nResult 2: ...",
success=True,
)
entries = collector.entries
assert len(entries) == 1
e = entries[0]
assert e.artifact_type == "search_result"
assert e.label == "python async tutorial"
assert "Result 1" in e.content
def test_skips_failed_tools():
"""Failed tool executions are not collected."""
collector = OutputCollector()
collector.collect(
tool_name="fs_write",
parameters={"path": "/test.py", "content": "code"},
output="Permission denied",
success=False,
)
assert len(collector.entries) == 0
def test_skips_memory_tools():
"""memory_read and memory_write are excluded."""
collector = OutputCollector()
collector.collect(
tool_name="memory_read",
parameters={"query": "something"},
output="memory content",
success=True,
)
collector.collect(
tool_name="memory_write",
parameters={"content": "save this"},
output="ok",
success=True,
)
assert len(collector.entries) == 0
def test_skips_unknown_tools():
"""Unknown tool names are ignored."""
collector = OutputCollector()
collector.collect(
tool_name="custom_tool",
parameters={"data": "stuff"},
output="result",
success=True,
)
assert len(collector.entries) == 0
def test_multiple_tools_collected():
"""Multiple tool outputs are collected in order."""
collector = OutputCollector()
collector.collect("fs_read", {"path": "/a.py"}, "content_a", True)
collector.collect("fs_write", {"path": "/b.py", "content": "code_b"}, "ok", True)
collector.collect("run_code", {"language": "python"}, "output_c", True)
entries = collector.entries
assert len(entries) == 3
assert entries[0].label == "/a.py"
assert entries[1].label == "/b.py"
assert entries[2].label == "Code execution"
def test_content_truncation():
"""Oversized content is truncated."""
collector = OutputCollector()
huge_content = "x" * (300 * 1024)
collector.collect(
tool_name="fs_read",
parameters={"path": "/big.txt"},
output=huge_content,
success=True,
)
assert len(collector.entries) == 1
assert len(collector.entries[0].content) < len(huge_content)
assert collector.entries[0].content.endswith("... [truncated]")
def test_entries_returns_copy():
"""entries property returns a copy, not the internal list."""
collector = OutputCollector()
collector.collect("fs_read", {"path": "/a.py"}, "content", True)
entries1 = collector.entries
entries2 = collector.entries
assert entries1 == entries2
assert entries1 is not entries2
def test_language_detection():
"""Language is correctly guessed from various file extensions."""
collector = OutputCollector()
test_cases = [
("/app.ts", "typescript"),
("/style.css", "css"),
("/main.go", "go"),
("/lib.rs", "rust"),
("/query.sql", "sql"),
("/unknown.xyz", ""),
]
for path, expected_lang in test_cases:
collector = OutputCollector()
collector.collect("fs_read", {"path": path}, "content", True)
actual = collector.entries[0].metadata.get("language", "")
assert actual == expected_lang, f"Expected '{expected_lang}' for {path}, got '{actual}'"

View File

@@ -93,7 +93,8 @@ async def test_simple_research_task():
result = await agent.run(_make_request())
assert result.status == common_pb2.RESULT_STATUS_SUCCESS
assert "Paris" in result.summary
assert result.artifacts == ["Paris"]
finding_contents = [a.content for a in result.artifacts if a.label == "Finding"]
assert "Paris" in finding_contents
assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED

View File

@@ -419,7 +419,7 @@ def test_build_final_result_all_success():
r1 = common_pb2.SubagentResult(
status=common_pb2.RESULT_STATUS_SUCCESS,
summary="Done A",
artifacts=["file1.txt"],
artifacts=[common_pb2.Artifact(label="Finding", content="file1.txt", artifact_type=common_pb2.ARTIFACT_TYPE_TEXT)],
)
outcomes = [SubtaskOutcome(subtask_id="t-1", status="success", result=r1)]
report = ConfidenceReport(
@@ -430,7 +430,7 @@ def test_build_final_result_all_success():
assert result.status == common_pb2.RESULT_STATUS_SUCCESS
assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED
assert "Done A" in result.summary
assert "file1.txt" in result.artifacts
assert any(a.content == "file1.txt" for a in result.artifacts)
def test_build_final_result_partial():

View File

@@ -99,7 +99,8 @@ async def test_simple_sysadmin_task():
result = await agent.run(_make_request())
assert result.status == common_pb2.RESULT_STATUS_SUCCESS
assert "nginx" in result.summary
assert result.artifacts == ["Config valid"]
finding_contents = [a.content for a in result.artifacts if a.label == "Finding"]
assert "Config valid" in finding_contents
assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED

View File

@@ -14,7 +14,8 @@ from readability import Document
logger = logging.getLogger(__name__)
DEFAULT_USER_AGENT = (
"Mozilla/5.0 (compatible; llm-multiverse-search/0.1; +https://localhost)"
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
)
DEFAULT_TIMEOUT = 10.0
DEFAULT_MAX_CONTENT_LENGTH = 8000 # characters