Merge pull request 'feat: structured artifact passthrough for agent results' (#237) from feat/structured-artifact-passthrough into main
This commit was merged in pull request #237.
This commit is contained in:
@@ -49,7 +49,7 @@ All services communicate via gRPC (Protocol Buffers). Internal services run in D
|
||||
|
||||
Ollama runs on the bare metal host (not in Docker) because GPU drivers — particularly ROCm for AMD and CUDA for NVIDIA — require direct hardware access that Docker GPU passthrough complicates.
|
||||
|
||||
The internal Docker network (`llm-internal`) is declared `internal: true`, which means containers on this network cannot reach the internet directly. Only Caddy (on the edge network) and SearXNG (which has its own upstream configuration) can make external connections.
|
||||
The internal Docker network (`llm-internal`) is declared `internal: true`, which means containers on this network cannot reach the internet directly. Caddy (on the edge network), SearXNG, and the Search Service are also attached to `llm-external` so they can reach the internet for web searches and page content extraction.
|
||||
|
||||
## 3. Hardware Requirements
|
||||
|
||||
@@ -152,7 +152,7 @@ No values are strictly required — all services start with sensible defaults. H
|
||||
|-------|---------|-------------|
|
||||
| `decomposer.max_subtasks` | 10 | Maximum subtasks per request |
|
||||
| `dispatcher.max_concurrent_subtasks` | 5 | Parallel agent limit |
|
||||
| `dispatcher.subtask_timeout_seconds` | 120 | Per-subtask timeout |
|
||||
| `dispatcher.subtask_timeout_seconds` | 300 | Per-subtask timeout |
|
||||
| `dispatcher.overall_timeout_seconds` | 600 | Total dispatch timeout |
|
||||
| `compaction.threshold_ratio` | 0.6 | Compact at 60% of context window |
|
||||
| `memory_gating.confidence_threshold` | 0.7 | Minimum confidence to persist memory |
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
versions h2c
|
||||
}
|
||||
header_up te trailers
|
||||
}
|
||||
|
||||
log {
|
||||
output stdout
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# Orchestrator configuration for Docker deployment
|
||||
host: "0.0.0.0"
|
||||
port: 50058
|
||||
grpc_web_port: 8080
|
||||
model_gateway_addr: "model-gateway:50055"
|
||||
tool_broker_addr: "tool-broker:50057"
|
||||
memory_addr: "memory:50054"
|
||||
@@ -9,22 +10,22 @@ audit_addr: "audit:50052"
|
||||
|
||||
researcher:
|
||||
max_iterations: 10
|
||||
timeout_seconds: 120
|
||||
timeout_seconds: 300
|
||||
max_tokens: 4096
|
||||
|
||||
coder:
|
||||
max_iterations: 10
|
||||
timeout_seconds: 120
|
||||
timeout_seconds: 300
|
||||
max_tokens: 4096
|
||||
|
||||
assistant:
|
||||
max_iterations: 10
|
||||
timeout_seconds: 120
|
||||
timeout_seconds: 300
|
||||
max_tokens: 4096
|
||||
|
||||
sysadmin:
|
||||
max_iterations: 10
|
||||
timeout_seconds: 120
|
||||
timeout_seconds: 300
|
||||
max_tokens: 4096
|
||||
|
||||
decomposer:
|
||||
@@ -32,7 +33,7 @@ decomposer:
|
||||
max_subtasks: 10
|
||||
|
||||
dispatcher:
|
||||
subtask_timeout_seconds: 120.0
|
||||
subtask_timeout_seconds: 300.0
|
||||
overall_timeout_seconds: 600.0
|
||||
max_concurrent_subtasks: 5
|
||||
|
||||
|
||||
@@ -8,39 +8,12 @@
|
||||
# - Build images first: docker compose -f docker/docker-compose.yml build
|
||||
#
|
||||
# Networks:
|
||||
# llm-edge: Caddy <-> Orchestrator only (edge-facing)
|
||||
# llm-internal: All inter-service communication (internal: true, no external routing)
|
||||
#
|
||||
# The orchestrator exposes a gRPC-Web HTTP gateway on port 8080 for
|
||||
# direct browser access, eliminating the need for an edge proxy.
|
||||
|
||||
services:
|
||||
# ---------- Edge Proxy ----------
|
||||
|
||||
caddy:
|
||||
image: caddy:2-alpine
|
||||
container_name: llm-caddy
|
||||
ports:
|
||||
- "${HTTPS_PORT:-443}:443"
|
||||
- "${HTTP_PORT:-80}:80"
|
||||
volumes:
|
||||
- ./caddy/Caddyfile:/etc/caddy/Caddyfile:ro
|
||||
- caddy-data:/data
|
||||
- caddy-config:/config
|
||||
environment:
|
||||
- DOMAIN=${DOMAIN:-localhost}
|
||||
- TLS_MODE=${TLS_MODE:-internal}
|
||||
networks:
|
||||
- llm-edge
|
||||
- llm-internal
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
orchestrator:
|
||||
condition: service_started
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--spider", "--quiet", "http://localhost:80/healthz"]
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
|
||||
# ---------- Infrastructure ----------
|
||||
|
||||
searxng:
|
||||
@@ -53,6 +26,7 @@ services:
|
||||
- SEARXNG_SECRET=${SEARXNG_SECRET:-dev-secret-change-in-production}
|
||||
networks:
|
||||
- llm-internal
|
||||
- llm-external
|
||||
restart: unless-stopped
|
||||
cap_drop:
|
||||
- ALL
|
||||
@@ -145,7 +119,7 @@ services:
|
||||
target: /etc/llm-multiverse/model-gateway.toml
|
||||
networks:
|
||||
- llm-internal
|
||||
- llm-edge # Needs external access to reach Ollama on host
|
||||
- llm-external # Needs external access to reach Ollama on host
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
audit:
|
||||
@@ -186,6 +160,7 @@ services:
|
||||
SERVICE_DIR: tool-broker
|
||||
SERVICE_PACKAGE: tool-broker
|
||||
SERVICE_PORT: "50057"
|
||||
EXTRA_RUNTIME_DEPS: "python3"
|
||||
container_name: llm-tool-broker
|
||||
volumes:
|
||||
- ./manifests:/etc/llm-multiverse/manifests:ro
|
||||
@@ -223,6 +198,7 @@ services:
|
||||
target: /etc/llm-multiverse/search.yaml
|
||||
networks:
|
||||
- llm-internal
|
||||
- llm-external
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
searxng:
|
||||
@@ -237,14 +213,16 @@ services:
|
||||
context: ..
|
||||
dockerfile: docker/python/orchestrator.Dockerfile
|
||||
container_name: llm-orchestrator
|
||||
ports:
|
||||
- "${GRPC_WEB_PORT:-8080}:8080"
|
||||
environment:
|
||||
- ORCHESTRATOR_CONFIG=/etc/llm-multiverse/orchestrator.yaml
|
||||
configs:
|
||||
- source: orchestrator-config
|
||||
target: /etc/llm-multiverse/orchestrator.yaml
|
||||
networks:
|
||||
- llm-edge
|
||||
- llm-internal
|
||||
- llm-external
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
model-gateway:
|
||||
@@ -283,15 +261,11 @@ volumes:
|
||||
driver: local
|
||||
memory-data:
|
||||
driver: local
|
||||
caddy-data:
|
||||
driver: local
|
||||
caddy-config:
|
||||
driver: local
|
||||
|
||||
# ---------- Networks ----------
|
||||
|
||||
networks:
|
||||
llm-edge:
|
||||
llm-external:
|
||||
driver: bridge
|
||||
llm-internal:
|
||||
driver: bridge
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
agent_type = "coder"
|
||||
agent_type_id = 3
|
||||
allowed_tools = ["fs_read", "fs_write", "run_code", "web_search", "memory_read"]
|
||||
path_allowlist = ["/home/**", "/tmp/**", "/workspace/**"]
|
||||
path_allowlist = ["/home/**", "/tmp", "/tmp/**", "/workspace", "/workspace/**"]
|
||||
can_spawn = []
|
||||
max_spawn_depth = 0
|
||||
|
||||
|
||||
@@ -80,6 +80,9 @@ RUN groupadd -r app && useradd -r -g app -d /nonexistent -s /usr/sbin/nologin ap
|
||||
# Pre-create /data owned by app for services that mount volumes there
|
||||
RUN mkdir -p /data && chown app:app /data
|
||||
|
||||
# Pre-create /workspace owned by app for tool execution (code writing, run_code working_dir)
|
||||
RUN mkdir -p /workspace/output && chown -R app:app /workspace
|
||||
|
||||
COPY --from=builder /app/target/release/${SERVICE_PACKAGE} /usr/local/bin/service
|
||||
|
||||
USER app
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||
# NO CHECKED-IN PROTOBUF GENCODE
|
||||
# source: llm_multiverse/v1/common.proto
|
||||
# Protobuf Python Version: 7.34.0
|
||||
# Protobuf Python Version: 6.31.1
|
||||
"""Generated protocol buffer code."""
|
||||
from google.protobuf import descriptor as _descriptor
|
||||
from google.protobuf import descriptor_pool as _descriptor_pool
|
||||
@@ -11,9 +11,9 @@ from google.protobuf import symbol_database as _symbol_database
|
||||
from google.protobuf.internal import builder as _builder
|
||||
_runtime_version.ValidateProtobufRuntimeVersion(
|
||||
_runtime_version.Domain.PUBLIC,
|
||||
7,
|
||||
34,
|
||||
0,
|
||||
6,
|
||||
31,
|
||||
1,
|
||||
'',
|
||||
'llm_multiverse/v1/common.proto'
|
||||
)
|
||||
@@ -25,40 +25,47 @@ _sym_db = _symbol_database.Default()
|
||||
from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2
|
||||
|
||||
|
||||
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1ellm_multiverse/v1/common.proto\x12\x11llm_multiverse.v1\x1a\x1fgoogle/protobuf/timestamp.proto\"\x8a\x01\n\x0f\x41gentIdentifier\x12\x19\n\x08\x61gent_id\x18\x01 \x01(\tR\x07\x61gentId\x12;\n\nagent_type\x18\x02 \x01(\x0e\x32\x1c.llm_multiverse.v1.AgentTypeR\tagentType\x12\x1f\n\x0bspawn_depth\x18\x03 \x01(\rR\nspawnDepth\"J\n\x0c\x41gentLineage\x12:\n\x06\x61gents\x18\x01 \x03(\x0b\x32\".llm_multiverse.v1.AgentIdentifierR\x06\x61gents\"\x92\x02\n\x0eSessionContext\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x17\n\x07user_id\x18\x02 \x01(\tR\x06userId\x12\x44\n\ragent_lineage\x18\x03 \x01(\x0b\x32\x1f.llm_multiverse.v1.AgentLineageR\x0c\x61gentLineage\x12G\n\x0eoverride_level\x18\x04 \x01(\x0e\x32 .llm_multiverse.v1.OverrideLevelR\roverrideLevel\x12\x39\n\ncreated_at\x18\x05 \x01(\x0b\x32\x1a.google.protobuf.TimestampR\tcreatedAt\"\xc2\x01\n\x0b\x45rrorDetail\x12\x12\n\x04\x63ode\x18\x01 \x01(\tR\x04\x63ode\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12H\n\x08metadata\x18\x03 \x03(\x0b\x32,.llm_multiverse.v1.ErrorDetail.MetadataEntryR\x08metadata\x1a;\n\rMetadataEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\"\x84\x01\n\x0fMemoryCandidate\x12\x18\n\x07\x63ontent\x18\x01 \x01(\tR\x07\x63ontent\x12\x37\n\x06source\x18\x02 \x01(\x0e\x32\x1f.llm_multiverse.v1.ResultSourceR\x06source\x12\x1e\n\nconfidence\x18\x03 \x01(\x02R\nconfidence\"\x9a\x03\n\x0eSubagentResult\x12\x37\n\x06status\x18\x01 \x01(\x0e\x32\x1f.llm_multiverse.v1.ResultStatusR\x06status\x12\x18\n\x07summary\x18\x02 \x01(\tR\x07summary\x12\x1c\n\tartifacts\x18\x03 \x03(\tR\tartifacts\x12G\n\x0eresult_quality\x18\x04 \x01(\x0e\x32 .llm_multiverse.v1.ResultQualityR\rresultQuality\x12\x37\n\x06source\x18\x05 \x01(\x0e\x32\x1f.llm_multiverse.v1.ResultSourceR\x06source\x12V\n\x15new_memory_candidates\x18\x06 
\x03(\x0b\x32\".llm_multiverse.v1.MemoryCandidateR\x13newMemoryCandidates\x12*\n\x0e\x66\x61ilure_reason\x18\x07 \x01(\tH\x00R\rfailureReason\x88\x01\x01\x42\x11\n\x0f_failure_reason*\xa8\x01\n\tAgentType\x12\x1a\n\x16\x41GENT_TYPE_UNSPECIFIED\x10\x00\x12\x1b\n\x17\x41GENT_TYPE_ORCHESTRATOR\x10\x01\x12\x19\n\x15\x41GENT_TYPE_RESEARCHER\x10\x02\x12\x14\n\x10\x41GENT_TYPE_CODER\x10\x03\x12\x17\n\x13\x41GENT_TYPE_SYSADMIN\x10\x04\x12\x18\n\x14\x41GENT_TYPE_ASSISTANT\x10\x05*\xf5\x01\n\x08ToolType\x12\x19\n\x15TOOL_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15TOOL_TYPE_MEMORY_READ\x10\x01\x12\x1a\n\x16TOOL_TYPE_MEMORY_WRITE\x10\x02\x12\x18\n\x14TOOL_TYPE_WEB_SEARCH\x10\x03\x12\x15\n\x11TOOL_TYPE_FS_READ\x10\x04\x12\x16\n\x12TOOL_TYPE_FS_WRITE\x10\x05\x12\x16\n\x12TOOL_TYPE_RUN_CODE\x10\x06\x12\x17\n\x13TOOL_TYPE_RUN_SHELL\x10\x07\x12\x1d\n\x19TOOL_TYPE_PACKAGE_INSTALL\x10\x08*z\n\rOverrideLevel\x12\x1e\n\x1aOVERRIDE_LEVEL_UNSPECIFIED\x10\x00\x12\x17\n\x13OVERRIDE_LEVEL_NONE\x10\x01\x12\x18\n\x14OVERRIDE_LEVEL_RELAX\x10\x02\x12\x16\n\x12OVERRIDE_LEVEL_ALL\x10\x03*}\n\x0cResultStatus\x12\x1d\n\x19RESULT_STATUS_UNSPECIFIED\x10\x00\x12\x19\n\x15RESULT_STATUS_SUCCESS\x10\x01\x12\x19\n\x15RESULT_STATUS_PARTIAL\x10\x02\x12\x18\n\x14RESULT_STATUS_FAILED\x10\x03*\x87\x01\n\rResultQuality\x12\x1e\n\x1aRESULT_QUALITY_UNSPECIFIED\x10\x00\x12\x1b\n\x17RESULT_QUALITY_VERIFIED\x10\x01\x12\x1b\n\x17RESULT_QUALITY_INFERRED\x10\x02\x12\x1c\n\x18RESULT_QUALITY_UNCERTAIN\x10\x03*\x86\x01\n\x0cResultSource\x12\x1d\n\x19RESULT_SOURCE_UNSPECIFIED\x10\x00\x12\x1d\n\x19RESULT_SOURCE_TOOL_OUTPUT\x10\x01\x12!\n\x1dRESULT_SOURCE_MODEL_KNOWLEDGE\x10\x02\x12\x15\n\x11RESULT_SOURCE_WEB\x10\x03\x42\x85\x01\n\x15\x63om.llm_multiverse.v1B\x0b\x43ommonProtoP\x01\xa2\x02\x03LXX\xaa\x02\x10LlmMultiverse.V1\xca\x02\x10LlmMultiverse\\V1\xe2\x02\x1cLlmMultiverse\\V1\\GPBMetadata\xea\x02\x11LlmMultiverse::V1b\x06proto3')
|
||||
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1ellm_multiverse/v1/common.proto\x12\x11llm_multiverse.v1\x1a\x1fgoogle/protobuf/timestamp.proto\"\xd0\x01\n\x08\x41rtifact\x12\r\n\x05label\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t\x12\x36\n\rartifact_type\x18\x03 \x01(\x0e\x32\x1f.llm_multiverse.v1.ArtifactType\x12;\n\x08metadata\x18\x04 \x03(\x0b\x32).llm_multiverse.v1.Artifact.MetadataEntry\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"j\n\x0f\x41gentIdentifier\x12\x10\n\x08\x61gent_id\x18\x01 \x01(\t\x12\x30\n\nagent_type\x18\x02 \x01(\x0e\x32\x1c.llm_multiverse.v1.AgentType\x12\x13\n\x0bspawn_depth\x18\x03 \x01(\r\"B\n\x0c\x41gentLineage\x12\x32\n\x06\x61gents\x18\x01 \x03(\x0b\x32\".llm_multiverse.v1.AgentIdentifier\"\xd7\x01\n\x0eSessionContext\x12\x12\n\nsession_id\x18\x01 \x01(\t\x12\x0f\n\x07user_id\x18\x02 \x01(\t\x12\x36\n\ragent_lineage\x18\x03 \x01(\x0b\x32\x1f.llm_multiverse.v1.AgentLineage\x12\x38\n\x0eoverride_level\x18\x04 \x01(\x0e\x32 .llm_multiverse.v1.OverrideLevel\x12.\n\ncreated_at\x18\x05 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\"\x9d\x01\n\x0b\x45rrorDetail\x12\x0c\n\x04\x63ode\x18\x01 \x01(\t\x12\x0f\n\x07message\x18\x02 \x01(\t\x12>\n\x08metadata\x18\x03 \x03(\x0b\x32,.llm_multiverse.v1.ErrorDetail.MetadataEntry\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"g\n\x0fMemoryCandidate\x12\x0f\n\x07\x63ontent\x18\x01 \x01(\t\x12/\n\x06source\x18\x02 \x01(\x0e\x32\x1f.llm_multiverse.v1.ResultSource\x12\x12\n\nconfidence\x18\x03 \x01(\x02\"\xe0\x02\n\x0eSubagentResult\x12/\n\x06status\x18\x01 \x01(\x0e\x32\x1f.llm_multiverse.v1.ResultStatus\x12\x0f\n\x07summary\x18\x02 \x01(\t\x12.\n\tartifacts\x18\x03 \x03(\x0b\x32\x1b.llm_multiverse.v1.Artifact\x12\x38\n\x0eresult_quality\x18\x04 \x01(\x0e\x32 .llm_multiverse.v1.ResultQuality\x12/\n\x06source\x18\x05 
\x01(\x0e\x32\x1f.llm_multiverse.v1.ResultSource\x12\x41\n\x15new_memory_candidates\x18\x06 \x03(\x0b\x32\".llm_multiverse.v1.MemoryCandidate\x12\x1b\n\x0e\x66\x61ilure_reason\x18\x07 \x01(\tH\x00\x88\x01\x01\x42\x11\n\x0f_failure_reason*\xa8\x01\n\tAgentType\x12\x1a\n\x16\x41GENT_TYPE_UNSPECIFIED\x10\x00\x12\x1b\n\x17\x41GENT_TYPE_ORCHESTRATOR\x10\x01\x12\x19\n\x15\x41GENT_TYPE_RESEARCHER\x10\x02\x12\x14\n\x10\x41GENT_TYPE_CODER\x10\x03\x12\x17\n\x13\x41GENT_TYPE_SYSADMIN\x10\x04\x12\x18\n\x14\x41GENT_TYPE_ASSISTANT\x10\x05*\xf5\x01\n\x08ToolType\x12\x19\n\x15TOOL_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15TOOL_TYPE_MEMORY_READ\x10\x01\x12\x1a\n\x16TOOL_TYPE_MEMORY_WRITE\x10\x02\x12\x18\n\x14TOOL_TYPE_WEB_SEARCH\x10\x03\x12\x15\n\x11TOOL_TYPE_FS_READ\x10\x04\x12\x16\n\x12TOOL_TYPE_FS_WRITE\x10\x05\x12\x16\n\x12TOOL_TYPE_RUN_CODE\x10\x06\x12\x17\n\x13TOOL_TYPE_RUN_SHELL\x10\x07\x12\x1d\n\x19TOOL_TYPE_PACKAGE_INSTALL\x10\x08*z\n\rOverrideLevel\x12\x1e\n\x1aOVERRIDE_LEVEL_UNSPECIFIED\x10\x00\x12\x17\n\x13OVERRIDE_LEVEL_NONE\x10\x01\x12\x18\n\x14OVERRIDE_LEVEL_RELAX\x10\x02\x12\x16\n\x12OVERRIDE_LEVEL_ALL\x10\x03*}\n\x0cResultStatus\x12\x1d\n\x19RESULT_STATUS_UNSPECIFIED\x10\x00\x12\x19\n\x15RESULT_STATUS_SUCCESS\x10\x01\x12\x19\n\x15RESULT_STATUS_PARTIAL\x10\x02\x12\x18\n\x14RESULT_STATUS_FAILED\x10\x03*\x87\x01\n\rResultQuality\x12\x1e\n\x1aRESULT_QUALITY_UNSPECIFIED\x10\x00\x12\x1b\n\x17RESULT_QUALITY_VERIFIED\x10\x01\x12\x1b\n\x17RESULT_QUALITY_INFERRED\x10\x02\x12\x1c\n\x18RESULT_QUALITY_UNCERTAIN\x10\x03*\x86\x01\n\x0cResultSource\x12\x1d\n\x19RESULT_SOURCE_UNSPECIFIED\x10\x00\x12\x1d\n\x19RESULT_SOURCE_TOOL_OUTPUT\x10\x01\x12!\n\x1dRESULT_SOURCE_MODEL_KNOWLEDGE\x10\x02\x12\x15\n\x11RESULT_SOURCE_WEB\x10\x03*\xa0\x01\n\x0c\x41rtifactType\x12\x1d\n\x19\x41RTIFACT_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x41RTIFACT_TYPE_CODE\x10\x01\x12\x16\n\x12\x41RTIFACT_TYPE_TEXT\x10\x02\x12 
\n\x1c\x41RTIFACT_TYPE_COMMAND_OUTPUT\x10\x03\x12\x1f\n\x1b\x41RTIFACT_TYPE_SEARCH_RESULT\x10\x04\x62\x06proto3')
|
||||
|
||||
_globals = globals()
|
||||
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
|
||||
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'llm_multiverse.v1.common_pb2', _globals)
|
||||
if not _descriptor._USE_C_DESCRIPTORS:
|
||||
_globals['DESCRIPTOR']._loaded_options = None
|
||||
_globals['DESCRIPTOR']._serialized_options = b'\n\025com.llm_multiverse.v1B\013CommonProtoP\001\242\002\003LXX\252\002\020LlmMultiverse.V1\312\002\020LlmMultiverse\\V1\342\002\034LlmMultiverse\\V1\\GPBMetadata\352\002\021LlmMultiverse::V1'
|
||||
DESCRIPTOR._loaded_options = None
|
||||
_globals['_ARTIFACT_METADATAENTRY']._loaded_options = None
|
||||
_globals['_ARTIFACT_METADATAENTRY']._serialized_options = b'8\001'
|
||||
_globals['_ERRORDETAIL_METADATAENTRY']._loaded_options = None
|
||||
_globals['_ERRORDETAIL_METADATAENTRY']._serialized_options = b'8\001'
|
||||
_globals['_AGENTTYPE']._serialized_start=1326
|
||||
_globals['_AGENTTYPE']._serialized_end=1494
|
||||
_globals['_TOOLTYPE']._serialized_start=1497
|
||||
_globals['_TOOLTYPE']._serialized_end=1742
|
||||
_globals['_OVERRIDELEVEL']._serialized_start=1744
|
||||
_globals['_OVERRIDELEVEL']._serialized_end=1866
|
||||
_globals['_RESULTSTATUS']._serialized_start=1868
|
||||
_globals['_RESULTSTATUS']._serialized_end=1993
|
||||
_globals['_RESULTQUALITY']._serialized_start=1996
|
||||
_globals['_RESULTQUALITY']._serialized_end=2131
|
||||
_globals['_RESULTSOURCE']._serialized_start=2134
|
||||
_globals['_RESULTSOURCE']._serialized_end=2268
|
||||
_globals['_AGENTIDENTIFIER']._serialized_start=87
|
||||
_globals['_AGENTIDENTIFIER']._serialized_end=225
|
||||
_globals['_AGENTLINEAGE']._serialized_start=227
|
||||
_globals['_AGENTLINEAGE']._serialized_end=301
|
||||
_globals['_SESSIONCONTEXT']._serialized_start=304
|
||||
_globals['_SESSIONCONTEXT']._serialized_end=578
|
||||
_globals['_ERRORDETAIL']._serialized_start=581
|
||||
_globals['_ERRORDETAIL']._serialized_end=775
|
||||
_globals['_ERRORDETAIL_METADATAENTRY']._serialized_start=716
|
||||
_globals['_ERRORDETAIL_METADATAENTRY']._serialized_end=775
|
||||
_globals['_MEMORYCANDIDATE']._serialized_start=778
|
||||
_globals['_MEMORYCANDIDATE']._serialized_end=910
|
||||
_globals['_SUBAGENTRESULT']._serialized_start=913
|
||||
_globals['_SUBAGENTRESULT']._serialized_end=1323
|
||||
_globals['_AGENTTYPE']._serialized_start=1312
|
||||
_globals['_AGENTTYPE']._serialized_end=1480
|
||||
_globals['_TOOLTYPE']._serialized_start=1483
|
||||
_globals['_TOOLTYPE']._serialized_end=1728
|
||||
_globals['_OVERRIDELEVEL']._serialized_start=1730
|
||||
_globals['_OVERRIDELEVEL']._serialized_end=1852
|
||||
_globals['_RESULTSTATUS']._serialized_start=1854
|
||||
_globals['_RESULTSTATUS']._serialized_end=1979
|
||||
_globals['_RESULTQUALITY']._serialized_start=1982
|
||||
_globals['_RESULTQUALITY']._serialized_end=2117
|
||||
_globals['_RESULTSOURCE']._serialized_start=2120
|
||||
_globals['_RESULTSOURCE']._serialized_end=2254
|
||||
_globals['_ARTIFACTTYPE']._serialized_start=2257
|
||||
_globals['_ARTIFACTTYPE']._serialized_end=2417
|
||||
_globals['_ARTIFACT']._serialized_start=87
|
||||
_globals['_ARTIFACT']._serialized_end=295
|
||||
_globals['_ARTIFACT_METADATAENTRY']._serialized_start=248
|
||||
_globals['_ARTIFACT_METADATAENTRY']._serialized_end=295
|
||||
_globals['_AGENTIDENTIFIER']._serialized_start=297
|
||||
_globals['_AGENTIDENTIFIER']._serialized_end=403
|
||||
_globals['_AGENTLINEAGE']._serialized_start=405
|
||||
_globals['_AGENTLINEAGE']._serialized_end=471
|
||||
_globals['_SESSIONCONTEXT']._serialized_start=474
|
||||
_globals['_SESSIONCONTEXT']._serialized_end=689
|
||||
_globals['_ERRORDETAIL']._serialized_start=692
|
||||
_globals['_ERRORDETAIL']._serialized_end=849
|
||||
_globals['_ERRORDETAIL_METADATAENTRY']._serialized_start=248
|
||||
_globals['_ERRORDETAIL_METADATAENTRY']._serialized_end=295
|
||||
_globals['_MEMORYCANDIDATE']._serialized_start=851
|
||||
_globals['_MEMORYCANDIDATE']._serialized_end=954
|
||||
_globals['_SUBAGENTRESULT']._serialized_start=957
|
||||
_globals['_SUBAGENTRESULT']._serialized_end=1309
|
||||
# @@protoc_insertion_point(module_scope)
|
||||
|
||||
@@ -1,4 +1,24 @@
|
||||
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
|
||||
"""Client and server classes corresponding to protobuf-defined services."""
|
||||
import grpc
|
||||
import warnings
|
||||
|
||||
|
||||
GRPC_GENERATED_VERSION = '1.78.0'
|
||||
GRPC_VERSION = grpc.__version__
|
||||
_version_not_supported = False
|
||||
|
||||
try:
|
||||
from grpc._utilities import first_version_is_lower
|
||||
_version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION)
|
||||
except ImportError:
|
||||
_version_not_supported = True
|
||||
|
||||
if _version_not_supported:
|
||||
raise RuntimeError(
|
||||
f'The grpc package installed is at version {GRPC_VERSION},'
|
||||
+ ' but the generated code in llm_multiverse/v1/common_pb2_grpc.py depends on'
|
||||
+ f' grpcio>={GRPC_GENERATED_VERSION}.'
|
||||
+ f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
|
||||
+ f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'
|
||||
)
|
||||
|
||||
@@ -64,8 +64,25 @@ enum ResultSource {
|
||||
RESULT_SOURCE_WEB = 3;
|
||||
}
|
||||
|
||||
// Type of artifact produced by an agent during its tool call loop.
|
||||
enum ArtifactType {
|
||||
ARTIFACT_TYPE_UNSPECIFIED = 0;
|
||||
ARTIFACT_TYPE_CODE = 1; // Code written via fs_write
|
||||
ARTIFACT_TYPE_TEXT = 2; // Plain text / file content from fs_read
|
||||
ARTIFACT_TYPE_COMMAND_OUTPUT = 3; // Output from run_code / run_shell
|
||||
ARTIFACT_TYPE_SEARCH_RESULT = 4; // Web search results
|
||||
}
|
||||
|
||||
// --- Messages ---
|
||||
|
||||
// A concrete output produced by an agent (code, command output, etc.).
|
||||
message Artifact {
|
||||
string label = 1; // Display name (filename, query, etc.)
|
||||
string content = 2; // Full content
|
||||
ArtifactType artifact_type = 3;
|
||||
map<string, string> metadata = 4; // language, path, tool_name, exit_code, etc.
|
||||
}
|
||||
|
||||
// Identifies a single agent in the lineage chain.
|
||||
message AgentIdentifier {
|
||||
string agent_id = 1;
|
||||
@@ -107,7 +124,8 @@ message SubagentResult {
|
||||
ResultStatus status = 1;
|
||||
// 3-sentence max summary of work performed.
|
||||
string summary = 2;
|
||||
repeated string artifacts = 3;
|
||||
// Structured artifacts produced during the agent loop.
|
||||
repeated Artifact artifacts = 3;
|
||||
ResultQuality result_quality = 4;
|
||||
ResultSource source = 5;
|
||||
repeated MemoryCandidate new_memory_candidates = 6;
|
||||
|
||||
@@ -11,6 +11,7 @@ dependencies = [
|
||||
"orjson>=3.10",
|
||||
"uvloop>=0.21",
|
||||
"msgspec>=0.19",
|
||||
"aiohttp>=3.10",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
|
||||
@@ -11,6 +11,7 @@ from llm_multiverse.v1 import audit_pb2_grpc, orchestrator_pb2_grpc
|
||||
|
||||
from .clients import MemoryClient, ModelGatewayClient, ToolBrokerClient
|
||||
from .config import Config
|
||||
from .grpc_web_gateway import start_gateway
|
||||
from .service import OrchestratorServiceImpl
|
||||
|
||||
logger = logging.getLogger("orchestrator")
|
||||
@@ -50,6 +51,12 @@ async def serve(config: Config) -> None:
|
||||
|
||||
await server.start()
|
||||
|
||||
# Start gRPC-Web HTTP gateway if configured.
|
||||
gateway_runner = None
|
||||
if config.grpc_web_port:
|
||||
grpc_addr = f"localhost:{config.port}"
|
||||
gateway_runner = await start_gateway(grpc_addr, config.grpc_web_port)
|
||||
|
||||
# Graceful shutdown on SIGINT/SIGTERM
|
||||
loop = asyncio.get_running_loop()
|
||||
shutdown_event = asyncio.Event()
|
||||
@@ -63,6 +70,8 @@ async def serve(config: Config) -> None:
|
||||
|
||||
await shutdown_event.wait()
|
||||
logger.info("Shutting down gracefully...")
|
||||
if gateway_runner:
|
||||
await gateway_runner.cleanup()
|
||||
await server.stop(grace=5)
|
||||
logger.info("Orchestrator shut down")
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ from __future__ import annotations
|
||||
|
||||
from llm_multiverse.v1 import common_pb2
|
||||
|
||||
from .output_collector import OutputCollector
|
||||
from .parser import DoneSignalParsed
|
||||
|
||||
# Confidence string → proto quality enum.
|
||||
@@ -13,6 +14,14 @@ CONFIDENCE_MAP: dict[str, int] = {
|
||||
"UNCERTAIN": common_pb2.RESULT_QUALITY_UNCERTAIN,
|
||||
}
|
||||
|
||||
# CollectedOutput.artifact_type → proto ArtifactType enum.
|
||||
_ARTIFACT_TYPE_MAP: dict[str, int] = {
|
||||
"code": common_pb2.ARTIFACT_TYPE_CODE,
|
||||
"text": common_pb2.ARTIFACT_TYPE_TEXT,
|
||||
"command_output": common_pb2.ARTIFACT_TYPE_COMMAND_OUTPUT,
|
||||
"search_result": common_pb2.ARTIFACT_TYPE_SEARCH_RESULT,
|
||||
}
|
||||
|
||||
|
||||
def determine_source(used_tool_output: bool, used_web: bool) -> int:
|
||||
"""Determine the result source based on tool usage."""
|
||||
@@ -27,6 +36,7 @@ def build_success_result(
|
||||
parsed: DoneSignalParsed,
|
||||
used_tool_output: bool,
|
||||
used_web: bool,
|
||||
collector: OutputCollector | None = None,
|
||||
) -> common_pb2.SubagentResult:
|
||||
"""Build a success result from a parsed done signal."""
|
||||
quality = CONFIDENCE_MAP.get(parsed.confidence, common_pb2.RESULT_QUALITY_UNCERTAIN)
|
||||
@@ -46,10 +56,12 @@ def build_success_result(
|
||||
for mc in parsed.memory_candidates
|
||||
]
|
||||
|
||||
artifacts = _build_artifacts(parsed, collector)
|
||||
|
||||
return common_pb2.SubagentResult(
|
||||
status=status,
|
||||
summary=parsed.summary,
|
||||
artifacts=parsed.findings,
|
||||
artifacts=artifacts,
|
||||
result_quality=quality,
|
||||
source=source,
|
||||
new_memory_candidates=candidates,
|
||||
@@ -61,12 +73,15 @@ def build_partial_result(
|
||||
summary_prefix: str,
|
||||
used_tool_output: bool,
|
||||
used_web: bool,
|
||||
collector: OutputCollector | None = None,
|
||||
) -> common_pb2.SubagentResult:
|
||||
"""Build a partial result with agent-specific summary prefix."""
|
||||
source = determine_source(used_tool_output, used_web)
|
||||
artifacts = _build_artifacts(None, collector) if collector else []
|
||||
return common_pb2.SubagentResult(
|
||||
status=common_pb2.RESULT_STATUS_PARTIAL,
|
||||
summary=f"{summary_prefix} incomplete: {reason}",
|
||||
artifacts=artifacts,
|
||||
result_quality=common_pb2.RESULT_QUALITY_UNCERTAIN,
|
||||
source=source,
|
||||
)
|
||||
@@ -83,6 +98,40 @@ def build_failed_result(reason: str, summary_prefix: str) -> common_pb2.Subagent
|
||||
)
|
||||
|
||||
|
||||
def _build_artifacts(
|
||||
parsed: DoneSignalParsed | None,
|
||||
collector: OutputCollector | None,
|
||||
) -> list[common_pb2.Artifact]:
|
||||
"""Convert collected outputs and findings into proto Artifact messages."""
|
||||
artifacts: list[common_pb2.Artifact] = []
|
||||
|
||||
if collector:
|
||||
for entry in collector.entries:
|
||||
artifacts.append(
|
||||
common_pb2.Artifact(
|
||||
label=entry.label,
|
||||
content=entry.content,
|
||||
artifact_type=_ARTIFACT_TYPE_MAP.get(
|
||||
entry.artifact_type, common_pb2.ARTIFACT_TYPE_UNSPECIFIED
|
||||
),
|
||||
metadata=entry.metadata,
|
||||
)
|
||||
)
|
||||
|
||||
# Always include findings as text artifacts (model-curated summaries).
|
||||
if parsed and parsed.findings:
|
||||
for finding in parsed.findings:
|
||||
artifacts.append(
|
||||
common_pb2.Artifact(
|
||||
label="Finding",
|
||||
content=finding,
|
||||
artifact_type=common_pb2.ARTIFACT_TYPE_TEXT,
|
||||
)
|
||||
)
|
||||
|
||||
return artifacts
|
||||
|
||||
|
||||
def format_memory(mem_response) -> str:
|
||||
"""Format a QueryMemoryResponse into a context string."""
|
||||
entry = mem_response.entry
|
||||
|
||||
@@ -17,6 +17,7 @@ from .agent_utils import (
|
||||
)
|
||||
from .clients import MemoryClient, ModelGatewayClient, ToolBrokerClient
|
||||
from .config import AgentConfig
|
||||
from .output_collector import OutputCollector
|
||||
from .parser import DoneSignalParsed, ParseError, ToolCallParsed, parse_agent_response
|
||||
from .prompt import PromptBuilder
|
||||
|
||||
@@ -84,8 +85,12 @@ class BaseAgent:
|
||||
session_ctx, self.agent_type, request.task
|
||||
)
|
||||
except grpc.aio.AioRpcError as e:
|
||||
self._logger.warning(
|
||||
"Tool discovery failed: code=%s, details=%s", e.code(), e.details()
|
||||
)
|
||||
return build_failed_result(
|
||||
f"Tool discovery failed: {e.code()}", self.summary_prefix
|
||||
f"Tool discovery failed: {e.code()}, details={e.details()}",
|
||||
self.summary_prefix,
|
||||
)
|
||||
|
||||
if not tools:
|
||||
@@ -120,126 +125,162 @@ class BaseAgent:
|
||||
prompt_builder.set_memory_context(memory_context)
|
||||
|
||||
# Step 4: Agent loop
|
||||
collector = OutputCollector()
|
||||
iteration = 0
|
||||
consecutive_failures = 0
|
||||
used_tool_output = False
|
||||
used_web_search = False
|
||||
start_time = asyncio.get_event_loop().time()
|
||||
|
||||
while True:
|
||||
# Termination checks
|
||||
if iteration >= self._config.max_iterations:
|
||||
return build_partial_result(
|
||||
"Max iterations reached",
|
||||
self.summary_prefix,
|
||||
used_tool_output,
|
||||
used_web_search,
|
||||
)
|
||||
|
||||
elapsed = asyncio.get_event_loop().time() - start_time
|
||||
if elapsed >= self._config.timeout_seconds:
|
||||
return build_partial_result(
|
||||
"Timeout",
|
||||
self.summary_prefix,
|
||||
used_tool_output,
|
||||
used_web_search,
|
||||
)
|
||||
|
||||
if prompt_builder.needs_compaction():
|
||||
compacted = await prompt_builder.compact(self._gateway, session_ctx)
|
||||
if not compacted:
|
||||
try:
|
||||
while True:
|
||||
# Termination checks
|
||||
if iteration >= self._config.max_iterations:
|
||||
return build_partial_result(
|
||||
"Context overflow after compaction",
|
||||
"Max iterations reached",
|
||||
self.summary_prefix,
|
||||
used_tool_output,
|
||||
used_web_search,
|
||||
collector,
|
||||
)
|
||||
self._logger.info(
|
||||
"Context compacted for agent %s (iteration %d)",
|
||||
agent_id,
|
||||
iteration,
|
||||
)
|
||||
|
||||
if consecutive_failures >= _MAX_CONSECUTIVE_FAILURES:
|
||||
return build_failed_result(
|
||||
f"All tools failed after {_MAX_CONSECUTIVE_FAILURES} consecutive failures",
|
||||
self.summary_prefix,
|
||||
)
|
||||
elapsed = asyncio.get_event_loop().time() - start_time
|
||||
if elapsed >= self._config.timeout_seconds:
|
||||
return build_partial_result(
|
||||
"Timeout",
|
||||
self.summary_prefix,
|
||||
used_tool_output,
|
||||
used_web_search,
|
||||
collector,
|
||||
)
|
||||
|
||||
# Build prompt and call model
|
||||
prompt = prompt_builder.build()
|
||||
try:
|
||||
response_text = await self._gateway.stream_inference(
|
||||
session_ctx, prompt, max_tokens=max_tokens
|
||||
)
|
||||
except grpc.aio.AioRpcError as e:
|
||||
return build_failed_result(
|
||||
f"Model gateway error: {e.code()}", self.summary_prefix
|
||||
)
|
||||
if prompt_builder.needs_compaction():
|
||||
compacted = await prompt_builder.compact(
|
||||
self._gateway, session_ctx
|
||||
)
|
||||
if not compacted:
|
||||
return build_partial_result(
|
||||
"Context overflow after compaction",
|
||||
self.summary_prefix,
|
||||
used_tool_output,
|
||||
used_web_search,
|
||||
collector,
|
||||
)
|
||||
self._logger.info(
|
||||
"Context compacted for agent %s (iteration %d)",
|
||||
agent_id,
|
||||
iteration,
|
||||
)
|
||||
|
||||
# Parse model output
|
||||
try:
|
||||
parsed = parse_agent_response(response_text)
|
||||
except ParseError as e:
|
||||
prompt_builder.add_reasoning(
|
||||
f"[Parse error: {e}. Please output valid JSON.]"
|
||||
)
|
||||
iteration += 1
|
||||
continue
|
||||
if consecutive_failures >= _MAX_CONSECUTIVE_FAILURES:
|
||||
return build_failed_result(
|
||||
f"All tools failed after {_MAX_CONSECUTIVE_FAILURES} consecutive failures",
|
||||
self.summary_prefix,
|
||||
)
|
||||
|
||||
if parsed is None:
|
||||
prompt_builder.add_reasoning(response_text)
|
||||
iteration += 1
|
||||
continue
|
||||
# Build prompt and call model
|
||||
prompt = prompt_builder.build()
|
||||
try:
|
||||
response_text = await self._gateway.stream_inference(
|
||||
session_ctx, prompt, max_tokens=max_tokens
|
||||
)
|
||||
except grpc.aio.AioRpcError as e:
|
||||
self._logger.warning(
|
||||
"Model gateway error: code=%s, details=%s, iteration=%d",
|
||||
e.code(),
|
||||
e.details(),
|
||||
iteration,
|
||||
)
|
||||
return build_failed_result(
|
||||
f"Model gateway error: {e.code()}, details={e.details()}",
|
||||
self.summary_prefix,
|
||||
)
|
||||
|
||||
if isinstance(parsed, DoneSignalParsed):
|
||||
return build_success_result(parsed, used_tool_output, used_web_search)
|
||||
# Parse model output
|
||||
try:
|
||||
parsed = parse_agent_response(response_text)
|
||||
except ParseError as e:
|
||||
prompt_builder.add_reasoning(
|
||||
f"[Parse error: {e}. Please output valid JSON.]"
|
||||
)
|
||||
iteration += 1
|
||||
continue
|
||||
|
||||
if parsed is None:
|
||||
prompt_builder.add_reasoning(response_text)
|
||||
iteration += 1
|
||||
continue
|
||||
|
||||
if isinstance(parsed, DoneSignalParsed):
|
||||
return build_success_result(
|
||||
parsed, used_tool_output, used_web_search, collector
|
||||
)
|
||||
|
||||
if isinstance(parsed, ToolCallParsed):
|
||||
if parsed.tool not in tool_names:
|
||||
prompt_builder.add_tool_result(
|
||||
parsed.tool,
|
||||
f"Error: tool '{parsed.tool}' is not available. "
|
||||
f"Available tools: {', '.join(sorted(tool_names))}",
|
||||
False,
|
||||
agent_id,
|
||||
session_ctx.session_id,
|
||||
)
|
||||
iteration += 1
|
||||
consecutive_failures += 1
|
||||
continue
|
||||
|
||||
prompt_builder.add_tool_call(parsed.tool, parsed.parameters)
|
||||
try:
|
||||
output, success = await self._broker.execute_tool(
|
||||
session_ctx,
|
||||
self.agent_type,
|
||||
parsed.tool,
|
||||
parsed.parameters,
|
||||
)
|
||||
except grpc.aio.AioRpcError as e:
|
||||
self._logger.warning(
|
||||
"Tool execution error: tool=%s, code=%s, details=%s",
|
||||
parsed.tool,
|
||||
e.code(),
|
||||
e.details(),
|
||||
)
|
||||
output = (
|
||||
f"Tool execution error: {e.code()}, details={e.details()}"
|
||||
)
|
||||
success = False
|
||||
|
||||
if isinstance(parsed, ToolCallParsed):
|
||||
if parsed.tool not in tool_names:
|
||||
prompt_builder.add_tool_result(
|
||||
parsed.tool,
|
||||
f"Error: tool '{parsed.tool}' is not available. "
|
||||
f"Available tools: {', '.join(sorted(tool_names))}",
|
||||
False,
|
||||
output,
|
||||
success,
|
||||
agent_id,
|
||||
session_ctx.session_id,
|
||||
)
|
||||
iteration += 1
|
||||
consecutive_failures += 1
|
||||
continue
|
||||
|
||||
prompt_builder.add_tool_call(parsed.tool, parsed.parameters)
|
||||
try:
|
||||
output, success = await self._broker.execute_tool(
|
||||
session_ctx,
|
||||
self.agent_type,
|
||||
parsed.tool,
|
||||
parsed.parameters,
|
||||
collector.collect(
|
||||
parsed.tool, parsed.parameters, output, success
|
||||
)
|
||||
except grpc.aio.AioRpcError as e:
|
||||
output = f"Tool execution error: {e.code()}"
|
||||
success = False
|
||||
|
||||
prompt_builder.add_tool_result(
|
||||
parsed.tool,
|
||||
output,
|
||||
success,
|
||||
agent_id,
|
||||
session_ctx.session_id,
|
||||
)
|
||||
if success:
|
||||
if parsed.tool in self.tool_output_sources:
|
||||
used_tool_output = True
|
||||
if parsed.tool == "web_search":
|
||||
used_web_search = True
|
||||
consecutive_failures = 0
|
||||
else:
|
||||
consecutive_failures += 1
|
||||
|
||||
if success:
|
||||
if parsed.tool in self.tool_output_sources:
|
||||
used_tool_output = True
|
||||
if parsed.tool == "web_search":
|
||||
used_web_search = True
|
||||
consecutive_failures = 0
|
||||
else:
|
||||
consecutive_failures += 1
|
||||
iteration += 1
|
||||
|
||||
iteration += 1
|
||||
|
||||
return build_failed_result(
|
||||
"Unexpected loop exit", self.summary_prefix
|
||||
) # pragma: no cover
|
||||
return build_failed_result(
|
||||
"Unexpected loop exit", self.summary_prefix
|
||||
) # pragma: no cover
|
||||
finally:
|
||||
elapsed = asyncio.get_event_loop().time() - start_time
|
||||
self._logger.info(
|
||||
"Agent run finished: agent_id=%s, iterations=%d, elapsed=%.2fs",
|
||||
agent_id,
|
||||
iteration,
|
||||
elapsed,
|
||||
)
|
||||
|
||||
@@ -79,6 +79,7 @@ class Config(msgspec.Struct):
|
||||
|
||||
host: str = "[::]"
|
||||
port: int = 50058
|
||||
grpc_web_port: int | None = None
|
||||
model_gateway_addr: str = "model-gateway:50055"
|
||||
tool_broker_addr: str = "tool-broker:50057"
|
||||
memory_addr: str = "memory-service:50054"
|
||||
|
||||
@@ -93,6 +93,7 @@ class TaskDecomposer:
|
||||
Returns a list of SubtaskDefinition protos. Falls back to a single-task
|
||||
plan if decomposition fails for any reason.
|
||||
"""
|
||||
response_text = ""
|
||||
try:
|
||||
prompt = _build_prompt(user_message)
|
||||
response_text = await self._gateway.inference(
|
||||
@@ -106,10 +107,18 @@ class TaskDecomposer:
|
||||
_validate_decomposition(subtasks, self._config.max_subtasks)
|
||||
return _to_proto_subtasks(subtasks)
|
||||
except grpc.aio.AioRpcError as e:
|
||||
logger.warning("Gateway error during decomposition: %s", e.code())
|
||||
logger.warning(
|
||||
"Gateway error during decomposition: code=%s, details=%s",
|
||||
e.code(),
|
||||
e.details(),
|
||||
)
|
||||
return _build_fallback_plan(user_message)
|
||||
except (DecompositionError, orjson.JSONDecodeError) as e:
|
||||
logger.warning("Decomposition failed: %s", e)
|
||||
logger.warning(
|
||||
"Decomposition parsing failed: %s, response_preview=%.200s",
|
||||
e,
|
||||
response_text[:200],
|
||||
)
|
||||
return _build_fallback_plan(user_message)
|
||||
|
||||
|
||||
|
||||
121
services/orchestrator/src/orchestrator/grpc_web_gateway.py
Normal file
121
services/orchestrator/src/orchestrator/grpc_web_gateway.py
Normal file
@@ -0,0 +1,121 @@
|
||||
"""Minimal gRPC-Web HTTP gateway.
|
||||
|
||||
Translates gRPC-Web (HTTP/1.1) requests into native gRPC calls to the
|
||||
local orchestrator server and streams back gRPC-Web framed responses.
|
||||
This eliminates the need for an external proxy (Caddy/Envoy) to handle
|
||||
the gRPC-Web protocol translation.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import struct
|
||||
|
||||
import grpc
|
||||
from aiohttp import web
|
||||
from llm_multiverse.v1 import orchestrator_pb2_grpc
|
||||
|
||||
logger = logging.getLogger("orchestrator.grpc_web")
|
||||
|
||||
|
||||
def _encode_frame(payload: bytes, *, trailer: bool = False) -> bytes:
|
||||
"""Encode a gRPC-Web frame: [flags:1][length:4][payload:N]."""
|
||||
flags = 0x80 if trailer else 0x00
|
||||
return struct.pack(">BI", flags, len(payload)) + payload
|
||||
|
||||
|
||||
def _decode_frame(data: bytes) -> bytes:
|
||||
"""Decode a single gRPC-Web data frame and return the protobuf payload."""
|
||||
if len(data) < 5:
|
||||
raise ValueError("gRPC-Web frame too short")
|
||||
_flags, length = struct.unpack(">BI", data[:5])
|
||||
if len(data) < 5 + length:
|
||||
raise ValueError("gRPC-Web frame payload truncated")
|
||||
return data[5 : 5 + length]
|
||||
|
||||
|
||||
def _status_code_number(code: grpc.StatusCode) -> int:
|
||||
"""Extract the numeric gRPC status code."""
|
||||
return code.value[0]
|
||||
|
||||
|
||||
async def _handle_grpc_web(request: web.Request) -> web.StreamResponse:
    """Proxy a gRPC-Web request to the local gRPC server.

    Reads a single gRPC-Web data frame from the request body, forwards it
    as a ProcessRequest call over the shared channel, streams each server
    message back as a gRPC-Web data frame, and finishes with a trailer
    frame carrying grpc-status/grpc-message.

    NOTE(review): the route captures a generic ``{method}`` segment, but
    this handler always decodes ProcessRequestRequest and always calls
    ProcessRequest — confirm that is intended before adding more RPCs.
    """
    channel: grpc.aio.Channel = request.app["grpc_channel"]
    stub = orchestrator_pb2_grpc.OrchestratorServiceStub(channel)

    # The body is expected to contain exactly one framed protobuf message;
    # _decode_frame raises ValueError on malformed input.
    body = await request.read()
    proto_bytes = _decode_frame(body)

    # Imported here rather than at module top — presumably to avoid an
    # import cycle with the generated proto modules; TODO confirm.
    from llm_multiverse.v1 import orchestrator_pb2

    req = orchestrator_pb2.ProcessRequestRequest()
    req.ParseFromString(proto_bytes)

    # gRPC-Web responses are always HTTP 200; RPC errors travel in the
    # trailer frame, not in the HTTP status code.
    response = web.StreamResponse(
        status=200,
        headers={
            "Content-Type": "application/grpc-web+proto",
            "Access-Control-Allow-Origin": "*",
        },
    )
    await response.prepare(request)

    grpc_status = 0
    grpc_message = ""

    try:
        # Relay every streamed server message as its own data frame.
        async for resp in stub.ProcessRequest(req):
            frame = _encode_frame(resp.SerializeToString())
            await response.write(frame)
    except grpc.aio.AioRpcError as e:
        # Headers were already sent by prepare(), so the failure can only
        # be reported via the grpc-status/grpc-message trailer.
        grpc_status = _status_code_number(e.code())
        grpc_message = e.details() or ""
        logger.warning(
            "gRPC-Web proxy error: code=%s, details=%s", e.code(), e.details()
        )

    # Trailer frame (flag 0x80) terminates the gRPC-Web stream.
    # NOTE(review): the gRPC-Web spec expects grpc-message to be
    # percent-encoded; raw details containing CR/LF or non-ASCII could
    # malform this trailer — verify against the spec.
    trailer_text = f"grpc-status: {grpc_status}\r\n"
    if grpc_message:
        trailer_text += f"grpc-message: {grpc_message}\r\n"
    await response.write(_encode_frame(trailer_text.encode(), trailer=True))
    await response.write_eof()
    return response
|
||||
|
||||
|
||||
async def _handle_cors(request: web.Request) -> web.Response:
    """Answer a CORS preflight (OPTIONS) for the gRPC-Web endpoint."""
    preflight_headers = {
        "Access-Control-Allow-Origin": "*",
        "Access-Control-Allow-Methods": "POST, OPTIONS",
        "Access-Control-Allow-Headers": (
            "Content-Type, X-Grpc-Web, X-User-Agent"
        ),
        # Allow browsers to cache the preflight result for a day.
        "Access-Control-Max-Age": "86400",
    }
    return web.Response(headers=preflight_headers)
|
||||
|
||||
|
||||
async def start_gateway(grpc_addr: str, port: int) -> web.AppRunner:
    """Bring up the gRPC-Web HTTP/1.1 gateway and return its runner.

    A single insecure channel to the native gRPC server at *grpc_addr* is
    stored on the app and shared by all requests. The caller owns the
    returned runner and is responsible for its cleanup.
    """
    service_route = "/llm_multiverse.v1.OrchestratorService/{method}"

    gateway_app = web.Application()
    gateway_app["grpc_channel"] = grpc.aio.insecure_channel(grpc_addr)
    gateway_app.router.add_post(service_route, _handle_grpc_web)
    gateway_app.router.add_options(service_route, _handle_cors)

    runner = web.AppRunner(gateway_app, access_log=None)
    await runner.setup()
    await web.TCPSite(runner, "0.0.0.0", port).start()
    logger.info("gRPC-Web gateway listening on 0.0.0.0:%d -> %s", port, grpc_addr)
    return runner
|
||||
144
services/orchestrator/src/orchestrator/output_collector.py
Normal file
144
services/orchestrator/src/orchestrator/output_collector.py
Normal file
@@ -0,0 +1,144 @@
|
||||
"""Lightweight collector for concrete tool outputs during the agent loop."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
# Maximum content length to store per artifact (256 KB).
|
||||
_MAX_CONTENT_LEN = 256 * 1024
|
||||
|
||||
# Tools whose outputs are context, not user-facing artifacts.
|
||||
_SKIP_TOOLS = frozenset({"memory_read", "memory_write"})
|
||||
|
||||
# Tool name → artifact type string (matches ArtifactType enum names).
|
||||
_TOOL_TYPE_MAP: dict[str, str] = {
|
||||
"fs_write": "code",
|
||||
"fs_read": "text",
|
||||
"run_code": "command_output",
|
||||
"run_shell": "command_output",
|
||||
"web_search": "search_result",
|
||||
}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class CollectedOutput:
|
||||
"""A single artifact captured from a tool execution."""
|
||||
|
||||
tool_name: str
|
||||
label: str
|
||||
content: str
|
||||
artifact_type: str
|
||||
metadata: dict[str, str]
|
||||
|
||||
|
||||
class OutputCollector:
|
||||
"""Collects concrete outputs during the agent tool call loop.
|
||||
|
||||
Populated after each successful tool execution. Maps tool names to
|
||||
artifact types so downstream code can build structured Artifact protos.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._entries: list[CollectedOutput] = []
|
||||
|
||||
def collect(
|
||||
self,
|
||||
tool_name: str,
|
||||
parameters: dict,
|
||||
output: str,
|
||||
success: bool,
|
||||
) -> None:
|
||||
"""Collect output from a tool execution.
|
||||
|
||||
Only successful calls for known, user-facing tools are collected.
|
||||
"""
|
||||
if not success:
|
||||
return
|
||||
if tool_name in _SKIP_TOOLS:
|
||||
return
|
||||
|
||||
artifact_type = _TOOL_TYPE_MAP.get(tool_name)
|
||||
if artifact_type is None:
|
||||
return
|
||||
|
||||
if tool_name == "fs_write":
|
||||
label = parameters.get("path", parameters.get("file_path", "file"))
|
||||
content = parameters.get("content", "")
|
||||
metadata = {"path": label}
|
||||
lang = _guess_language(label)
|
||||
if lang:
|
||||
metadata["language"] = lang
|
||||
elif tool_name == "fs_read":
|
||||
label = parameters.get("path", parameters.get("file_path", "file"))
|
||||
content = output
|
||||
metadata = {"path": label}
|
||||
lang = _guess_language(label)
|
||||
if lang:
|
||||
metadata["language"] = lang
|
||||
elif tool_name == "run_code":
|
||||
label = "Code execution"
|
||||
content = output
|
||||
lang = parameters.get("language", "")
|
||||
metadata = {"language": lang} if lang else {}
|
||||
elif tool_name == "run_shell":
|
||||
cmd = parameters.get("command", "")
|
||||
label = cmd[:80] if cmd else "Shell command"
|
||||
content = output
|
||||
metadata = {"command": cmd} if cmd else {}
|
||||
elif tool_name == "web_search":
|
||||
label = parameters.get("query", "Search")
|
||||
content = output
|
||||
metadata = {}
|
||||
else:
|
||||
return # pragma: no cover
|
||||
|
||||
# Truncate oversized content.
|
||||
if len(content) > _MAX_CONTENT_LEN:
|
||||
content = content[:_MAX_CONTENT_LEN] + "\n... [truncated]"
|
||||
|
||||
self._entries.append(
|
||||
CollectedOutput(
|
||||
tool_name=tool_name,
|
||||
label=label,
|
||||
content=content,
|
||||
artifact_type=artifact_type,
|
||||
metadata=metadata,
|
||||
)
|
||||
)
|
||||
|
||||
@property
|
||||
def entries(self) -> list[CollectedOutput]:
|
||||
"""Return a copy of all collected outputs."""
|
||||
return list(self._entries)
|
||||
|
||||
|
||||
def _guess_language(path: str) -> str:
|
||||
"""Guess programming language from file extension."""
|
||||
ext_map = {
|
||||
".py": "python",
|
||||
".js": "javascript",
|
||||
".ts": "typescript",
|
||||
".tsx": "typescript",
|
||||
".jsx": "javascript",
|
||||
".rs": "rust",
|
||||
".go": "go",
|
||||
".java": "java",
|
||||
".rb": "ruby",
|
||||
".sh": "bash",
|
||||
".yaml": "yaml",
|
||||
".yml": "yaml",
|
||||
".json": "json",
|
||||
".toml": "toml",
|
||||
".html": "html",
|
||||
".css": "css",
|
||||
".sql": "sql",
|
||||
".md": "markdown",
|
||||
".c": "c",
|
||||
".cpp": "cpp",
|
||||
".h": "c",
|
||||
".hpp": "cpp",
|
||||
}
|
||||
for ext, lang in ext_map.items():
|
||||
if path.endswith(ext):
|
||||
return lang
|
||||
return ""
|
||||
@@ -125,6 +125,13 @@ When done, respond with a JSON result block:
|
||||
"confidence": "VERIFIED|INFERRED|UNCERTAIN", \
|
||||
"memory_candidates": [{"content": "<fact>", "confidence": 0.0-1.0}]}
|
||||
|
||||
## IMPORTANT: Always Use Tools for Code
|
||||
- When asked to write or generate code, you MUST use `fs_write` to write the \
|
||||
code to a file and `run_code` to verify it. Do NOT just describe the code in \
|
||||
your summary or findings — the user needs the actual code as a tool artifact.
|
||||
- For new code requests, write to `/workspace/` (e.g. `/workspace/fibonacci.py`).
|
||||
- When using `run_code`, set working_dir to `/workspace/output`.
|
||||
|
||||
## Constraints
|
||||
- Do NOT fabricate file contents. Always read files before modifying them.
|
||||
- Do NOT attempt to use tools not listed in your capabilities.
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
|
||||
import grpc
|
||||
from google.protobuf import timestamp_pb2
|
||||
@@ -135,9 +136,12 @@ class OrchestratorServiceImpl(orchestrator_pb2_grpc.OrchestratorServiceServicer)
|
||||
|
||||
Pipeline: decompose → filter → dispatch → confidence → memory gate → respond.
|
||||
"""
|
||||
start_time = time.monotonic()
|
||||
logger.info(
|
||||
"ProcessRequest: session=%s, message=%s",
|
||||
"ProcessRequest: session=%s, message_len=%d, has_config=%s, preview=%.100s",
|
||||
request.session_id,
|
||||
len(request.user_message),
|
||||
request.HasField("session_config"),
|
||||
request.user_message[:100],
|
||||
)
|
||||
|
||||
@@ -179,10 +183,17 @@ class OrchestratorServiceImpl(orchestrator_pb2_grpc.OrchestratorServiceServicer)
|
||||
try:
|
||||
subtasks = await decomposer.decompose(session_ctx, request.user_message)
|
||||
except Exception as e:
|
||||
logger.error("Decomposition failed: %s", e)
|
||||
elapsed = time.monotonic() - start_time
|
||||
logger.error(
|
||||
"Decomposition failed: session=%s, error=%s, elapsed=%.2fs",
|
||||
request.session_id,
|
||||
e,
|
||||
elapsed,
|
||||
exc_info=True,
|
||||
)
|
||||
yield orchestrator_pb2.ProcessRequestResponse(
|
||||
state=orchestrator_pb2.ORCHESTRATION_STATE_COMPLETE,
|
||||
message="Failed to decompose request. Please try again.",
|
||||
message=f"Failed to decompose request: {type(e).__name__}. Please try again.",
|
||||
)
|
||||
return
|
||||
|
||||
@@ -260,6 +271,15 @@ class OrchestratorServiceImpl(orchestrator_pb2_grpc.OrchestratorServiceServicer)
|
||||
|
||||
outcomes = await dispatcher.dispatch_all()
|
||||
|
||||
for outcome in outcomes:
|
||||
logger.info(
|
||||
"Subtask outcome: session=%s, subtask=%s, status=%s%s",
|
||||
request.session_id,
|
||||
outcome.subtask_id,
|
||||
outcome.status,
|
||||
f", error={outcome.error}" if outcome.error else "",
|
||||
)
|
||||
|
||||
# Step 5: Evaluate confidence signals.
|
||||
# TODO: Wire ConfidenceReplanner to re-dispatch low-confidence subtasks.
|
||||
evaluator = ConfidenceEvaluator(self._config.confidence)
|
||||
@@ -289,14 +309,16 @@ class OrchestratorServiceImpl(orchestrator_pb2_grpc.OrchestratorServiceServicer)
|
||||
# Build final SubagentResult summary.
|
||||
final_result = _build_final_result(outcomes, confidence_report)
|
||||
|
||||
elapsed = time.monotonic() - start_time
|
||||
logger.info(
|
||||
"ProcessRequest complete: session=%s, subtasks=%d, "
|
||||
"confidence=%.2f, memory_accepted=%d/%d",
|
||||
"confidence=%.2f, memory_accepted=%d/%d, elapsed=%.2fs",
|
||||
request.session_id,
|
||||
len(subtasks),
|
||||
confidence_report.overall_confidence,
|
||||
gating_report.accepted_count,
|
||||
gating_report.total_candidates,
|
||||
elapsed,
|
||||
)
|
||||
|
||||
yield orchestrator_pb2.ProcessRequestResponse(
|
||||
@@ -341,7 +363,7 @@ def _synthesize_response(
|
||||
def _build_final_result(outcomes: list, confidence_report) -> common_pb2.SubagentResult:
|
||||
"""Build the final SubagentResult from all outcomes."""
|
||||
summaries: list[str] = []
|
||||
artifacts: list[str] = []
|
||||
artifacts: list[common_pb2.Artifact] = []
|
||||
all_candidates: list[common_pb2.MemoryCandidate] = []
|
||||
has_failure = False
|
||||
has_success = False
|
||||
|
||||
126
services/orchestrator/tests/test_agent_utils_artifacts.py
Normal file
126
services/orchestrator/tests/test_agent_utils_artifacts.py
Normal file
@@ -0,0 +1,126 @@
|
||||
"""Tests for _build_artifacts in agent_utils."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from llm_multiverse.v1 import common_pb2
|
||||
|
||||
from orchestrator.agent_utils import _build_artifacts, build_success_result
|
||||
from orchestrator.output_collector import OutputCollector
|
||||
from orchestrator.parser import DoneSignalParsed
|
||||
|
||||
|
||||
def _make_parsed(
    summary: str = "Done.",
    findings: list[str] | None = None,
    confidence: str = "VERIFIED",
) -> DoneSignalParsed:
    """Construct a DoneSignalParsed with sensible test defaults."""
    if findings is None:
        findings = []
    return DoneSignalParsed(summary=summary, findings=findings, confidence=confidence)
|
||||
|
||||
|
||||
def test_build_artifacts_from_collector():
    """Collector entries are converted to Artifact protos."""
    outputs = OutputCollector()
    outputs.collect("fs_write", {"path": "/main.py", "content": "print(1)"}, "ok", True)
    outputs.collect("run_code", {"language": "python"}, "1\n", True)

    artifacts = _build_artifacts(_make_parsed(), outputs)

    assert len(artifacts) == 2
    code_artifact = artifacts[0]
    assert code_artifact.artifact_type == common_pb2.ARTIFACT_TYPE_CODE
    assert code_artifact.label == "/main.py"
    assert code_artifact.content == "print(1)"
    assert code_artifact.metadata["language"] == "python"
    run_artifact = artifacts[1]
    assert run_artifact.artifact_type == common_pb2.ARTIFACT_TYPE_COMMAND_OUTPUT
    assert run_artifact.content == "1\n"
|
||||
|
||||
|
||||
def test_build_artifacts_findings_as_text():
    """Findings are always included as TEXT artifacts."""
    done_signal = _make_parsed(findings=["Fixed null check", "Improved error handling"])

    artifacts = _build_artifacts(done_signal, None)

    assert len(artifacts) == 2
    assert all(a.artifact_type == common_pb2.ARTIFACT_TYPE_TEXT for a in artifacts)
    assert artifacts[0].label == "Finding"
    assert artifacts[0].content == "Fixed null check"
    assert artifacts[1].content == "Improved error handling"
|
||||
|
||||
|
||||
def test_build_artifacts_collector_plus_findings():
    """Both collector entries and findings are included."""
    outputs = OutputCollector()
    outputs.collect("web_search", {"query": "python docs"}, "results...", True)
    done_signal = _make_parsed(findings=["Found documentation"])

    artifacts = _build_artifacts(done_signal, outputs)

    assert len(artifacts) == 2
    search_artifact, finding_artifact = artifacts
    assert search_artifact.artifact_type == common_pb2.ARTIFACT_TYPE_SEARCH_RESULT
    assert search_artifact.label == "python docs"
    assert finding_artifact.artifact_type == common_pb2.ARTIFACT_TYPE_TEXT
    assert finding_artifact.content == "Found documentation"
|
||||
|
||||
|
||||
def test_build_artifacts_empty():
    """No collector and no findings produces empty list."""
    assert _build_artifacts(_make_parsed(), None) == []
|
||||
|
||||
|
||||
def test_build_artifacts_none_parsed():
    """None parsed (partial result path) still builds from collector."""
    outputs = OutputCollector()
    outputs.collect("fs_read", {"path": "/test.py"}, "content", True)

    artifacts = _build_artifacts(None, outputs)

    assert len(artifacts) == 1
    assert artifacts[0].artifact_type == common_pb2.ARTIFACT_TYPE_TEXT
|
||||
|
||||
|
||||
def test_build_success_result_with_collector():
    """build_success_result includes structured artifacts from collector."""
    outputs = OutputCollector()
    outputs.collect(
        "fs_write", {"path": "/app.ts", "content": "const x = 1;"}, "ok", True
    )
    done_signal = _make_parsed(findings=["Created app.ts"])

    result = build_success_result(done_signal, True, False, outputs)

    assert result.status == common_pb2.RESULT_STATUS_SUCCESS
    assert len(result.artifacts) == 2
    code_artifact, finding_artifact = result.artifacts
    assert code_artifact.artifact_type == common_pb2.ARTIFACT_TYPE_CODE
    assert code_artifact.label == "/app.ts"
    assert code_artifact.content == "const x = 1;"
    assert finding_artifact.artifact_type == common_pb2.ARTIFACT_TYPE_TEXT
    assert finding_artifact.content == "Created app.ts"
|
||||
|
||||
|
||||
def test_build_success_result_backward_compat_no_collector():
    """build_success_result without collector still works (findings only)."""
    result = build_success_result(_make_parsed(findings=["Some finding"]), False, False)

    assert len(result.artifacts) == 1
    only_artifact = result.artifacts[0]
    assert only_artifact.artifact_type == common_pb2.ARTIFACT_TYPE_TEXT
    assert only_artifact.content == "Some finding"
|
||||
|
||||
|
||||
def test_artifact_metadata_map():
    """Metadata map is correctly populated on proto Artifact."""
    outputs = OutputCollector()
    outputs.collect(
        "fs_write",
        {"path": "/src/lib.rs", "content": "fn main() {}"},
        "ok",
        True,
    )

    artifacts = _build_artifacts(_make_parsed(), outputs)

    metadata = artifacts[0].metadata
    assert metadata["path"] == "/src/lib.rs"
    assert metadata["language"] == "rust"
|
||||
@@ -96,7 +96,8 @@ async def test_simple_assistant_task():
|
||||
result = await agent.run(_make_request())
|
||||
assert result.status == common_pb2.RESULT_STATUS_SUCCESS
|
||||
assert "Docker" in result.summary
|
||||
assert result.artifacts == ["Uses namespaces"]
|
||||
finding_contents = [a.content for a in result.artifacts if a.label == "Finding"]
|
||||
assert "Uses namespaces" in finding_contents
|
||||
assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED
|
||||
|
||||
|
||||
|
||||
@@ -100,7 +100,8 @@ async def test_simple_coding_task():
|
||||
result = await agent.run(_make_request())
|
||||
assert result.status == common_pb2.RESULT_STATUS_SUCCESS
|
||||
assert "Fixed" in result.summary
|
||||
assert result.artifacts == ["Fixed null check"]
|
||||
finding_contents = [a.content for a in result.artifacts if a.label == "Finding"]
|
||||
assert "Fixed null check" in finding_contents
|
||||
assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED
|
||||
|
||||
|
||||
@@ -458,10 +459,10 @@ async def test_compaction_triggers_and_continues():
|
||||
'{"done": true, "summary": "Done after compaction.", "confidence": "VERIFIED"}',
|
||||
],
|
||||
exec_output="A" * 600,
|
||||
config=AgentConfig(max_iterations=20, max_tokens=1800),
|
||||
config=AgentConfig(max_iterations=20, max_tokens=2400),
|
||||
)
|
||||
agent._gateway.inference = AsyncMock(return_value="- Short summary")
|
||||
result = await agent.run(_make_request(max_tokens=1800))
|
||||
result = await agent.run(_make_request(max_tokens=2400))
|
||||
assert result.status == common_pb2.RESULT_STATUS_SUCCESS
|
||||
assert "Done after compaction" in result.summary
|
||||
|
||||
|
||||
@@ -67,7 +67,8 @@ async def test_e2e_web_search_research_task():
|
||||
result = await ctx.agent.run(make_request())
|
||||
assert result.status == common_pb2.RESULT_STATUS_SUCCESS
|
||||
assert "Paris" in result.summary
|
||||
assert result.artifacts == ["Paris"]
|
||||
finding_contents = [a.content for a in result.artifacts if a.label == "Finding"]
|
||||
assert "Paris" in finding_contents
|
||||
assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED
|
||||
assert result.source == common_pb2.RESULT_SOURCE_WEB
|
||||
|
||||
@@ -306,9 +307,9 @@ async def test_e2e_subagent_result_schema():
|
||||
assert isinstance(result.summary, str)
|
||||
assert len(result.summary) > 0
|
||||
|
||||
# Artifacts is a list of strings
|
||||
# Artifacts is a list of Artifact proto messages
|
||||
assert len(result.artifacts) > 0
|
||||
assert all(isinstance(a, str) for a in result.artifacts)
|
||||
assert all(hasattr(a, "label") and hasattr(a, "content") for a in result.artifacts)
|
||||
|
||||
# Quality is valid enum
|
||||
assert result.result_quality in (
|
||||
|
||||
@@ -145,7 +145,7 @@ async def test_researcher_multi_step_web_search():
|
||||
assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED
|
||||
assert result.source == common_pb2.RESULT_SOURCE_WEB
|
||||
assert "Quantum" in result.summary or "quantum" in result.summary
|
||||
assert len(result.artifacts) == 3
|
||||
assert len(result.artifacts) == 5
|
||||
assert len(result.new_memory_candidates) == 1
|
||||
assert agent._broker.execute_tool.call_count == 2
|
||||
assert agent._broker.discover_tools.call_count == 1
|
||||
@@ -443,7 +443,8 @@ async def test_researcher_produces_valid_subagent_result():
|
||||
assert result.summary == "Validated result."
|
||||
assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED
|
||||
assert result.source == common_pb2.RESULT_SOURCE_WEB
|
||||
assert result.artifacts == ["finding1"]
|
||||
finding_contents = [a.content for a in result.artifacts if a.label == "Finding"]
|
||||
assert "finding1" in finding_contents
|
||||
assert result.failure_reason == ""
|
||||
assert len(result.new_memory_candidates) == 1
|
||||
assert result.new_memory_candidates[0].content == "fact1"
|
||||
|
||||
207
services/orchestrator/tests/test_output_collector.py
Normal file
207
services/orchestrator/tests/test_output_collector.py
Normal file
@@ -0,0 +1,207 @@
|
||||
"""Tests for the OutputCollector class."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from orchestrator.output_collector import OutputCollector
|
||||
|
||||
|
||||
def test_collect_fs_write():
    """fs_write produces a CODE artifact with file content from parameters."""
    collector = OutputCollector()
    collector.collect(
        "fs_write",
        {"path": "/src/main.py", "content": "def hello():\n pass\n"},
        "File written successfully",
        True,
    )
    assert len(collector.entries) == 1
    entry = collector.entries[0]
    # The artifact mirrors the write parameters, not the tool's textual output.
    assert entry.tool_name == "fs_write"
    assert entry.artifact_type == "code"
    assert entry.label == "/src/main.py"
    assert entry.content == "def hello():\n pass\n"
    assert entry.metadata["path"] == "/src/main.py"
    assert entry.metadata["language"] == "python"
|
||||
|
||||
|
||||
def test_collect_fs_write_file_path_param():
    """fs_write also works with 'file_path' parameter key."""
    collector = OutputCollector()
    collector.collect(
        "fs_write",
        {"file_path": "/app.js", "content": "console.log('hi')"},
        "ok",
        True,
    )
    entry = collector.entries[0]
    assert entry.label == "/app.js"
    assert entry.metadata["language"] == "javascript"
|
||||
|
||||
|
||||
def test_collect_fs_read():
    """fs_read produces a TEXT artifact with tool output as content."""
    collector = OutputCollector()
    collector.collect(
        tool_name="fs_read",
        parameters={"path": "/config.yaml"},
        output="key: value\nother: 42",
        success=True,
    )
    assert len(collector.entries) == 1
    entry = collector.entries[0]
    # For reads, the content comes from the tool output rather than parameters.
    assert entry.artifact_type == "text"
    assert entry.label == "/config.yaml"
    assert entry.content == "key: value\nother: 42"
    assert entry.metadata["language"] == "yaml"
|
||||
|
||||
|
||||
def test_collect_run_code():
    """run_code produces a COMMAND_OUTPUT artifact."""
    collector = OutputCollector()
    collector.collect(
        "run_code",
        {"language": "python", "code": "print(42)"},
        "42\n",
        True,
    )
    entries = collector.entries
    assert len(entries) == 1
    entry = entries[0]
    assert entry.artifact_type == "command_output"
    # run_code has no file path, so a fixed label is used.
    assert entry.label == "Code execution"
    assert entry.content == "42\n"
    assert entry.metadata["language"] == "python"
|
||||
|
||||
|
||||
def test_collect_run_shell():
    """run_shell produces a COMMAND_OUTPUT artifact with truncated command label."""
    collector = OutputCollector()
    collector.collect(
        tool_name="run_shell",
        parameters={"command": "ls -la /tmp"},
        output="total 4\ndrwxrwxrwt 2 root root 40 Jan 1 00:00 .\n",
        success=True,
    )
    assert len(collector.entries) == 1
    entry = collector.entries[0]
    assert entry.artifact_type == "command_output"
    # Short commands fit entirely in the label; the raw command is kept in metadata.
    assert entry.label == "ls -la /tmp"
    assert entry.metadata["command"] == "ls -la /tmp"
|
||||
|
||||
|
||||
def test_collect_web_search():
    """web_search produces a SEARCH_RESULT artifact."""
    collector = OutputCollector()
    collector.collect(
        "web_search",
        {"query": "python async tutorial"},
        "Result 1: ...\nResult 2: ...",
        True,
    )
    assert len(collector.entries) == 1
    entry = collector.entries[0]
    assert entry.artifact_type == "search_result"
    # The search query doubles as the artifact label.
    assert entry.label == "python async tutorial"
    assert "Result 1" in entry.content
|
||||
|
||||
|
||||
def test_skips_failed_tools():
    """Failed tool executions are not collected."""
    collector = OutputCollector()
    collector.collect(
        "fs_write",
        {"path": "/test.py", "content": "code"},
        "Permission denied",
        False,  # success=False: the error output must not become an artifact
    )
    assert not collector.entries
|
||||
|
||||
|
||||
def test_skips_memory_tools():
    """memory_read and memory_write are excluded."""
    collector = OutputCollector()
    # Both memory tools succeed but must still be ignored by the collector.
    calls = (
        ("memory_read", {"query": "something"}, "memory content"),
        ("memory_write", {"content": "save this"}, "ok"),
    )
    for name, params, output in calls:
        collector.collect(tool_name=name, parameters=params, output=output, success=True)
    assert len(collector.entries) == 0
|
||||
|
||||
|
||||
def test_skips_unknown_tools():
    """Unknown tool names are ignored."""
    collector = OutputCollector()
    collector.collect(
        tool_name="custom_tool",
        parameters={"data": "stuff"},
        output="result",
        success=True,
    )
    # Tools outside the known mapping never produce entries.
    assert len(collector.entries) == 0
|
||||
|
||||
|
||||
def test_multiple_tools_collected():
    """Multiple tool outputs are collected in order."""
    collector = OutputCollector()
    collector.collect("fs_read", {"path": "/a.py"}, "content_a", True)
    collector.collect("fs_write", {"path": "/b.py", "content": "code_b"}, "ok", True)
    collector.collect("run_code", {"language": "python"}, "output_c", True)

    labels = [entry.label for entry in collector.entries]
    # Entries preserve collection order.
    assert labels == ["/a.py", "/b.py", "Code execution"]
|
||||
|
||||
|
||||
def test_content_truncation():
    """Oversized content is truncated."""
    collector = OutputCollector()
    # 300 KiB of payload — well past the collector's size cap.
    oversized = "x" * (300 * 1024)
    collector.collect(
        tool_name="fs_read",
        parameters={"path": "/big.txt"},
        output=oversized,
        success=True,
    )
    assert len(collector.entries) == 1
    stored = collector.entries[0].content
    assert len(stored) < len(oversized)
    assert stored.endswith("... [truncated]")
|
||||
|
||||
|
||||
def test_entries_returns_copy():
    """entries property returns a copy, not the internal list."""
    collector = OutputCollector()
    collector.collect("fs_read", {"path": "/a.py"}, "content", True)
    first = collector.entries
    second = collector.entries
    # Equal contents, but distinct list objects on every access.
    assert first == second
    assert first is not second
|
||||
|
||||
|
||||
def test_language_detection():
    """Language is correctly guessed from various file extensions.

    Each case uses a fresh collector so ``entries[0]`` always refers to the
    file under test.
    """
    # (path, expected language tag); unknown extensions yield the empty string.
    test_cases = [
        ("/app.ts", "typescript"),
        ("/style.css", "css"),
        ("/main.go", "go"),
        ("/lib.rs", "rust"),
        ("/query.sql", "sql"),
        ("/unknown.xyz", ""),
    ]
    for path, expected_lang in test_cases:
        collector = OutputCollector()
        collector.collect("fs_read", {"path": path}, "content", True)
        actual = collector.entries[0].metadata.get("language", "")
        assert actual == expected_lang, f"Expected '{expected_lang}' for {path}, got '{actual}'"
|
||||
@@ -93,7 +93,8 @@ async def test_simple_research_task():
|
||||
result = await agent.run(_make_request())
|
||||
assert result.status == common_pb2.RESULT_STATUS_SUCCESS
|
||||
assert "Paris" in result.summary
|
||||
assert result.artifacts == ["Paris"]
|
||||
finding_contents = [a.content for a in result.artifacts if a.label == "Finding"]
|
||||
assert "Paris" in finding_contents
|
||||
assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED
|
||||
|
||||
|
||||
|
||||
@@ -419,7 +419,7 @@ def test_build_final_result_all_success():
|
||||
r1 = common_pb2.SubagentResult(
|
||||
status=common_pb2.RESULT_STATUS_SUCCESS,
|
||||
summary="Done A",
|
||||
artifacts=["file1.txt"],
|
||||
artifacts=[common_pb2.Artifact(label="Finding", content="file1.txt", artifact_type=common_pb2.ARTIFACT_TYPE_TEXT)],
|
||||
)
|
||||
outcomes = [SubtaskOutcome(subtask_id="t-1", status="success", result=r1)]
|
||||
report = ConfidenceReport(
|
||||
@@ -430,7 +430,7 @@ def test_build_final_result_all_success():
|
||||
assert result.status == common_pb2.RESULT_STATUS_SUCCESS
|
||||
assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED
|
||||
assert "Done A" in result.summary
|
||||
assert "file1.txt" in result.artifacts
|
||||
assert any(a.content == "file1.txt" for a in result.artifacts)
|
||||
|
||||
|
||||
def test_build_final_result_partial():
|
||||
|
||||
@@ -99,7 +99,8 @@ async def test_simple_sysadmin_task():
|
||||
result = await agent.run(_make_request())
|
||||
assert result.status == common_pb2.RESULT_STATUS_SUCCESS
|
||||
assert "nginx" in result.summary
|
||||
assert result.artifacts == ["Config valid"]
|
||||
finding_contents = [a.content for a in result.artifacts if a.label == "Finding"]
|
||||
assert "Config valid" in finding_contents
|
||||
assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED
|
||||
|
||||
|
||||
|
||||
@@ -14,7 +14,8 @@ from readability import Document
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DEFAULT_USER_AGENT = (
|
||||
"Mozilla/5.0 (compatible; llm-multiverse-search/0.1; +https://localhost)"
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
|
||||
"(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
|
||||
)
|
||||
DEFAULT_TIMEOUT = 10.0
|
||||
DEFAULT_MAX_CONTENT_LENGTH = 8000 # characters
|
||||
|
||||
Reference in New Issue
Block a user