Sub-agents now harvest concrete tool outputs (code written via fs_write, command output from run_code, search results from web_search) during execution and propagate them as structured Artifact protos through the response pipeline to the frontend. - Add ArtifactType enum and Artifact message to common.proto - Add OutputCollector to capture tool outputs during agent loop - Update agent_utils to build Artifact protos from collected outputs - Wire collector into base_agent.py and all result builders - Update service.py artifact type annotation - Add mandatory tool-use instructions to coder system prompt - Create /workspace/output in tool-broker container for code execution - Install python3 in tool-broker container for run_code support - Fix path allowlist for /tmp and /workspace exact paths - Add SearXNG and Search Service to external network for web access - Use browser User-Agent for page content extraction - Increase agent and dispatcher timeouts from 120s to 300s - Update README for timeout defaults and network configuration - Add tests for OutputCollector and artifact building (21 new tests) - Update 9 existing tests for proto schema change Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
473 lines
16 KiB
Python
473 lines
16 KiB
Python
"""Tests for the sysadmin agent loop."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from unittest.mock import AsyncMock
|
|
|
|
from llm_multiverse.v1 import common_pb2, memory_pb2, orchestrator_pb2, tool_broker_pb2
|
|
|
|
from orchestrator.agent_utils import format_memory
|
|
from orchestrator.sysadmin import SysadminAgent
|
|
from orchestrator.config import AgentConfig
|
|
|
|
|
|
def _make_request(
    task: str = "Configure nginx reverse proxy",
    memory_context: list[str] | None = None,
    max_tokens: int = 4096,
) -> orchestrator_pb2.SubagentRequest:
    """Build a SubagentRequest aimed at the sysadmin agent with test defaults."""
    request = orchestrator_pb2.SubagentRequest(
        context=common_pb2.SessionContext(session_id="sess-1", user_id="user-1"),
        agent_id="sys-test",
        agent_type=common_pb2.AGENT_TYPE_SYSADMIN,
        task=task,
        max_tokens=max_tokens,
    )
    # Optional pre-filled memory context (repeated string field).
    for item in memory_context or []:
        request.relevant_memory_context.append(item)
    return request
|
|
|
|
|
|
def _make_tool(name: str) -> tool_broker_pb2.ToolDefinition:
    """Create a minimal ToolDefinition with a single required 'query' parameter."""
    query_schema = tool_broker_pb2.ParameterSchema(type="string", description="Query")
    return tool_broker_pb2.ToolDefinition(
        name=name,
        description=f"{name} tool",
        parameters={"query": query_schema},
        required_params=["query"],
    )
|
|
|
|
|
|
def _make_agent(
    gateway_responses: list[str] | None = None,
    tools: list[tool_broker_pb2.ToolDefinition] | None = None,
    exec_output: str = "command output",
    exec_success: bool = True,
    memory_results: list | None = None,
    config: AgentConfig | None = None,
) -> SysadminAgent:
    """Assemble a SysadminAgent wired to fully mocked gateway/broker/memory clients."""
    gateway = AsyncMock()
    broker = AsyncMock()
    memory = AsyncMock()

    # Gateway: scripted per-call responses, or a single immediate done-signal.
    if gateway_responses is None:
        gateway.stream_inference = AsyncMock(
            return_value='{"done": true, "summary": "Done.", "confidence": "VERIFIED"}'
        )
    else:
        gateway.stream_inference = AsyncMock(side_effect=gateway_responses)

    # Broker: caller-supplied tool set, or the standard sysadmin toolbox.
    if tools is None:
        tools = [
            _make_tool(tool_name)
            for tool_name in ("run_shell", "package_install", "fs_read", "fs_write")
        ]
    broker.discover_tools = AsyncMock(return_value=tools)
    broker.execute_tool = AsyncMock(return_value=(exec_output, exec_success))

    # Memory: canned query hits, empty by default.
    memory.query_memory = AsyncMock(
        return_value=[] if memory_results is None else memory_results
    )

    return SysadminAgent(
        model_gateway=gateway,
        tool_broker=broker,
        memory=memory,
        config=config or AgentConfig(),
    )
|
|
|
|
|
|
async def test_simple_sysadmin_task():
    """Model reads config, runs a shell command, then signals done."""
    scripted = [
        '{"tool": "fs_read", "parameters": {"file_path": "/etc/nginx/nginx.conf"}}',
        '{"tool": "run_shell", "parameters": {"command": "nginx -t"}}',
        '{"done": true, "summary": "Configured nginx.", "findings": ["Config valid"], "confidence": "VERIFIED"}',
    ]
    agent = _make_agent(
        gateway_responses=scripted,
        exec_output="nginx: configuration file syntax is ok",
    )
    result = await agent.run(_make_request())
    assert result.status == common_pb2.RESULT_STATUS_SUCCESS
    assert "nginx" in result.summary
    # Findings surface as artifacts labelled "Finding".
    findings = [art.content for art in result.artifacts if art.label == "Finding"]
    assert "Config valid" in findings
    assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED
|
|
|
|
|
|
async def test_tool_discovery_uses_sysadmin_type():
    """DiscoverTools must be invoked with AGENT_TYPE_SYSADMIN."""
    agent = _make_agent()
    await agent.run(_make_request())
    agent._broker.discover_tools.assert_called_once()
    call = agent._broker.discover_tools.call_args
    passed_values = list(call.args) + list(call.kwargs.values())
    assert common_pb2.AGENT_TYPE_SYSADMIN in passed_values
|
|
|
|
|
|
async def test_no_tools_available():
    """An empty tool discovery result makes the agent fail outright."""
    result = await _make_agent(tools=[]).run(_make_request())
    assert result.status == common_pb2.RESULT_STATUS_FAILED
    assert "No tools" in result.failure_reason
|
|
|
|
|
|
async def test_tool_execution_uses_sysadmin_type():
    """ExecuteTool must be invoked with AGENT_TYPE_SYSADMIN."""
    scripted = [
        '{"tool": "run_shell", "parameters": {"command": "whoami"}}',
        '{"done": true, "summary": "Done.", "confidence": "VERIFIED"}',
    ]
    agent = _make_agent(gateway_responses=scripted)
    await agent.run(_make_request())
    agent._broker.execute_tool.assert_called_once()
    call = agent._broker.execute_tool.call_args
    passed_values = list(call.args) + list(call.kwargs.values())
    assert common_pb2.AGENT_TYPE_SYSADMIN in passed_values
|
|
|
|
|
|
async def test_memory_context_from_request():
    """Memory context already on the request suppresses the QueryMemory call."""
    agent = _make_agent()
    await agent.run(_make_request(memory_context=["Pre-loaded memory"]))
    agent._memory.query_memory.assert_not_called()
|
|
|
|
|
|
async def test_memory_query_enrichment():
    """Without pre-filled memory, the agent queries the memory service once."""
    hit = memory_pb2.QueryMemoryResponse(
        rank=0,
        entry=memory_pb2.MemoryEntry(name="relevant-mem", description="Some info"),
        cosine_similarity=0.9,
    )
    agent = _make_agent(memory_results=[hit])
    await agent.run(_make_request())
    agent._memory.query_memory.assert_called_once()
|
|
|
|
|
|
async def test_max_iterations_termination():
    """A model that never stops calling tools hits the iteration cap -> PARTIAL."""
    endless_tool_call = '{"tool": "run_shell", "parameters": {"command": "ls"}}'
    agent = _make_agent(
        gateway_responses=[endless_tool_call] * 15,
        config=AgentConfig(max_iterations=3),
    )
    result = await agent.run(_make_request())
    assert result.status == common_pb2.RESULT_STATUS_PARTIAL
    assert "Max iterations" in result.summary
|
|
|
|
|
|
async def test_timeout_termination():
    """A zero-second deadline terminates the loop early."""
    agent = _make_agent(config=AgentConfig(timeout_seconds=0))
    agent._gateway.stream_inference = AsyncMock(
        return_value='{"tool": "run_shell", "parameters": {"command": "ls"}}'
    )
    result = await agent.run(_make_request())
    # Timing-dependent: either the deadline fires (PARTIAL) or the loop
    # finishes first (SUCCESS); both outcomes are acceptable.
    acceptable = (
        common_pb2.RESULT_STATUS_PARTIAL,
        common_pb2.RESULT_STATUS_SUCCESS,
    )
    assert result.status in acceptable
|
|
|
|
|
|
async def test_consecutive_tool_failures():
    """Repeated tool failures abort the run with FAILED."""
    failing_call = '{"tool": "run_shell", "parameters": {"command": "ls"}}'
    agent = _make_agent(
        gateway_responses=[failing_call] * 5,
        exec_success=False,
        exec_output="error",
        config=AgentConfig(max_iterations=10),
    )
    result = await agent.run(_make_request())
    assert result.status == common_pb2.RESULT_STATUS_FAILED
    assert "consecutive failures" in result.failure_reason
|
|
|
|
|
|
async def test_model_gateway_error():
    """A gRPC UNAVAILABLE from the gateway yields a FAILED result."""
    import grpc

    rpc_error = grpc.aio.AioRpcError(
        grpc.StatusCode.UNAVAILABLE,
        initial_metadata=grpc.aio.Metadata(),
        trailing_metadata=grpc.aio.Metadata(),
        details="down",
    )
    agent = _make_agent()
    agent._gateway.stream_inference = AsyncMock(side_effect=rpc_error)
    result = await agent.run(_make_request())
    assert result.status == common_pb2.RESULT_STATUS_FAILED
    assert "gateway" in result.failure_reason.lower()
|
|
|
|
|
|
async def test_confidence_verified_maps_to_success():
    """A VERIFIED done-signal maps to SUCCESS with VERIFIED quality."""
    done = '{"done": true, "summary": "Done.", "confidence": "VERIFIED"}'
    result = await _make_agent(gateway_responses=[done]).run(_make_request())
    assert result.status == common_pb2.RESULT_STATUS_SUCCESS
    assert result.result_quality == common_pb2.RESULT_QUALITY_VERIFIED
|
|
|
|
|
|
async def test_confidence_uncertain_maps_to_partial():
    """An UNCERTAIN done-signal maps to PARTIAL with UNCERTAIN quality."""
    done = '{"done": true, "summary": "Not sure.", "confidence": "UNCERTAIN"}'
    result = await _make_agent(gateway_responses=[done]).run(_make_request())
    assert result.status == common_pb2.RESULT_STATUS_PARTIAL
    assert result.result_quality == common_pb2.RESULT_QUALITY_UNCERTAIN
|
|
|
|
|
|
async def test_run_shell_sets_source_tool_output():
    """Executing run_shell marks the result source as RESULT_SOURCE_TOOL_OUTPUT."""
    scripted = [
        '{"tool": "run_shell", "parameters": {"command": "systemctl status nginx"}}',
        '{"done": true, "summary": "Checked status.", "confidence": "VERIFIED"}',
    ]
    agent = _make_agent(gateway_responses=scripted, exec_output="active (running)")
    outcome = await agent.run(_make_request())
    assert outcome.source == common_pb2.RESULT_SOURCE_TOOL_OUTPUT
|
|
|
|
|
|
async def test_package_install_sets_source_tool_output():
    """Executing package_install marks the result source as RESULT_SOURCE_TOOL_OUTPUT."""
    scripted = [
        '{"tool": "package_install", "parameters": {"packages": "nginx"}}',
        '{"done": true, "summary": "Installed nginx.", "confidence": "VERIFIED"}',
    ]
    agent = _make_agent(gateway_responses=scripted, exec_output="installed nginx")
    outcome = await agent.run(_make_request())
    assert outcome.source == common_pb2.RESULT_SOURCE_TOOL_OUTPUT
|
|
|
|
|
|
async def test_fs_read_sets_source_tool_output():
    """Executing fs_read marks the result source as RESULT_SOURCE_TOOL_OUTPUT."""
    scripted = [
        '{"tool": "fs_read", "parameters": {"file_path": "/etc/hosts"}}',
        '{"done": true, "summary": "Read file.", "confidence": "VERIFIED"}',
    ]
    agent = _make_agent(gateway_responses=scripted, exec_output="127.0.0.1 localhost")
    outcome = await agent.run(_make_request())
    assert outcome.source == common_pb2.RESULT_SOURCE_TOOL_OUTPUT
|
|
|
|
|
|
async def test_no_tools_sets_source_model_knowledge():
    """Finishing without any tool call marks the source as RESULT_SOURCE_MODEL_KNOWLEDGE."""
    done = '{"done": true, "summary": "I know this.", "confidence": "VERIFIED"}'
    result = await _make_agent(gateway_responses=[done]).run(_make_request())
    assert result.source == common_pb2.RESULT_SOURCE_MODEL_KNOWLEDGE
|
|
|
|
|
|
async def test_unknown_tool_reports_error():
    """A tool outside the discovered set is reported back, never executed."""
    scripted = [
        '{"tool": "web_search", "parameters": {"query": "nginx docs"}}',
        '{"done": true, "summary": "Done.", "confidence": "UNCERTAIN"}',
    ]
    agent = _make_agent(gateway_responses=scripted)
    result = await agent.run(_make_request())
    acceptable = (
        common_pb2.RESULT_STATUS_PARTIAL,
        common_pb2.RESULT_STATUS_SUCCESS,
    )
    assert result.status in acceptable
    # The broker must never have been asked to run the unknown tool.
    agent._broker.execute_tool.assert_not_called()
|
|
|
|
|
|
async def test_tool_discovery_grpc_error():
    """A gRPC failure during tool discovery produces a FAILED result."""
    import grpc

    rpc_error = grpc.aio.AioRpcError(
        grpc.StatusCode.UNAVAILABLE,
        initial_metadata=grpc.aio.Metadata(),
        trailing_metadata=grpc.aio.Metadata(),
        details="down",
    )
    agent = _make_agent()
    agent._broker.discover_tools = AsyncMock(side_effect=rpc_error)
    result = await agent.run(_make_request())
    assert result.status == common_pb2.RESULT_STATUS_FAILED
    assert "discovery" in result.failure_reason.lower()
|
|
|
|
|
|
async def test_memory_service_unavailable_continues():
    """A memory-service outage is tolerated; the run still succeeds."""
    import grpc

    done = '{"done": true, "summary": "Done without memory.", "confidence": "VERIFIED"}'
    agent = _make_agent(gateway_responses=[done])
    agent._memory.query_memory = AsyncMock(
        side_effect=grpc.aio.AioRpcError(
            grpc.StatusCode.UNAVAILABLE,
            initial_metadata=grpc.aio.Metadata(),
            trailing_metadata=grpc.aio.Metadata(),
            details="down",
        )
    )
    result = await agent.run(_make_request())
    assert result.status == common_pb2.RESULT_STATUS_SUCCESS
|
|
|
|
|
|
async def test_parse_error_handling():
    """Unparseable model output (empty tool name) is recovered from."""
    scripted = [
        '{"tool": "", "parameters": {}}',
        '{"done": true, "summary": "Done after error.", "confidence": "VERIFIED"}',
    ]
    result = await _make_agent(gateway_responses=scripted).run(_make_request())
    assert result.status == common_pb2.RESULT_STATUS_SUCCESS
    assert "Done after error" in result.summary
|
|
|
|
|
|
async def test_plain_reasoning_continues():
    """Plain-text reasoning output does not stop the loop; the done signal does."""
    scripted = [
        "Let me check the system configuration...",
        '{"done": true, "summary": "Analyzed.", "confidence": "INFERRED"}',
    ]
    result = await _make_agent(gateway_responses=scripted).run(_make_request())
    assert result.status == common_pb2.RESULT_STATUS_SUCCESS
    assert result.result_quality == common_pb2.RESULT_QUALITY_INFERRED
|
|
|
|
|
|
async def test_tool_execution_grpc_error():
    """gRPC errors raised by ExecuteTool count as failures and eventually abort."""
    import grpc

    tool_call = '{"tool": "run_shell", "parameters": {"command": "ls"}}'
    agent = _make_agent(
        gateway_responses=[tool_call] * 5,
        config=AgentConfig(max_iterations=10),
    )
    agent._broker.execute_tool = AsyncMock(
        side_effect=grpc.aio.AioRpcError(
            grpc.StatusCode.INTERNAL,
            initial_metadata=grpc.aio.Metadata(),
            trailing_metadata=grpc.aio.Metadata(),
            details="exec failed",
        )
    )
    result = await agent.run(_make_request())
    assert result.status == common_pb2.RESULT_STATUS_FAILED
    assert "consecutive failures" in result.failure_reason
|
|
|
|
|
|
def test_format_memory_with_description():
    """format_memory renders the entry name header and the description text."""
    response = memory_pb2.QueryMemoryResponse(
        rank=0,
        entry=memory_pb2.MemoryEntry(name="test-mem", description="A description"),
        cosine_similarity=0.9,
    )
    formatted = format_memory(response)
    assert "[Memory: test-mem]" in formatted
    assert "A description" in formatted
|
|
|
|
|
|
def test_format_memory_with_cached_segment():
    """format_memory prefers the cached extracted segment over the full corpus."""
    response = memory_pb2.QueryMemoryResponse(
        rank=0,
        entry=memory_pb2.MemoryEntry(
            name="test-mem", description="desc", corpus="full corpus"
        ),
        cosine_similarity=0.9,
        cached_extracted_segment="extracted segment",
    )
    formatted = format_memory(response)
    assert "extracted segment" in formatted
    assert "full corpus" not in formatted
|
|
|
|
|
|
async def test_agent_id_prefix():
    """SysadminAgent auto-generates an agent ID when the request has none.

    Bug fix: the original test contained no assertion at all, so it passed
    unconditionally. With the default mocks the gateway returns an immediate
    VERIFIED done-signal, so a clean run must end in SUCCESS — assert that,
    proving the agent tolerated the empty agent_id.

    NOTE(review): the 'sys-' prefix itself is not observable from the result
    here — confirm whether SubagentResult echoes the generated agent_id and,
    if so, assert `result.agent_id.startswith("sys-")` instead.
    """
    agent = _make_agent()
    request = _make_request()
    request.agent_id = ""  # Clear to trigger auto-generation
    result = await agent.run(request)
    assert result.status == common_pb2.RESULT_STATUS_SUCCESS
|
|
|
|
|
|
async def test_compaction_triggers_and_continues():
    """Context compaction fires mid-loop and the agent still finishes cleanly."""
    scripted = [
        '{"tool": "run_shell", "parameters": {"command": "ls /etc"}}',
        '{"tool": "run_shell", "parameters": {"command": "cat /etc/hosts"}}',
        '{"tool": "run_shell", "parameters": {"command": "df -h"}}',
        '{"tool": "run_shell", "parameters": {"command": "free -m"}}',
        '{"done": true, "summary": "Done after compaction.", "confidence": "VERIFIED"}',
    ]
    # Large tool outputs force the context past the token budget.
    agent = _make_agent(
        gateway_responses=scripted,
        exec_output="A" * 600,
        config=AgentConfig(max_iterations=20, max_tokens=1800),
    )
    # Compaction path uses the non-streaming inference call for the summary.
    agent._gateway.inference = AsyncMock(return_value="- Short summary")
    result = await agent.run(_make_request(max_tokens=1800))
    assert result.status == common_pb2.RESULT_STATUS_SUCCESS
    assert "Done after compaction" in result.summary
|
|
|
|
|
|
async def test_compaction_failure_gives_partial():
    """When compaction cannot free enough context, the loop ends PARTIAL."""
    tool_call = '{"tool": "run_shell", "parameters": {"command": "x"}}'
    agent = _make_agent(
        gateway_responses=[tool_call] * 10,
        exec_output="B" * 500,
        config=AgentConfig(max_iterations=20, max_tokens=200),
    )
    # Compaction summary is itself too large to fit the tiny budget.
    agent._gateway.inference = AsyncMock(return_value="still big summary " * 50)
    result = await agent.run(_make_request(max_tokens=200))
    assert result.status == common_pb2.RESULT_STATUS_PARTIAL
|
|
|
|
|
|
async def test_partial_result_message():
    """Partial results carry the sysadmin-specific summary text."""
    tool_call = '{"tool": "run_shell", "parameters": {"command": "ls"}}'
    agent = _make_agent(
        gateway_responses=[tool_call] * 5,
        config=AgentConfig(max_iterations=2),
    )
    result = await agent.run(_make_request())
    assert "System administration incomplete" in result.summary
|
|
|
|
|
|
async def test_failed_result_message():
    """Failed results carry the sysadmin-specific summary text."""
    result = await _make_agent(tools=[]).run(_make_request())
    assert "System administration failed" in result.summary
|