Merge pull request 'feat: scaffold Model Gateway Rust project (#38)' (#136) from feature/issue-38-scaffold-model-gateway into main

This commit was merged in pull request #136.
2026-03-10 13:43:22 +01:00
9 changed files with 570 additions and 0 deletions

Cargo.lock (generated)

@@ -1450,6 +1450,25 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "model-gateway"
version = "0.1.0"
dependencies = [
"anyhow",
"llm-multiverse-proto",
"prost",
"prost-types",
"serde",
"tempfile",
"thiserror",
"tokio",
"tokio-stream",
"toml",
"tonic",
"tracing",
"tracing-subscriber",
]
[[package]]
name = "multimap"
version = "0.10.1"

Cargo.toml

@@ -5,4 +5,5 @@ members = [
"services/audit",
"services/secrets",
"services/memory",
"services/model-gateway",
]


@@ -41,6 +41,7 @@
| #35 | Implement QueryMemory gRPC endpoint (server-streaming) | Phase 4 | `COMPLETED` | Rust | [issue-035.md](issue-035.md) |
| #36 | Implement GetCorrelated gRPC endpoint | Phase 4 | `COMPLETED` | Rust | [issue-036.md](issue-036.md) |
| #37 | Integration tests for Memory Service | Phase 4 | `COMPLETED` | Rust | [issue-037.md](issue-037.md) |
| #38 | Scaffold Model Gateway Rust project | Phase 5 | `IMPLEMENTING` | Rust | [issue-038.md](issue-038.md) |
## Status Legend

issue-038.md

@@ -0,0 +1,48 @@
# Implementation Plan — Issue #38: Scaffold Model Gateway Rust project
## Metadata
| Field | Value |
|---|---|
| Issue | [#38](https://git.shahondin1624.de/llm-multiverse/llm-multiverse/issues/38) |
| Title | Scaffold Model Gateway Rust project |
| Milestone | Phase 5: Model Gateway |
| Labels | |
| Status | `IMPLEMENTING` |
| Language | Rust |
| Related Plans | [issue-018.md](issue-018.md), [issue-022.md](issue-022.md), [issue-027.md](issue-027.md) |
| Blocked by | #16 (completed) |
## Acceptance Criteria
- [ ] Cargo workspace member created (`services/model-gateway/`)
- [ ] Dependency on proto-gen crate
- [ ] Tonic gRPC server boilerplate compiles
- [ ] Configuration loading (address, port, Ollama URL, model routing config)
- [ ] Health check endpoint responds
## Implementation Steps
1. Add workspace member to root Cargo.toml
2. Create `services/model-gateway/Cargo.toml` with proto-gen dependency
3. Create `src/lib.rs` with module exports
4. Create `src/config.rs` with Config + ModelRoutingConfig (TOML loading, serde defaults)
5. Create `src/service.rs` with ModelGatewayService trait impl (IsModelReady + stubs); a client sketch for calling it follows this list
6. Create `src/main.rs` with tonic server entry point
7. Unit tests for config
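A quick way to smoke-test the scaffold once the server from step 6 is running is to call the one implemented RPC, `IsModelReady`, from a throwaway client. The sketch below is illustrative only: it assumes the proto crate also generates a `model_gateway_service_client` module mirroring the `model_gateway_service_server` module that `main.rs` imports.

```rust
use llm_multiverse_proto::llm_multiverse::v1::model_gateway_service_client::ModelGatewayServiceClient;
use llm_multiverse_proto::llm_multiverse::v1::IsModelReadyRequest;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Default listen address from config.rs: host "[::1]", port 50055.
    let mut client = ModelGatewayServiceClient::connect("http://[::1]:50055").await?;
    // model_name: None asks the gateway to report all configured models.
    let resp = client
        .is_model_ready(IsModelReadyRequest { model_name: None })
        .await?
        .into_inner();
    println!("ready={} models={:?}", resp.ready, resp.available_models);
    Ok(())
}
```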
## Files to Create/Modify
| File | Action | Purpose |
|---|---|---|
| `Cargo.toml` | Modify | Add workspace member |
| `services/model-gateway/Cargo.toml` | Create | Crate manifest |
| `services/model-gateway/src/lib.rs` | Create | Module exports |
| `services/model-gateway/src/main.rs` | Create | Server entry point |
| `services/model-gateway/src/config.rs` | Create | Configuration with TOML loading |
| `services/model-gateway/src/service.rs` | Create | gRPC service impl |
## Deviation Log
| Deviation | Reason |
|---|---|

services/model-gateway/Cargo.toml

@@ -0,0 +1,23 @@
[package]
name = "model-gateway"
version = "0.1.0"
edition = "2021"
description = "Model Gateway service for llm-multiverse — wraps Ollama HTTP API via gRPC"
publish = false
[dependencies]
llm-multiverse-proto = { path = "../../gen/rust" }
tonic = "0.13"
prost = "0.13"
prost-types = "0.13"
tokio = { version = "1", features = ["full"] }
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
serde = { version = "1", features = ["derive"] }
toml = "0.8"
anyhow = "1"
thiserror = "2"
tokio-stream = "0.1"
[dev-dependencies]
tempfile = "3"

services/model-gateway/src/config.rs

@@ -0,0 +1,250 @@
use serde::Deserialize;
use std::collections::HashMap;
/// Model routing configuration — maps task complexity to specific model names.
#[derive(Debug, Clone, Deserialize)]
pub struct ModelRoutingConfig {
/// Default model for unspecified complexity (default: "llama3.2:3b").
#[serde(default = "default_default_model")]
pub default_model: String,
/// Model for simple tasks (default: "llama3.2:3b").
#[serde(default = "default_simple_model")]
pub simple_model: String,
/// Model for complex tasks (default: "llama3.2:14b").
#[serde(default = "default_complex_model")]
pub complex_model: String,
/// Model for embedding generation (default: "nomic-embed-text").
#[serde(default = "default_embedding_model")]
pub embedding_model: String,
/// Additional model aliases (e.g., "code" -> "codellama:7b").
#[serde(default)]
pub aliases: HashMap<String, String>,
}
fn default_default_model() -> String {
"llama3.2:3b".to_string()
}
fn default_simple_model() -> String {
"llama3.2:3b".to_string()
}
fn default_complex_model() -> String {
"llama3.2:14b".to_string()
}
fn default_embedding_model() -> String {
"nomic-embed-text".to_string()
}
impl Default for ModelRoutingConfig {
fn default() -> Self {
Self {
default_model: default_default_model(),
simple_model: default_simple_model(),
complex_model: default_complex_model(),
embedding_model: default_embedding_model(),
aliases: HashMap::new(),
}
}
}
impl ModelRoutingConfig {
/// Return a list of all unique configured model names.
pub fn all_models(&self) -> Vec<String> {
let mut models = vec![
self.default_model.clone(),
self.simple_model.clone(),
self.complex_model.clone(),
self.embedding_model.clone(),
];
for v in self.aliases.values() {
models.push(v.clone());
}
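// Sorting first lets dedup collapse duplicates (default_model and simple_model
// share "llama3.2:3b" out of the box).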
models.sort();
models.dedup();
models
}
}
/// Top-level configuration for the Model Gateway service.
#[derive(Debug, Clone, Deserialize)]
pub struct Config {
/// Listen host (default: "[::1]").
#[serde(default = "default_host")]
pub host: String,
/// Listen port (default: 50055).
#[serde(default = "default_port")]
pub port: u16,
/// Ollama HTTP API base URL (default: "http://localhost:11434").
#[serde(default = "default_ollama_url")]
pub ollama_url: String,
/// Audit service gRPC address for logging.
pub audit_addr: Option<String>,
/// Model routing configuration.
#[serde(default)]
pub routing: ModelRoutingConfig,
}
fn default_host() -> String {
"[::1]".to_string()
}
fn default_port() -> u16 {
50055
}
fn default_ollama_url() -> String {
"http://localhost:11434".to_string()
}
impl Default for Config {
fn default() -> Self {
Self {
host: default_host(),
port: default_port(),
ollama_url: default_ollama_url(),
audit_addr: None,
routing: ModelRoutingConfig::default(),
}
}
}
impl Config {
/// Load config from file, falling back to defaults.
pub fn load(path: Option<&str>) -> anyhow::Result<Self> {
match path {
Some(p) => {
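// An explicit path must exist and parse cleanly; I/O and TOML errors propagate.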
let contents = std::fs::read_to_string(p)?;
let config: Config = toml::from_str(&contents)?;
Ok(config)
}
None => Ok(Self::default()),
}
}
pub fn listen_addr(&self) -> String {
format!("{}:{}", self.host, self.port)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_default_config() {
let config = Config::default();
assert_eq!(config.host, "[::1]");
assert_eq!(config.port, 50055);
assert_eq!(config.ollama_url, "http://localhost:11434");
assert!(config.audit_addr.is_none());
assert_eq!(config.routing.default_model, "llama3.2:3b");
assert_eq!(config.routing.embedding_model, "nomic-embed-text");
}
#[test]
fn test_listen_addr() {
let config = Config::default();
assert_eq!(config.listen_addr(), "[::1]:50055");
}
#[test]
fn test_load_from_toml() {
let dir = tempfile::tempdir().unwrap();
let config_path = dir.path().join("gateway.toml");
std::fs::write(
&config_path,
r#"
host = "0.0.0.0"
port = 9999
ollama_url = "http://gpu-server:11434"
audit_addr = "http://[::1]:50052"
[routing]
default_model = "mistral:7b"
simple_model = "phi3:mini"
complex_model = "llama3.2:70b"
embedding_model = "mxbai-embed-large"
[routing.aliases]
code = "codellama:7b"
"#,
)
.unwrap();
let config = Config::load(Some(config_path.to_str().unwrap())).unwrap();
assert_eq!(config.host, "0.0.0.0");
assert_eq!(config.port, 9999);
assert_eq!(config.ollama_url, "http://gpu-server:11434");
assert_eq!(config.audit_addr.as_deref(), Some("http://[::1]:50052"));
assert_eq!(config.routing.default_model, "mistral:7b");
assert_eq!(config.routing.simple_model, "phi3:mini");
assert_eq!(config.routing.complex_model, "llama3.2:70b");
assert_eq!(config.routing.embedding_model, "mxbai-embed-large");
assert_eq!(config.routing.aliases.get("code").unwrap(), "codellama:7b");
}
#[test]
fn test_load_no_file_uses_defaults() {
let config = Config::load(None).unwrap();
assert_eq!(config.port, 50055);
}
#[test]
fn test_routing_defaults() {
let routing = ModelRoutingConfig::default();
assert_eq!(routing.default_model, "llama3.2:3b");
assert_eq!(routing.simple_model, "llama3.2:3b");
assert_eq!(routing.complex_model, "llama3.2:14b");
assert_eq!(routing.embedding_model, "nomic-embed-text");
assert!(routing.aliases.is_empty());
}
#[test]
fn test_all_models() {
let mut routing = ModelRoutingConfig::default();
routing.aliases.insert("code".into(), "codellama:7b".into());
let models = routing.all_models();
assert!(models.contains(&"llama3.2:3b".to_string()));
assert!(models.contains(&"llama3.2:14b".to_string()));
assert!(models.contains(&"nomic-embed-text".to_string()));
assert!(models.contains(&"codellama:7b".to_string()));
}
#[test]
fn test_all_models_deduplicates() {
let routing = ModelRoutingConfig::default();
// default_model and simple_model are both "llama3.2:3b"
let models = routing.all_models();
let count = models.iter().filter(|m| *m == "llama3.2:3b").count();
assert_eq!(count, 1);
}
#[test]
fn test_routing_from_toml_uses_defaults_when_omitted() {
let dir = tempfile::tempdir().unwrap();
let config_path = dir.path().join("gateway.toml");
std::fs::write(
&config_path,
r#"
host = "0.0.0.0"
port = 9999
"#,
)
.unwrap();
let config = Config::load(Some(config_path.to_str().unwrap())).unwrap();
assert_eq!(config.routing.default_model, "llama3.2:3b");
assert_eq!(config.routing.embedding_model, "nomic-embed-text");
}
}
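
Nothing in this commit consumes the routing table beyond `all_models`; dispatch is deferred to later Phase 5 issues. As a hedged sketch of how a resolver might use it, with `resolve_model` being a hypothetical helper that does not exist in the scaffold:

```rust
use model_gateway::config::ModelRoutingConfig;

// Hypothetical helper (not part of this commit): route a requested model name
// through the alias table, falling back to the configured default.
fn resolve_model(routing: &ModelRoutingConfig, requested: Option<&str>) -> String {
    match requested {
        // No explicit model requested: use the configured default.
        None => routing.default_model.clone(),
        // Alias hit (e.g. "code" -> "codellama:7b"); otherwise pass the name through.
        Some(name) => routing
            .aliases
            .get(name)
            .cloned()
            .unwrap_or_else(|| name.to_string()),
    }
}
```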

services/model-gateway/src/lib.rs

@@ -0,0 +1,2 @@
pub mod config;
pub mod service;

services/model-gateway/src/main.rs

@@ -0,0 +1,44 @@
use llm_multiverse_proto::llm_multiverse::v1::model_gateway_service_server::ModelGatewayServiceServer;
use model_gateway::config::Config;
use model_gateway::service::ModelGatewayServiceImpl;
use tonic::transport::Server;
use tracing_subscriber::EnvFilter;
#[tokio::main]
async fn main() -> anyhow::Result<()> {
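// Log filtering honours RUST_LOG via EnvFilter::from_default_env, with a
// "model_gateway=info" directive layered on top as the crate's baseline.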
tracing_subscriber::fmt()
.with_env_filter(
EnvFilter::from_default_env().add_directive("model_gateway=info".parse()?),
)
.init();
let config_path = std::env::var("MODEL_GATEWAY_CONFIG").ok();
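// MODEL_GATEWAY_CONFIG optionally points at a TOML config file; when unset,
// Config::load falls back to built-in defaults.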
let config = Config::load(config_path.as_deref())?;
tracing::info!(
addr = %config.listen_addr(),
ollama_url = %config.ollama_url,
default_model = %config.routing.default_model,
"Starting Model Gateway"
);
let addr = config.listen_addr().parse()?;
let service = ModelGatewayServiceImpl::new(config);
tracing::info!(%addr, "Model Gateway listening");
Server::builder()
.add_service(ModelGatewayServiceServer::new(service))
.serve_with_shutdown(addr, shutdown_signal())
.await?;
tracing::info!("Model Gateway shut down gracefully");
Ok(())
}
async fn shutdown_signal() {
tokio::signal::ctrl_c()
.await
.expect("failed to listen for ctrl+c");
tracing::info!("Shutdown signal received");
}

services/model-gateway/src/service.rs

@@ -0,0 +1,182 @@
use llm_multiverse_proto::llm_multiverse::v1::model_gateway_service_server::ModelGatewayService;
use llm_multiverse_proto::llm_multiverse::v1::{
GenerateEmbeddingRequest, GenerateEmbeddingResponse, InferenceRequest, InferenceResponse,
IsModelReadyRequest, IsModelReadyResponse, StreamInferenceRequest, StreamInferenceResponse,
};
use tonic::{Request, Response, Status};
use crate::config::Config;
/// Implementation of the ModelGatewayService gRPC trait.
pub struct ModelGatewayServiceImpl {
config: Config,
}
impl ModelGatewayServiceImpl {
pub fn new(config: Config) -> Self {
Self { config }
}
}
#[tonic::async_trait]
impl ModelGatewayService for ModelGatewayServiceImpl {
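// tonic requires the server-streaming associated type to be named even while
// the StreamInference RPC below is still a stub.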
type StreamInferenceStream =
tokio_stream::wrappers::ReceiverStream<Result<StreamInferenceResponse, Status>>;
async fn stream_inference(
&self,
_request: Request<StreamInferenceRequest>,
) -> Result<Response<Self::StreamInferenceStream>, Status> {
Err(Status::unimplemented(
"StreamInference not yet implemented",
))
}
async fn inference(
&self,
_request: Request<InferenceRequest>,
) -> Result<Response<InferenceResponse>, Status> {
Err(Status::unimplemented("Inference not yet implemented"))
}
async fn generate_embedding(
&self,
_request: Request<GenerateEmbeddingRequest>,
) -> Result<Response<GenerateEmbeddingResponse>, Status> {
Err(Status::unimplemented(
"GenerateEmbedding not yet implemented",
))
}
async fn is_model_ready(
&self,
request: Request<IsModelReadyRequest>,
) -> Result<Response<IsModelReadyResponse>, Status> {
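// Scaffold semantics: readiness reflects the static routing config only; no
// live query against Ollama happens yet.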
let req = request.into_inner();
let all_models = self.config.routing.all_models();
if let Some(model_name) = &req.model_name {
let available = all_models.contains(model_name);
Ok(Response::new(IsModelReadyResponse {
ready: available,
available_models: if available {
vec![model_name.clone()]
} else {
vec![]
},
error_message: if available {
None
} else {
Some(format!("Model '{}' is not configured", model_name))
},
}))
} else {
Ok(Response::new(IsModelReadyResponse {
ready: true,
available_models: all_models,
error_message: None,
}))
}
}
}
#[cfg(test)]
mod tests {
use super::*;
fn test_config() -> Config {
Config::default()
}
#[tokio::test]
async fn test_is_model_ready_all_models() {
let svc = ModelGatewayServiceImpl::new(test_config());
let req = Request::new(IsModelReadyRequest { model_name: None });
let resp = svc.is_model_ready(req).await.unwrap().into_inner();
assert!(resp.ready);
assert!(!resp.available_models.is_empty());
assert!(resp.available_models.contains(&"llama3.2:3b".to_string()));
assert!(resp
.available_models
.contains(&"nomic-embed-text".to_string()));
assert!(resp.error_message.is_none());
}
#[tokio::test]
async fn test_is_model_ready_specific_model_found() {
let svc = ModelGatewayServiceImpl::new(test_config());
let req = Request::new(IsModelReadyRequest {
model_name: Some("llama3.2:3b".to_string()),
});
let resp = svc.is_model_ready(req).await.unwrap().into_inner();
assert!(resp.ready);
assert_eq!(resp.available_models, vec!["llama3.2:3b"]);
assert!(resp.error_message.is_none());
}
#[tokio::test]
async fn test_is_model_ready_specific_model_not_found() {
let svc = ModelGatewayServiceImpl::new(test_config());
let req = Request::new(IsModelReadyRequest {
model_name: Some("nonexistent:latest".to_string()),
});
let resp = svc.is_model_ready(req).await.unwrap().into_inner();
assert!(!resp.ready);
assert!(resp.available_models.is_empty());
assert!(resp.error_message.is_some());
}
#[tokio::test]
async fn test_is_model_ready_with_aliases() {
let mut config = test_config();
config
.routing
.aliases
.insert("code".into(), "codellama:7b".into());
let svc = ModelGatewayServiceImpl::new(config);
let req = Request::new(IsModelReadyRequest {
model_name: Some("codellama:7b".to_string()),
});
let resp = svc.is_model_ready(req).await.unwrap().into_inner();
assert!(resp.ready);
}
#[tokio::test]
async fn test_stream_inference_unimplemented() {
let svc = ModelGatewayServiceImpl::new(test_config());
let req = Request::new(StreamInferenceRequest { params: None });
let result = svc.stream_inference(req).await;
assert!(result.is_err());
assert_eq!(result.unwrap_err().code(), tonic::Code::Unimplemented);
}
#[tokio::test]
async fn test_inference_unimplemented() {
let svc = ModelGatewayServiceImpl::new(test_config());
let req = Request::new(InferenceRequest { params: None });
let result = svc.inference(req).await;
assert!(result.is_err());
assert_eq!(result.unwrap_err().code(), tonic::Code::Unimplemented);
}
#[tokio::test]
async fn test_generate_embedding_unimplemented() {
let svc = ModelGatewayServiceImpl::new(test_config());
let req = Request::new(GenerateEmbeddingRequest {
context: None,
text: "test".into(),
model: None,
});
let result = svc.generate_embedding(req).await;
assert!(result.is_err());
assert_eq!(result.unwrap_err().code(), tonic::Code::Unimplemented);
}
}
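
The scaffold's unit tests run with `cargo test -p model-gateway` (package name per the crate manifest above); Inference, StreamInference, and GenerateEmbedding deliberately stay `unimplemented` stubs for later Phase 5 issues.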