Merge pull request 'feat: scaffold Model Gateway Rust project (#38)' (#136) from feature/issue-38-scaffold-model-gateway into main
This commit was merged in pull request #136.
This commit is contained in:
19
Cargo.lock
generated
19
Cargo.lock
generated
@@ -1450,6 +1450,25 @@ dependencies = [
|
||||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "model-gateway"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"llm-multiverse-proto",
|
||||
"prost",
|
||||
"prost-types",
|
||||
"serde",
|
||||
"tempfile",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"toml",
|
||||
"tonic",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "multimap"
|
||||
version = "0.10.1"
|
||||
|
||||
@@ -5,4 +5,5 @@ members = [
|
||||
"services/audit",
|
||||
"services/secrets",
|
||||
"services/memory",
|
||||
"services/model-gateway",
|
||||
]
|
||||
|
||||
@@ -41,6 +41,7 @@
|
||||
| #35 | Implement QueryMemory gRPC endpoint (server-streaming) | Phase 4 | `COMPLETED` | Rust | [issue-035.md](issue-035.md) |
|
||||
| #36 | Implement GetCorrelated gRPC endpoint | Phase 4 | `COMPLETED` | Rust | [issue-036.md](issue-036.md) |
|
||||
| #37 | Integration tests for Memory Service | Phase 4 | `COMPLETED` | Rust | [issue-037.md](issue-037.md) |
|
||||
| #38 | Scaffold Model Gateway Rust project | Phase 5 | `IMPLEMENTING` | Rust | [issue-038.md](issue-038.md) |
|
||||
|
||||
## Status Legend
|
||||
|
||||
|
||||
48
implementation-plans/issue-038.md
Normal file
48
implementation-plans/issue-038.md
Normal file
@@ -0,0 +1,48 @@
|
||||
# Implementation Plan — Issue #38: Scaffold Model Gateway Rust project
|
||||
|
||||
## Metadata
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Issue | [#38](https://git.shahondin1624.de/llm-multiverse/llm-multiverse/issues/38) |
|
||||
| Title | Scaffold Model Gateway Rust project |
|
||||
| Milestone | Phase 5: Model Gateway |
|
||||
| Labels | |
|
||||
| Status | `IMPLEMENTING` |
|
||||
| Language | Rust |
|
||||
| Related Plans | [issue-018.md](issue-018.md), [issue-022.md](issue-022.md), [issue-027.md](issue-027.md) |
|
||||
| Blocked by | #16 (completed) |
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Cargo workspace member created (`services/model-gateway/`)
|
||||
- [ ] Dependency on proto-gen crate
|
||||
- [ ] Tonic gRPC server boilerplate compiles
|
||||
- [ ] Configuration loading (address, port, Ollama URL, model routing config)
|
||||
- [ ] Health check endpoint responds
|
||||
|
||||
## Implementation Steps
|
||||
|
||||
1. Add workspace member to root Cargo.toml
|
||||
2. Create `services/model-gateway/Cargo.toml` with proto-gen dependency
|
||||
3. Create `src/lib.rs` with module exports
|
||||
4. Create `src/config.rs` with Config + ModelRoutingConfig (TOML loading, serde defaults)
|
||||
5. Create `src/service.rs` with ModelGatewayService trait impl (IsModelReady + stubs)
|
||||
6. Create `src/main.rs` with tonic server entry point
|
||||
7. Unit tests for config
|
||||
|
||||
## Files to Create/Modify
|
||||
|
||||
| File | Action | Purpose |
|
||||
|---|---|---|
|
||||
| `Cargo.toml` | Modify | Add workspace member |
|
||||
| `services/model-gateway/Cargo.toml` | Create | Crate manifest |
|
||||
| `services/model-gateway/src/lib.rs` | Create | Module exports |
|
||||
| `services/model-gateway/src/main.rs` | Create | Server entry point |
|
||||
| `services/model-gateway/src/config.rs` | Create | Configuration with TOML loading |
|
||||
| `services/model-gateway/src/service.rs` | Create | gRPC service impl |
|
||||
|
||||
## Deviation Log
|
||||
|
||||
| Deviation | Reason |
|
||||
|---|---|
|
||||
23
services/model-gateway/Cargo.toml
Normal file
23
services/model-gateway/Cargo.toml
Normal file
@@ -0,0 +1,23 @@
|
||||
[package]
name = "model-gateway"
version = "0.1.0"
edition = "2021"
description = "Model Gateway service for llm-multiverse — wraps Ollama HTTP API via gRPC"
# Internal service crate; never published to crates.io.
publish = false

[dependencies]
# Generated protobuf/gRPC types shared by all services in the workspace.
llm-multiverse-proto = { path = "../../gen/rust" }
# gRPC server/transport plus the protobuf runtime it codegens against.
tonic = "0.13"
prost = "0.13"
prost-types = "0.13"
# Async runtime; "full" pulls in signal handling used for graceful shutdown.
tokio = { version = "1", features = ["full"] }
# Structured logging; env-filter enables RUST_LOG-style directives.
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
# Config deserialization (TOML file -> Config structs).
serde = { version = "1", features = ["derive"] }
toml = "0.8"
# Error handling: anyhow at the binary boundary, thiserror for typed errors.
anyhow = "1"
thiserror = "2"
# Stream adapters for the server-streaming RPC stub.
tokio-stream = "0.1"

[dev-dependencies]
# Temp dirs for config-loading tests.
tempfile = "3"
|
||||
250
services/model-gateway/src/config.rs
Normal file
250
services/model-gateway/src/config.rs
Normal file
@@ -0,0 +1,250 @@
|
||||
use serde::Deserialize;
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Model routing configuration — maps task complexity to specific model names.
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct ModelRoutingConfig {
|
||||
/// Default model for unspecified complexity (default: "llama3.2:3b").
|
||||
#[serde(default = "default_default_model")]
|
||||
pub default_model: String,
|
||||
|
||||
/// Model for simple tasks (default: "llama3.2:3b").
|
||||
#[serde(default = "default_simple_model")]
|
||||
pub simple_model: String,
|
||||
|
||||
/// Model for complex tasks (default: "llama3.2:14b").
|
||||
#[serde(default = "default_complex_model")]
|
||||
pub complex_model: String,
|
||||
|
||||
/// Model for embedding generation (default: "nomic-embed-text").
|
||||
#[serde(default = "default_embedding_model")]
|
||||
pub embedding_model: String,
|
||||
|
||||
/// Additional model aliases (e.g., "code" -> "codellama:7b").
|
||||
#[serde(default)]
|
||||
pub aliases: HashMap<String, String>,
|
||||
}
|
||||
|
||||
fn default_default_model() -> String {
|
||||
"llama3.2:3b".to_string()
|
||||
}
|
||||
|
||||
fn default_simple_model() -> String {
|
||||
"llama3.2:3b".to_string()
|
||||
}
|
||||
|
||||
fn default_complex_model() -> String {
|
||||
"llama3.2:14b".to_string()
|
||||
}
|
||||
|
||||
fn default_embedding_model() -> String {
|
||||
"nomic-embed-text".to_string()
|
||||
}
|
||||
|
||||
impl Default for ModelRoutingConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
default_model: default_default_model(),
|
||||
simple_model: default_simple_model(),
|
||||
complex_model: default_complex_model(),
|
||||
embedding_model: default_embedding_model(),
|
||||
aliases: HashMap::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ModelRoutingConfig {
|
||||
/// Return a list of all unique configured model names.
|
||||
pub fn all_models(&self) -> Vec<String> {
|
||||
let mut models = vec![
|
||||
self.default_model.clone(),
|
||||
self.simple_model.clone(),
|
||||
self.complex_model.clone(),
|
||||
self.embedding_model.clone(),
|
||||
];
|
||||
for v in self.aliases.values() {
|
||||
models.push(v.clone());
|
||||
}
|
||||
models.sort();
|
||||
models.dedup();
|
||||
models
|
||||
}
|
||||
}
|
||||
|
||||
/// Top-level configuration for the Model Gateway service.
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct Config {
|
||||
/// Listen host (default: "[::1]").
|
||||
#[serde(default = "default_host")]
|
||||
pub host: String,
|
||||
|
||||
/// Listen port (default: 50055).
|
||||
#[serde(default = "default_port")]
|
||||
pub port: u16,
|
||||
|
||||
/// Ollama HTTP API base URL (default: "http://localhost:11434").
|
||||
#[serde(default = "default_ollama_url")]
|
||||
pub ollama_url: String,
|
||||
|
||||
/// Audit service gRPC address for logging.
|
||||
pub audit_addr: Option<String>,
|
||||
|
||||
/// Model routing configuration.
|
||||
#[serde(default)]
|
||||
pub routing: ModelRoutingConfig,
|
||||
}
|
||||
|
||||
fn default_host() -> String {
|
||||
"[::1]".to_string()
|
||||
}
|
||||
|
||||
fn default_port() -> u16 {
|
||||
50055
|
||||
}
|
||||
|
||||
fn default_ollama_url() -> String {
|
||||
"http://localhost:11434".to_string()
|
||||
}
|
||||
|
||||
impl Default for Config {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
host: default_host(),
|
||||
port: default_port(),
|
||||
ollama_url: default_ollama_url(),
|
||||
audit_addr: None,
|
||||
routing: ModelRoutingConfig::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Config {
|
||||
/// Load config from file, falling back to defaults.
|
||||
pub fn load(path: Option<&str>) -> anyhow::Result<Self> {
|
||||
match path {
|
||||
Some(p) => {
|
||||
let contents = std::fs::read_to_string(p)?;
|
||||
let config: Config = toml::from_str(&contents)?;
|
||||
Ok(config)
|
||||
}
|
||||
None => Ok(Self::default()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn listen_addr(&self) -> String {
|
||||
format!("{}:{}", self.host, self.port)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Defaults: IPv6 loopback, port 50055, local Ollama, no audit sink,
    // routing slots at their documented defaults.
    #[test]
    fn test_default_config() {
        let config = Config::default();
        assert_eq!(config.host, "[::1]");
        assert_eq!(config.port, 50055);
        assert_eq!(config.ollama_url, "http://localhost:11434");
        assert!(config.audit_addr.is_none());
        assert_eq!(config.routing.default_model, "llama3.2:3b");
        assert_eq!(config.routing.embedding_model, "nomic-embed-text");
    }

    // listen_addr() joins host and port with a colon.
    #[test]
    fn test_listen_addr() {
        let config = Config::default();
        assert_eq!(config.listen_addr(), "[::1]:50055");
    }

    // Full round-trip: write a complete TOML file (including [routing] and
    // [routing.aliases]) and verify every field is picked up.
    #[test]
    fn test_load_from_toml() {
        let dir = tempfile::tempdir().unwrap();
        let config_path = dir.path().join("gateway.toml");
        std::fs::write(
            &config_path,
            r#"
host = "0.0.0.0"
port = 9999
ollama_url = "http://gpu-server:11434"
audit_addr = "http://[::1]:50052"

[routing]
default_model = "mistral:7b"
simple_model = "phi3:mini"
complex_model = "llama3.2:70b"
embedding_model = "mxbai-embed-large"

[routing.aliases]
code = "codellama:7b"
"#,
        )
        .unwrap();

        let config = Config::load(Some(config_path.to_str().unwrap())).unwrap();
        assert_eq!(config.host, "0.0.0.0");
        assert_eq!(config.port, 9999);
        assert_eq!(config.ollama_url, "http://gpu-server:11434");
        assert_eq!(config.audit_addr.as_deref(), Some("http://[::1]:50052"));
        assert_eq!(config.routing.default_model, "mistral:7b");
        assert_eq!(config.routing.simple_model, "phi3:mini");
        assert_eq!(config.routing.complex_model, "llama3.2:70b");
        assert_eq!(config.routing.embedding_model, "mxbai-embed-large");
        assert_eq!(config.routing.aliases.get("code").unwrap(), "codellama:7b");
    }

    // Config::load(None) is the defaults path, not an error.
    #[test]
    fn test_load_no_file_uses_defaults() {
        let config = Config::load(None).unwrap();
        assert_eq!(config.port, 50055);
    }

    // ModelRoutingConfig::default() matches the serde per-field defaults.
    #[test]
    fn test_routing_defaults() {
        let routing = ModelRoutingConfig::default();
        assert_eq!(routing.default_model, "llama3.2:3b");
        assert_eq!(routing.simple_model, "llama3.2:3b");
        assert_eq!(routing.complex_model, "llama3.2:14b");
        assert_eq!(routing.embedding_model, "nomic-embed-text");
        assert!(routing.aliases.is_empty());
    }

    // all_models() includes the four routing slots plus alias targets.
    #[test]
    fn test_all_models() {
        let mut routing = ModelRoutingConfig::default();
        routing.aliases.insert("code".into(), "codellama:7b".into());

        let models = routing.all_models();
        assert!(models.contains(&"llama3.2:3b".to_string()));
        assert!(models.contains(&"llama3.2:14b".to_string()));
        assert!(models.contains(&"nomic-embed-text".to_string()));
        assert!(models.contains(&"codellama:7b".to_string()));
    }

    // Duplicate names across slots appear only once in all_models().
    #[test]
    fn test_all_models_deduplicates() {
        let routing = ModelRoutingConfig::default();
        // default_model and simple_model are both "llama3.2:3b"
        let models = routing.all_models();
        let count = models.iter().filter(|m| *m == "llama3.2:3b").count();
        assert_eq!(count, 1);
    }

    // Omitting the [routing] table entirely falls back to routing defaults.
    #[test]
    fn test_routing_from_toml_uses_defaults_when_omitted() {
        let dir = tempfile::tempdir().unwrap();
        let config_path = dir.path().join("gateway.toml");
        std::fs::write(
            &config_path,
            r#"
host = "0.0.0.0"
port = 9999
"#,
        )
        .unwrap();

        let config = Config::load(Some(config_path.to_str().unwrap())).unwrap();
        assert_eq!(config.routing.default_model, "llama3.2:3b");
        assert_eq!(config.routing.embedding_model, "nomic-embed-text");
    }
}
|
||||
2
services/model-gateway/src/lib.rs
Normal file
2
services/model-gateway/src/lib.rs
Normal file
@@ -0,0 +1,2 @@
|
||||
//! Model Gateway service library: configuration loading (`config`) and the
//! gRPC `ModelGatewayService` implementation (`service`).

pub mod config;
pub mod service;
|
||||
44
services/model-gateway/src/main.rs
Normal file
44
services/model-gateway/src/main.rs
Normal file
@@ -0,0 +1,44 @@
|
||||
use llm_multiverse_proto::llm_multiverse::v1::model_gateway_service_server::ModelGatewayServiceServer;
|
||||
use model_gateway::config::Config;
|
||||
use model_gateway::service::ModelGatewayServiceImpl;
|
||||
use tonic::transport::Server;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(
|
||||
EnvFilter::from_default_env().add_directive("model_gateway=info".parse()?),
|
||||
)
|
||||
.init();
|
||||
|
||||
let config_path = std::env::var("MODEL_GATEWAY_CONFIG").ok();
|
||||
let config = Config::load(config_path.as_deref())?;
|
||||
|
||||
tracing::info!(
|
||||
addr = %config.listen_addr(),
|
||||
ollama_url = %config.ollama_url,
|
||||
default_model = %config.routing.default_model,
|
||||
"Starting Model Gateway"
|
||||
);
|
||||
|
||||
let addr = config.listen_addr().parse()?;
|
||||
let service = ModelGatewayServiceImpl::new(config);
|
||||
|
||||
tracing::info!(%addr, "Model Gateway listening");
|
||||
|
||||
Server::builder()
|
||||
.add_service(ModelGatewayServiceServer::new(service))
|
||||
.serve_with_shutdown(addr, shutdown_signal())
|
||||
.await?;
|
||||
|
||||
tracing::info!("Model Gateway shut down gracefully");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn shutdown_signal() {
|
||||
tokio::signal::ctrl_c()
|
||||
.await
|
||||
.expect("failed to listen for ctrl+c");
|
||||
tracing::info!("Shutdown signal received");
|
||||
}
|
||||
182
services/model-gateway/src/service.rs
Normal file
182
services/model-gateway/src/service.rs
Normal file
@@ -0,0 +1,182 @@
|
||||
use llm_multiverse_proto::llm_multiverse::v1::model_gateway_service_server::ModelGatewayService;
|
||||
use llm_multiverse_proto::llm_multiverse::v1::{
|
||||
GenerateEmbeddingRequest, GenerateEmbeddingResponse, InferenceRequest, InferenceResponse,
|
||||
IsModelReadyRequest, IsModelReadyResponse, StreamInferenceRequest, StreamInferenceResponse,
|
||||
};
|
||||
use tonic::{Request, Response, Status};
|
||||
|
||||
use crate::config::Config;
|
||||
|
||||
/// Implementation of the ModelGatewayService gRPC trait.
|
||||
pub struct ModelGatewayServiceImpl {
|
||||
config: Config,
|
||||
}
|
||||
|
||||
impl ModelGatewayServiceImpl {
|
||||
pub fn new(config: Config) -> Self {
|
||||
Self { config }
|
||||
}
|
||||
}
|
||||
|
||||
#[tonic::async_trait]
|
||||
impl ModelGatewayService for ModelGatewayServiceImpl {
|
||||
type StreamInferenceStream =
|
||||
tokio_stream::wrappers::ReceiverStream<Result<StreamInferenceResponse, Status>>;
|
||||
|
||||
async fn stream_inference(
|
||||
&self,
|
||||
_request: Request<StreamInferenceRequest>,
|
||||
) -> Result<Response<Self::StreamInferenceStream>, Status> {
|
||||
Err(Status::unimplemented(
|
||||
"StreamInference not yet implemented",
|
||||
))
|
||||
}
|
||||
|
||||
async fn inference(
|
||||
&self,
|
||||
_request: Request<InferenceRequest>,
|
||||
) -> Result<Response<InferenceResponse>, Status> {
|
||||
Err(Status::unimplemented("Inference not yet implemented"))
|
||||
}
|
||||
|
||||
async fn generate_embedding(
|
||||
&self,
|
||||
_request: Request<GenerateEmbeddingRequest>,
|
||||
) -> Result<Response<GenerateEmbeddingResponse>, Status> {
|
||||
Err(Status::unimplemented(
|
||||
"GenerateEmbedding not yet implemented",
|
||||
))
|
||||
}
|
||||
|
||||
async fn is_model_ready(
|
||||
&self,
|
||||
request: Request<IsModelReadyRequest>,
|
||||
) -> Result<Response<IsModelReadyResponse>, Status> {
|
||||
let req = request.into_inner();
|
||||
let all_models = self.config.routing.all_models();
|
||||
|
||||
if let Some(model_name) = &req.model_name {
|
||||
let available = all_models.contains(model_name);
|
||||
Ok(Response::new(IsModelReadyResponse {
|
||||
ready: available,
|
||||
available_models: if available {
|
||||
vec![model_name.clone()]
|
||||
} else {
|
||||
vec![]
|
||||
},
|
||||
error_message: if available {
|
||||
None
|
||||
} else {
|
||||
Some(format!("Model '{}' is not configured", model_name))
|
||||
},
|
||||
}))
|
||||
} else {
|
||||
Ok(Response::new(IsModelReadyResponse {
|
||||
ready: true,
|
||||
available_models: all_models,
|
||||
error_message: None,
|
||||
}))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // All tests run against the default config (no Ollama involved).
    fn test_config() -> Config {
        Config::default()
    }

    // No model name -> ready, with the full configured model list.
    #[tokio::test]
    async fn test_is_model_ready_all_models() {
        let svc = ModelGatewayServiceImpl::new(test_config());
        let req = Request::new(IsModelReadyRequest { model_name: None });

        let resp = svc.is_model_ready(req).await.unwrap().into_inner();
        assert!(resp.ready);
        assert!(!resp.available_models.is_empty());
        assert!(resp.available_models.contains(&"llama3.2:3b".to_string()));
        assert!(resp
            .available_models
            .contains(&"nomic-embed-text".to_string()));
        assert!(resp.error_message.is_none());
    }

    // A configured model name -> ready, list contains only that name.
    #[tokio::test]
    async fn test_is_model_ready_specific_model_found() {
        let svc = ModelGatewayServiceImpl::new(test_config());
        let req = Request::new(IsModelReadyRequest {
            model_name: Some("llama3.2:3b".to_string()),
        });

        let resp = svc.is_model_ready(req).await.unwrap().into_inner();
        assert!(resp.ready);
        assert_eq!(resp.available_models, vec!["llama3.2:3b"]);
        assert!(resp.error_message.is_none());
    }

    // An unknown model name -> not ready, empty list, error message set.
    #[tokio::test]
    async fn test_is_model_ready_specific_model_not_found() {
        let svc = ModelGatewayServiceImpl::new(test_config());
        let req = Request::new(IsModelReadyRequest {
            model_name: Some("nonexistent:latest".to_string()),
        });

        let resp = svc.is_model_ready(req).await.unwrap().into_inner();
        assert!(!resp.ready);
        assert!(resp.available_models.is_empty());
        assert!(resp.error_message.is_some());
    }

    // Alias targets count as configured models for readiness checks.
    #[tokio::test]
    async fn test_is_model_ready_with_aliases() {
        let mut config = test_config();
        config
            .routing
            .aliases
            .insert("code".into(), "codellama:7b".into());

        let svc = ModelGatewayServiceImpl::new(config);
        let req = Request::new(IsModelReadyRequest {
            model_name: Some("codellama:7b".to_string()),
        });

        let resp = svc.is_model_ready(req).await.unwrap().into_inner();
        assert!(resp.ready);
    }

    // The remaining RPCs are stubs and must return Unimplemented.
    #[tokio::test]
    async fn test_stream_inference_unimplemented() {
        let svc = ModelGatewayServiceImpl::new(test_config());
        let req = Request::new(StreamInferenceRequest { params: None });

        let result = svc.stream_inference(req).await;
        assert!(result.is_err());
        assert_eq!(result.unwrap_err().code(), tonic::Code::Unimplemented);
    }

    #[tokio::test]
    async fn test_inference_unimplemented() {
        let svc = ModelGatewayServiceImpl::new(test_config());
        let req = Request::new(InferenceRequest { params: None });

        let result = svc.inference(req).await;
        assert!(result.is_err());
        assert_eq!(result.unwrap_err().code(), tonic::Code::Unimplemented);
    }

    #[tokio::test]
    async fn test_generate_embedding_unimplemented() {
        let svc = ModelGatewayServiceImpl::new(test_config());
        let req = Request::new(GenerateEmbeddingRequest {
            context: None,
            text: "test".into(),
            model: None,
        });

        let result = svc.generate_embedding(req).await;
        assert!(result.is_err());
        assert_eq!(result.unwrap_err().code(), tonic::Code::Unimplemented);
    }
}
|
||||
Reference in New Issue
Block a user