Merge pull request 'fix: resolve tech debt from issue #31 review (#120)' (#130) from feature/issue-120-tech-debt-31-review into main

2026-03-10 09:39:07 +01:00
parent 4759accc63 960e5527e8
commit bb3a5f7348
7 changed files with 290 additions and 190 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -680,6 +680,21 @@ version = "2.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
 [[package]]
 name = "futures"
 version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d"
 dependencies = [
 "futures-channel",
 "futures-core",
 "futures-executor",
 "futures-io",
 "futures-sink",
 "futures-task",
 "futures-util",
 ]
 [[package]]
 name = "futures-channel"
 version = "0.3.32"
@@ -696,12 +711,34 @@ version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d"
 [[package]]
 name = "futures-executor"
 version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d"
 dependencies = [
 "futures-core",
 "futures-task",
 "futures-util",
 ]
 [[package]]
 name = "futures-io"
 version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718"
 [[package]]
 name = "futures-macro"
 version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b"
 dependencies = [
 "proc-macro2",
 "quote",
 "syn 2.0.117",
 ]
 [[package]]
 name = "futures-sink"
 version = "0.3.32"
@@ -720,8 +757,10 @@ version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6"
 dependencies = [
 "futures-channel",
 "futures-core",
 "futures-io",
 "futures-macro",
 "futures-sink",
 "futures-task",
 "memchr",
@@ -1367,6 +1406,7 @@ dependencies = [
 "anyhow",
 "chrono",
 "duckdb",
 "futures",
 "llm-multiverse-proto",
 "prost",
 "prost-types",
--- a/implementation-plans/_index.md
+++ b/implementation-plans/_index.md
@@ -34,6 +34,7 @@
 | #114 | Tech debt: minor findings from issue #28 review | Phase 4 | `COMPLETED` | Rust | [issue-114.md](issue-114.md) |
 | #116 | Tech debt: minor findings from issue #29 review | Phase 4 | `COMPLETED` | Rust | [issue-116.md](issue-116.md) |
 | #118 | Tech debt: minor findings from issue #30 review | Phase 4 | `COMPLETED` | Rust | [issue-118.md](issue-118.md) |
 | #120 | Tech debt: minor findings from issue #31 review | Phase 4 | `COMPLETED` | Rust | [issue-120.md](issue-120.md) |
 ## Status Legend
--- a/implementation-plans/issue-120.md
+++ b/implementation-plans/issue-120.md
@@ -0,0 +1,43 @@
 # Issue #120: Tech debt: minor findings from issue #31 review
 ## Summary
 Address three tech debt items from the extraction step review:
 1. **Add [UNTRUSTED CONTENT] marker for external-provenance memories** - Wrap corpus text in `[UNTRUSTED CONTENT]...[END UNTRUSTED CONTENT]` markers when the memory has external provenance (provenance == 2), providing prompt injection mitigation.
 2. **Parallel extraction for batch candidates** - Change `extract_batch` from sequential to parallel processing using `futures::future::join_all`, reducing latency for multiple candidates.
 3. **Consolidate duplicate mock gateway test helpers** - Merge `FullMockGateway` (service.rs) and `TestGateway` (service.rs query_memory test) into a shared `test_helpers` module.
 ## Item 1: [UNTRUSTED CONTENT] markers
 ### Approach
 Add a `provenance` parameter to `build_extraction_prompt`. When `provenance == 2` (external), wrap the corpus block in `[UNTRUSTED CONTENT]...[END UNTRUSTED CONTENT]` markers. Update `extract` and `extract_batch` to pass the candidate's provenance field through.
 ### Files changed
 - `services/memory/src/extraction/prompt.rs` - Add provenance param to `build_extraction_prompt`
 - `services/memory/src/extraction/mod.rs` - Pass provenance through `extract` and `extract_batch`
 ## Item 2: Parallel extraction
 ### Approach
 Replace the sequential `for` loop in `extract_batch` with `futures::future::join_all` to process all candidates concurrently. The `ExtractionClient` wraps a `ModelGatewayServiceClient` which supports concurrent calls via `clone()`.
 ### Files changed
 - `services/memory/Cargo.toml` - Add `futures` dependency
 - `services/memory/src/extraction/mod.rs` - Rewrite `extract_batch` with `join_all`
 ## Item 3: Consolidate mock gateway helpers
 ### Approach
 Create a `test_helpers` module at the top of the `#[cfg(test)]` section in `service.rs` containing a single configurable `MockGateway` struct that handles both embedding and inference RPCs. Replace `FullMockGateway` and the inline `TestGateway` with this unified type.
 ### Files changed
 - `services/memory/src/service.rs` - Consolidate into shared `test_helpers` module, update test functions
--- a/services/memory/Cargo.toml
+++ b/services/memory/Cargo.toml
@@ -21,6 +21,7 @@ tokio-stream = "0.1"
 duckdb = { version = "1", features = ["bundled"] }
 chrono = "0.4"
 regex = "1"
 futures = "0.3"
 [dev-dependencies]
 tempfile = "3"
--- a/services/memory/src/extraction/mod.rs
+++ b/services/memory/src/extraction/mod.rs
@@ -67,6 +67,9 @@ impl ExtractionClient {
    /// 4. Include a confidence score (0.0-1.0)
    ///
    /// Uses `TaskComplexity::Simple` for lightweight model routing.
    ///
    /// The `provenance` parameter controls whether the corpus is wrapped in
    /// `[UNTRUSTED CONTENT]` markers (when `provenance == 2`, i.e. external).
    pub async fn extract(
        &self,
        context: &SessionContext,
@@ -74,6 +77,7 @@ impl ExtractionClient {
        corpus: &str,
        memory_name: &str,
        memory_id: &str,
        provenance: i32,
    ) -> Result<ExtractionResult, ExtractionError> {
        // Skip extraction for empty corpus
        if corpus.is_empty() {
@@ -84,7 +88,8 @@ impl ExtractionClient {
            });
        }
-        let extraction_prompt = prompt::build_extraction_prompt(query, corpus, memory_name);
+        let extraction_prompt =
            prompt::build_extraction_prompt(query, corpus, memory_name, provenance);
        let request = InferenceRequest {
            params: Some(InferenceParams {
@@ -104,9 +109,10 @@ impl ExtractionClient {
        Ok(prompt::parse_extraction_response(&response.text, memory_id))
    }
-    /// Extract segments for multiple candidates.
+    /// Extract segments for multiple candidates in parallel.
    ///
-    /// Processes candidates sequentially to avoid overloading the gateway.
+    /// Uses `futures::future::join_all` to process all candidates concurrently,
    /// reducing total latency compared to sequential processing.
    /// On failure for a single candidate, that candidate's extraction is
    /// skipped (returns the full corpus as fallback) and processing continues.
    pub async fn extract_batch(
@@ -115,9 +121,9 @@ impl ExtractionClient {
        query: &str,
        candidates: &[RetrievalCandidate],
    ) -> Vec<ExtractionResult> {
-        let mut results = Vec::with_capacity(candidates.len());
+        let futures: Vec<_> = candidates
-
+            .iter()
-        for candidate in candidates {
+            .map(|candidate| async {
                match self
                    .extract(
                        context,
@@ -125,26 +131,28 @@ impl ExtractionClient {
                        &candidate.corpus,
                        &candidate.name,
                        &candidate.memory_id,
                        candidate.provenance,
                    )
                    .await
                {
-                Ok(result) => results.push(result),
+                    Ok(result) => result,
                    Err(e) => {
                        tracing::warn!(
                            memory_id = %candidate.memory_id,
                            error = %e,
                            "Extraction failed for candidate, using full corpus as fallback"
                        );
-                    results.push(ExtractionResult {
+                        ExtractionResult {
                            memory_id: candidate.memory_id.clone(),
                            segment: candidate.corpus.clone(),
                            confidence: 0.0,
                    });
                        }
                    }
                }
            })
            .collect();
-        results
+        futures::future::join_all(futures).await
    }
 }
@@ -302,7 +310,7 @@ mod tests {
        };
        let result = client
-            .extract(&ctx, "test query", "full corpus text here", "mem name", "mem-1")
+            .extract(&ctx, "test query", "full corpus text here", "mem name", "mem-1", 1)
            .await
            .unwrap();
@@ -329,7 +337,7 @@ mod tests {
        };
        let result = client
-            .extract(&ctx, "test query", "corpus", "name", "mem-1")
+            .extract(&ctx, "test query", "corpus", "name", "mem-1", 1)
            .await;
        assert!(result.is_err());
@@ -359,7 +367,7 @@ mod tests {
        };
        let result = client
-            .extract(&ctx, "test query", "", "mem name", "mem-1")
+            .extract(&ctx, "test query", "", "mem name", "mem-1", 1)
            .await
            .unwrap();
--- a/services/memory/src/extraction/prompt.rs
+++ b/services/memory/src/extraction/prompt.rs
@@ -20,7 +20,22 @@ pub struct ExtractionResult {
 /// The prompt instructs the model to extract only the relevant segment
 /// from the corpus, given the query context. Output format is structured
 /// to enable reliable parsing.
-pub fn build_extraction_prompt(query: &str, corpus: &str, memory_name: &str) -> String {
+///
 /// When `provenance` is `2` (external), the corpus is wrapped in
 /// `[UNTRUSTED CONTENT]...[END UNTRUSTED CONTENT]` markers to mitigate
 /// prompt injection from external-provenance memories.
 pub fn build_extraction_prompt(
    query: &str,
    corpus: &str,
    memory_name: &str,
    provenance: i32,
 ) -> String {
    let corpus_block = if provenance == 2 {
        format!("[UNTRUSTED CONTENT]\n{corpus}\n[END UNTRUSTED CONTENT]")
    } else {
        corpus.to_string()
    };
    format!(
        "Given the following search query and memory content, extract ONLY the segment \
 of the content that is most relevant to the query. Return a concise, focused \
@@ -31,7 +46,7 @@ Memory name: {memory_name}\n\
 \n\
 Content:\n\
 ---\n\
-{corpus}\n\
+{corpus_block}\n\
 ---\n\
 \n\
 Respond in this exact format:\n\
@@ -99,22 +114,43 @@ mod tests {
    #[test]
    fn test_build_extraction_prompt_contains_query() {
-        let prompt = build_extraction_prompt("how to sort arrays", "some corpus", "mem name");
+        let prompt = build_extraction_prompt("how to sort arrays", "some corpus", "mem name", 1);
        assert!(prompt.contains("how to sort arrays"));
    }
    #[test]
    fn test_build_extraction_prompt_contains_corpus() {
-        let prompt = build_extraction_prompt("query", "the corpus content here", "mem name");
+        let prompt = build_extraction_prompt("query", "the corpus content here", "mem name", 1);
        assert!(prompt.contains("the corpus content here"));
    }
    #[test]
    fn test_build_extraction_prompt_contains_memory_name() {
-        let prompt = build_extraction_prompt("query", "corpus", "Rust Sort Algorithm");
+        let prompt = build_extraction_prompt("query", "corpus", "Rust Sort Algorithm", 1);
        assert!(prompt.contains("Rust Sort Algorithm"));
    }
    #[test]
    fn test_build_extraction_prompt_internal_no_untrusted_marker() {
        let prompt = build_extraction_prompt("query", "corpus text", "mem", 1);
        assert!(!prompt.contains("[UNTRUSTED CONTENT]"));
        assert!(!prompt.contains("[END UNTRUSTED CONTENT]"));
    }
    #[test]
    fn test_build_extraction_prompt_external_has_untrusted_marker() {
        let prompt = build_extraction_prompt("query", "external corpus", "mem", 2);
        assert!(prompt.contains("[UNTRUSTED CONTENT]"));
        assert!(prompt.contains("[END UNTRUSTED CONTENT]"));
        assert!(prompt.contains("external corpus"));
    }
    #[test]
    fn test_build_extraction_prompt_unspecified_no_untrusted_marker() {
        let prompt = build_extraction_prompt("query", "corpus text", "mem", 0);
        assert!(!prompt.contains("[UNTRUSTED CONTENT]"));
    }
    #[test]
    fn test_parse_extraction_response_valid() {
        let response = "SEGMENT: The quicksort algorithm divides the array.\nCONFIDENCE: 0.8";
--- a/services/memory/src/service.rs
+++ b/services/memory/src/service.rs
@@ -463,6 +463,113 @@ mod tests {
    use crate::config::{CacheConfig, ExtractionConfig, ProvenanceConfig, RetrievalConfig};
    use llm_multiverse_proto::llm_multiverse::v1::{MemoryEntry, SessionContext};
    /// Shared mock Model Gateway for tests that need embedding and/or inference RPCs.
    ///
    /// Consolidates `FullMockGateway` and the inline `TestGateway` that were
    /// previously duplicated across test functions.
    mod test_helpers {
        use crate::db::schema::EMBEDDING_DIM;
        use llm_multiverse_proto::llm_multiverse::v1::model_gateway_service_server::{
            ModelGatewayService, ModelGatewayServiceServer,
        };
        use llm_multiverse_proto::llm_multiverse::v1::{
            GenerateEmbeddingRequest, GenerateEmbeddingResponse, InferenceRequest,
            InferenceResponse, IsModelReadyRequest, IsModelReadyResponse,
            StreamInferenceRequest, StreamInferenceResponse,
        };
        use std::net::SocketAddr;
        use tonic::{Request, Response, Status};
        /// A configurable mock gateway that handles embedding and inference RPCs.
        ///
        /// - `inference_response`: If `Some`, inference calls return this text.
        ///   If `None`, inference calls return `Unimplemented`.
        /// - Embedding calls always return a vector of `0.10` values matching
        ///   `EMBEDDING_DIM`.
        pub struct MockGateway {
            pub inference_response: Option<String>,
        }
        impl MockGateway {
            /// Create a gateway that supports embeddings but not inference.
            pub fn embedding_only() -> Self {
                Self {
                    inference_response: None,
                }
            }
            /// Create a gateway that supports both embeddings and inference.
            pub fn full(inference_text: &str) -> Self {
                Self {
                    inference_response: Some(inference_text.to_string()),
                }
            }
        }
        #[tonic::async_trait]
        impl ModelGatewayService for MockGateway {
            type StreamInferenceStream =
                tokio_stream::Empty<Result<StreamInferenceResponse, Status>>;
            async fn stream_inference(
                &self,
                _request: Request<StreamInferenceRequest>,
            ) -> Result<Response<Self::StreamInferenceStream>, Status> {
                Err(Status::unimplemented("not needed"))
            }
            async fn inference(
                &self,
                _request: Request<InferenceRequest>,
            ) -> Result<Response<InferenceResponse>, Status> {
                match &self.inference_response {
                    Some(text) => Ok(Response::new(InferenceResponse {
                        text: text.clone(),
                        finish_reason: "stop".to_string(),
                        tokens_used: 30,
                    })),
                    None => Err(Status::unimplemented("not needed")),
                }
            }
            async fn generate_embedding(
                &self,
                _request: Request<GenerateEmbeddingRequest>,
            ) -> Result<Response<GenerateEmbeddingResponse>, Status> {
                let embedding = vec![0.10_f32; EMBEDDING_DIM];
                Ok(Response::new(GenerateEmbeddingResponse {
                    embedding,
                    dimensions: EMBEDDING_DIM as u32,
                }))
            }
            async fn is_model_ready(
                &self,
                _request: Request<IsModelReadyRequest>,
            ) -> Result<Response<IsModelReadyResponse>, Status> {
                Err(Status::unimplemented("not needed"))
            }
        }
        /// Start a mock Model Gateway server and return its address.
        pub async fn start_mock_gateway(gateway: MockGateway) -> SocketAddr {
            let listener = tokio::net::TcpListener::bind("127.0.0.1:0")
                .await
                .unwrap();
            let addr = listener.local_addr().unwrap();
            let incoming = tokio_stream::wrappers::TcpListenerStream::new(listener);
            tokio::spawn(async move {
                tonic::transport::Server::builder()
                    .add_service(ModelGatewayServiceServer::new(gateway))
                    .serve_with_incoming(incoming)
                    .await
                    .unwrap();
            });
            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
            addr
        }
    }
    fn valid_ctx() -> SessionContext {
        SessionContext {
            session_id: "sess-1".into(),
@@ -649,7 +756,6 @@ mod tests {
        .expect("DB setup failed");
        // Create service with mock embedding client via a real mock gateway server.
        // (MockEmbeddingGenerator can't be used directly since the field expects EmbeddingClient.)
        let db_arc = Arc::new(db);
        let mut svc = MemoryServiceImpl::new(
            db_arc,
@@ -658,75 +764,9 @@ mod tests {
            ExtractionConfig::default(),
            CacheConfig::default(),
        );
        // Set embedding client to mock by storing it as Arc<Mutex<dyn EmbeddingGenerator>>
        // Since the field type is Option<Arc<Mutex<EmbeddingClient>>>, we need a different approach.
        // Instead, set up a real mock gateway server.
        // Use the mock gateway server approach from embedding tests
        use llm_multiverse_proto::llm_multiverse::v1::model_gateway_service_server::{
            ModelGatewayService, ModelGatewayServiceServer,
        };
        use llm_multiverse_proto::llm_multiverse::v1::{
            GenerateEmbeddingRequest, GenerateEmbeddingResponse, InferenceRequest,
            InferenceResponse, IsModelReadyRequest, IsModelReadyResponse,
            StreamInferenceRequest, StreamInferenceResponse,
        };
        struct TestGateway;
        #[tonic::async_trait]
        impl ModelGatewayService for TestGateway {
            type StreamInferenceStream =
                tokio_stream::Empty<Result<StreamInferenceResponse, Status>>;
            async fn stream_inference(
                &self,
                _request: Request<StreamInferenceRequest>,
            ) -> Result<Response<Self::StreamInferenceStream>, Status> {
                Err(Status::unimplemented("not needed"))
            }
            async fn inference(
                &self,
                _request: Request<InferenceRequest>,
            ) -> Result<Response<InferenceResponse>, Status> {
                Err(Status::unimplemented("not needed"))
            }
            async fn generate_embedding(
                &self,
                _request: Request<GenerateEmbeddingRequest>,
            ) -> Result<Response<GenerateEmbeddingResponse>, Status> {
                // Return a vector with value 0.10 matching test data
                let embedding = vec![0.10_f32; EMBEDDING_DIM];
                Ok(Response::new(GenerateEmbeddingResponse {
                    embedding,
                    dimensions: EMBEDDING_DIM as u32,
                }))
            }
            async fn is_model_ready(
                &self,
                _request: Request<IsModelReadyRequest>,
            ) -> Result<Response<IsModelReadyResponse>, Status> {
                Err(Status::unimplemented("not needed"))
            }
        }
        let listener = tokio::net::TcpListener::bind("127.0.0.1:0")
            .await
            .expect("bind failed");
        let addr = listener.local_addr().expect("no local addr");
        let incoming = tokio_stream::wrappers::TcpListenerStream::new(listener);
        tokio::spawn(async move {
            tonic::transport::Server::builder()
                .add_service(ModelGatewayServiceServer::new(TestGateway))
                .serve_with_incoming(incoming)
                .await
                .expect("server failed");
        });
        tokio::time::sleep(std::time::Duration::from_millis(50)).await;
        let addr =
            test_helpers::start_mock_gateway(test_helpers::MockGateway::embedding_only()).await;
        let embedding_client = crate::embedding::EmbeddingClient::connect(&format!("http://{addr}"))
            .await
            .expect("connect failed");
@@ -982,81 +1022,12 @@ mod tests {
        assert_eq!(prov.trust_level, crate::provenance::TrustLevel::Revoked);
    }
-    /// Helper to set up a mock gateway server that handles both embedding and inference.
+    /// Start a full mock gateway (embedding + inference) for extraction tests.
-    /// Returns the gateway address.
+    async fn start_full_mock_gateway() -> std::net::SocketAddr {
-    mod extraction_test_helpers {
+        test_helpers::start_mock_gateway(test_helpers::MockGateway::full(
-        use crate::db::schema::EMBEDDING_DIM;
+            "SEGMENT: Extracted relevant text.\nCONFIDENCE: 0.85",
-        use llm_multiverse_proto::llm_multiverse::v1::model_gateway_service_server::{
+        ))
            ModelGatewayService, ModelGatewayServiceServer,
        };
        use llm_multiverse_proto::llm_multiverse::v1::{
            GenerateEmbeddingRequest, GenerateEmbeddingResponse, InferenceRequest,
            InferenceResponse, IsModelReadyRequest, IsModelReadyResponse,
            StreamInferenceRequest, StreamInferenceResponse,
        };
        use std::net::SocketAddr;
        use tonic::{Request, Response, Status};
        pub struct FullMockGateway;
        #[tonic::async_trait]
        impl ModelGatewayService for FullMockGateway {
            type StreamInferenceStream =
                tokio_stream::Empty<Result<StreamInferenceResponse, Status>>;
            async fn stream_inference(
                &self,
                _request: Request<StreamInferenceRequest>,
            ) -> Result<Response<Self::StreamInferenceStream>, Status> {
                Err(Status::unimplemented("not needed"))
            }
            async fn inference(
                &self,
                _request: Request<InferenceRequest>,
            ) -> Result<Response<InferenceResponse>, Status> {
                Ok(Response::new(InferenceResponse {
                    text: "SEGMENT: Extracted relevant text.\nCONFIDENCE: 0.85".to_string(),
                    finish_reason: "stop".to_string(),
                    tokens_used: 30,
                }))
            }
            async fn generate_embedding(
                &self,
                _request: Request<GenerateEmbeddingRequest>,
            ) -> Result<Response<GenerateEmbeddingResponse>, Status> {
                let embedding = vec![0.10_f32; EMBEDDING_DIM];
                Ok(Response::new(GenerateEmbeddingResponse {
                    embedding,
                    dimensions: EMBEDDING_DIM as u32,
                }))
            }
            async fn is_model_ready(
                &self,
                _request: Request<IsModelReadyRequest>,
            ) -> Result<Response<IsModelReadyResponse>, Status> {
                Err(Status::unimplemented("not needed"))
            }
        }
        pub async fn start_full_mock_gateway() -> SocketAddr {
            let listener = tokio::net::TcpListener::bind("127.0.0.1:0")
        .await
                .unwrap();
            let addr = listener.local_addr().unwrap();
            let incoming = tokio_stream::wrappers::TcpListenerStream::new(listener);
            tokio::spawn(async move {
                tonic::transport::Server::builder()
                    .add_service(ModelGatewayServiceServer::new(FullMockGateway))
                    .serve_with_incoming(incoming)
                    .await
                    .unwrap();
            });
            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
            addr
        }
    }
    /// Helper to populate DB with test data for extraction tests.
@@ -1101,7 +1072,7 @@ mod tests {
        populate_test_db(&db);
        let db_arc = Arc::new(db);
-        let addr = extraction_test_helpers::start_full_mock_gateway().await;
+        let addr = start_full_mock_gateway().await;
        let endpoint = format!("http://{addr}");
        let embedding_client = crate::embedding::EmbeddingClient::connect(&endpoint)
@@ -1168,7 +1139,7 @@ mod tests {
        populate_test_db(&db);
        let db_arc = Arc::new(db);
-        let addr = extraction_test_helpers::start_full_mock_gateway().await;
+        let addr = start_full_mock_gateway().await;
        let endpoint = format!("http://{addr}");
        let embedding_client = crate::embedding::EmbeddingClient::connect(&endpoint)
@@ -1230,7 +1201,7 @@ mod tests {
        populate_test_db(&db);
        let db_arc = Arc::new(db);
-        let addr = extraction_test_helpers::start_full_mock_gateway().await;
+        let addr = start_full_mock_gateway().await;
        let endpoint = format!("http://{addr}");
        let embedding_client = crate::embedding::EmbeddingClient::connect(&endpoint)
@@ -1293,7 +1264,7 @@ mod tests {
        populate_test_db(&db);
        let db_arc = Arc::new(db);
-        let addr = extraction_test_helpers::start_full_mock_gateway().await;
+        let addr = start_full_mock_gateway().await;
        let endpoint = format!("http://{addr}");
        let embedding_client = crate::embedding::EmbeddingClient::connect(&endpoint)
@@ -1344,7 +1315,7 @@ mod tests {
        populate_test_db(&db);
        let db_arc = Arc::new(db);
-        let addr = extraction_test_helpers::start_full_mock_gateway().await;
+        let addr = start_full_mock_gateway().await;
        let endpoint = format!("http://{addr}");
        let embedding_client = crate::embedding::EmbeddingClient::connect(&endpoint)
@@ -1413,7 +1384,7 @@ mod tests {
        populate_test_db(&db);
        let db_arc = Arc::new(db);
-        let addr = extraction_test_helpers::start_full_mock_gateway().await;
+        let addr = start_full_mock_gateway().await;
        let endpoint = format!("http://{addr}");
        let embedding_client = crate::embedding::EmbeddingClient::connect(&endpoint)
@@ -1474,7 +1445,7 @@ mod tests {
        populate_test_db(&db);
        let db_arc = Arc::new(db);
-        let addr = extraction_test_helpers::start_full_mock_gateway().await;
+        let addr = start_full_mock_gateway().await;
        let endpoint = format!("http://{addr}");
        let embedding_client = crate::embedding::EmbeddingClient::connect(&endpoint)