Compare commits
2 Commits
09053a8af9
...
b0b98d5b1c
| Author | SHA1 | Date | |
|---|---|---|---|
| b0b98d5b1c | |||
|
|
92214cdb4d |
42
docker/docker-compose.searxng.yml
Normal file
42
docker/docker-compose.searxng.yml
Normal file
@@ -0,0 +1,42 @@
|
||||
# Docker Compose for SearXNG — standalone testing
|
||||
#
|
||||
# Usage:
|
||||
# docker compose -f docker/docker-compose.searxng.yml up -d
|
||||
#
|
||||
# Verification:
|
||||
# curl "http://localhost:8888/search?q=rust+programming&format=json" | jq .
|
||||
#
|
||||
# The port 8888 is exposed for local testing only.
|
||||
# In production, SearXNG is accessed only via the internal Docker network
|
||||
# at searxng:8080 by the Search Service.
|
||||
|
||||
services:
|
||||
searxng:
|
||||
image: searxng/searxng:2024.12.29-b91e848b0
|
||||
container_name: llm-searxng
|
||||
volumes:
|
||||
- ./searxng/settings.yml:/etc/searxng/settings.yml:ro
|
||||
- ./searxng/limiter.toml:/etc/searxng/limiter.toml:ro
|
||||
ports:
|
||||
- "8888:8080"
|
||||
environment:
|
||||
- SEARXNG_SECRET=dev-secret-change-in-production
|
||||
networks:
|
||||
- internal
|
||||
restart: unless-stopped
|
||||
cap_drop:
|
||||
- ALL
|
||||
cap_add:
|
||||
- CHOWN
|
||||
- SETGID
|
||||
- SETUID
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--spider", "--quiet", "http://localhost:8080/healthz"]
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
|
||||
networks:
|
||||
internal:
|
||||
driver: bridge
|
||||
18
docker/searxng/limiter.toml
Normal file
18
docker/searxng/limiter.toml
Normal file
@@ -0,0 +1,18 @@
|
||||
# SearXNG rate limiter configuration
|
||||
# Docs: https://docs.searxng.org/admin/searx.limiter.html
|
||||
#
|
||||
# This limiter configuration is used when Redis is available.
|
||||
# Without Redis, the SearXNG limiter cannot run, so this file has no effect.
|
||||
# For the initial llm-multiverse setup, Redis is not included.
|
||||
# If rate limiting issues arise, add a Redis container and enable
|
||||
# the limiter in settings.yml.
|
||||
|
||||
[botdetection.ip_limit]
|
||||
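# NOTE(review): upstream limiter.toml documents link_token as a boolean that enables the link-token check, not a lifetime in seconds — confirm the intended value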
|
||||
link_token = 600
|
||||
|
||||
[botdetection.ip_lists]
|
||||
# No IP-based blocking for internal service use
|
||||
pass_ip = [
|
||||
"0.0.0.0/0",
|
||||
]
|
||||
116
docker/searxng/settings.yml
Normal file
116
docker/searxng/settings.yml
Normal file
@@ -0,0 +1,116 @@
|
||||
# SearXNG configuration for llm-multiverse Search Service
|
||||
# Docs: https://docs.searxng.org/admin/settings/index.html
|
||||
|
||||
use_default_settings: true
|
||||
|
||||
general:
|
||||
instance_name: "llm-multiverse-search"
|
||||
debug: false
|
||||
enable_metrics: false
|
||||
|
||||
server:
|
||||
# Bind inside the container
|
||||
bind_address: "0.0.0.0"
|
||||
port: 8080
|
||||
secret_key: "change-me-in-production"
|
||||
# Limiter protects against abuse; requires Redis for full functionality.
|
||||
# No Redis is deployed in this setup, so the limiter is turned off below.
|
||||
limiter: false
|
||||
# Public instance features disabled (this is an internal-only service)
|
||||
public_instance: false
|
||||
|
||||
search:
|
||||
# Enable JSON output format for programmatic access
|
||||
formats:
|
||||
- html
|
||||
- json
|
||||
safe_search: 0
|
||||
default_lang: "en"
|
||||
autocomplete: ""
|
||||
|
||||
# Configure which search engines to enable and their priorities.
|
||||
# We keep a curated set relevant to software development and research.
|
||||
engines:
|
||||
# -- General web search --
|
||||
- name: google
|
||||
engine: google
|
||||
shortcut: g
|
||||
disabled: false
|
||||
|
||||
- name: duckduckgo
|
||||
engine: duckduckgo
|
||||
shortcut: ddg
|
||||
disabled: false
|
||||
|
||||
- name: bing
|
||||
engine: bing
|
||||
shortcut: bi
|
||||
disabled: false
|
||||
|
||||
- name: brave
|
||||
engine: brave
|
||||
shortcut: br
|
||||
disabled: false
|
||||
|
||||
# -- Knowledge / Reference --
|
||||
- name: wikipedia
|
||||
engine: wikipedia
|
||||
shortcut: wp
|
||||
disabled: false
|
||||
|
||||
- name: wikidata
|
||||
engine: wikidata
|
||||
shortcut: wd
|
||||
disabled: false
|
||||
|
||||
# -- Software development --
|
||||
- name: github
|
||||
engine: github
|
||||
shortcut: gh
|
||||
disabled: false
|
||||
|
||||
- name: stackoverflow
|
||||
engine: stackoverflow
|
||||
shortcut: so
|
||||
disabled: false
|
||||
|
||||
- name: crates.io
|
||||
engine: crates_io
|
||||
shortcut: crates
|
||||
disabled: false
|
||||
|
||||
- name: pypi
|
||||
engine: pypi
|
||||
shortcut: pypi
|
||||
disabled: false
|
||||
|
||||
- name: docs.rs
|
||||
engine: docs_rs
|
||||
shortcut: drs
|
||||
disabled: false
|
||||
|
||||
# -- Disable noisy/irrelevant engines --
|
||||
- name: 9gag
|
||||
engine: 9gag
|
||||
disabled: true
|
||||
|
||||
- name: flickr
|
||||
engine: flickr
|
||||
disabled: true
|
||||
|
||||
- name: unsplash
|
||||
engine: unsplash
|
||||
disabled: true
|
||||
|
||||
outgoing:
|
||||
# Timeout for requests to upstream search engines
|
||||
request_timeout: 5.0
|
||||
# Upper limit, in seconds, for any request timeout
|
||||
max_request_timeout: 10.0
|
||||
# Use default pool connections
|
||||
pool_connections: 100
|
||||
pool_maxsize: 20
|
||||
# No proxy — direct connections from the container
|
||||
proxies: {}
|
||||
# Use HTTP/2 for outgoing requests to upstream engines
|
||||
enable_http2: true
|
||||
@@ -47,6 +47,7 @@
|
||||
| #41 | Implement StreamInference gRPC endpoint | Phase 5 | `COMPLETED` | Rust | [issue-041.md](issue-041.md) |
|
||||
| #42 | Implement Inference + GenerateEmbedding endpoints | Phase 5 | `COMPLETED` | Rust | [issue-042.md](issue-042.md) |
|
||||
| #43 | Integration tests for Model Gateway | Phase 5 | `COMPLETED` | Rust | [issue-043.md](issue-043.md) |
|
||||
| #44 | Set up SearXNG Docker container | Phase 6 | `COMPLETED` | Docker / YAML | [issue-044.md](issue-044.md) |
|
||||
|
||||
## Status Legend
|
||||
|
||||
|
||||
118
implementation-plans/issue-044.md
Normal file
118
implementation-plans/issue-044.md
Normal file
@@ -0,0 +1,118 @@
|
||||
# Implementation Plan — Issue #44: Set up SearXNG Docker container
|
||||
|
||||
## Metadata
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Issue | [#44](https://git.shahondin1624.de/llm-multiverse/llm-multiverse/issues/44) |
|
||||
| Title | Set up SearXNG Docker container |
|
||||
| Milestone | Phase 6: Search Service |
|
||||
| Labels | — |
|
||||
| Status | `COMPLETED` |
|
||||
| Language | Docker / YAML |
|
||||
| Related Plans | — |
|
||||
| Blocked by | — |
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] SearXNG Docker image configured with relevant search engines
|
||||
- [ ] JSON API enabled and accessible on internal network
|
||||
- [ ] Search engines configured (Google, DuckDuckGo, Wikipedia, etc.)
|
||||
- [ ] Rate limiting configured to avoid upstream bans
|
||||
- [ ] Docker Compose snippet for standalone testing
|
||||
|
||||
## Architecture Analysis
|
||||
|
||||
### Service Context
|
||||
- SearXNG is a meta-search engine used by the Search Service (Python) as its search backend.
|
||||
- The Search Service queries SearXNG's JSON API for web results, then processes them through a readability pipeline.
|
||||
- SearXNG runs as a Docker container on the internal network, accessible only to the Search Service.
|
||||
- Per the architecture doc: "Wraps local SearXNG instance (Docker, JSON API)".
|
||||
|
||||
### Existing Patterns
|
||||
- No existing Docker files in the project yet. This is the first Docker configuration.
|
||||
- The architecture specifies two Docker networks: `edge` (Caddy ↔ orchestrator) and `internal` (inter-service communication).
|
||||
- SearXNG only needs to be on the `internal` network.
|
||||
|
||||
### Dependencies
|
||||
- Docker and Docker Compose available on the target machine.
|
||||
- No dependencies on other project services.
|
||||
|
||||
## Implementation Steps
|
||||
|
||||
### 1. Create SearXNG configuration directory
|
||||
|
||||
Create `docker/searxng/` directory with:
|
||||
- `settings.yml` — SearXNG configuration (search engines, JSON API, rate limiting)
|
||||
- `limiter.toml` — Rate limiter configuration
|
||||
|
||||
### 2. SearXNG `settings.yml`
|
||||
|
||||
Key configuration:
|
||||
- **JSON API enabled:** `search.formats: [html, json]`
|
||||
- **Search engines:** Google, DuckDuckGo, Wikipedia, Bing, Brave, GitHub, StackOverflow (relevant for development tasks)
|
||||
- **Rate limiting:** Enable built-in rate limiter to avoid upstream bans
|
||||
- **No outgoing proxy** — direct connections from the container
|
||||
- **Server settings:** Bind to `0.0.0.0:8080` inside the container
|
||||
- **Disable analytics/tracking features**
|
||||
- **Secret key:** Generated placeholder (overridden at runtime via env var)
|
||||
|
||||
### 3. Rate limiter configuration (`limiter.toml`)
|
||||
|
||||
SearXNG's built-in limiter uses `redis` for rate limiting, but for simplicity we can use the file-based approach or just configure reasonable request intervals in settings.yml via the `outgoing.request_timeout` and engine-level `timeout` values.
|
||||
|
||||
### 4. Docker Compose snippet
|
||||
|
||||
Create `docker/docker-compose.searxng.yml` for standalone testing:
|
||||
|
||||
```yaml
|
||||
services:
|
||||
searxng:
|
||||
image: searxng/searxng:latest
|
||||
container_name: llm-searxng
|
||||
volumes:
|
||||
- ./searxng/settings.yml:/etc/searxng/settings.yml:ro
|
||||
- ./searxng/limiter.toml:/etc/searxng/limiter.toml:ro
|
||||
ports:
|
||||
- "8888:8080" # Expose for standalone testing only
|
||||
environment:
|
||||
- SEARXNG_SECRET=dev-secret-change-in-production
|
||||
networks:
|
||||
- internal
|
||||
restart: unless-stopped
|
||||
|
||||
networks:
|
||||
internal:
|
||||
driver: bridge
|
||||
```
|
||||
|
||||
The port mapping `8888:8080` is for standalone testing. In production, the port will not be exposed externally — only the internal Docker network name (`searxng:8080`) will be used by the Search Service.
|
||||
|
||||
### 5. Tests / Verification
|
||||
|
||||
Since this is Docker infrastructure, verification is manual:
|
||||
- `docker compose -f docker/docker-compose.searxng.yml up -d`
|
||||
- `curl "http://localhost:8888/search?q=test&format=json"` should return JSON search results (quote the URL so the shell does not treat `&` as a background operator)
|
||||
- Document these verification steps in the compose file comments
|
||||
|
||||
## Files to Create/Modify
|
||||
|
||||
| File | Action | Purpose |
|
||||
|---|---|---|
|
||||
| `docker/searxng/settings.yml` | Create | SearXNG configuration with engines, JSON API, rate limiting |
|
||||
| `docker/searxng/limiter.toml` | Create | Rate limiter configuration |
|
||||
| `docker/docker-compose.searxng.yml` | Create | Standalone Docker Compose for testing |
|
||||
|
||||
## Risks and Edge Cases
|
||||
|
||||
- **Upstream engine availability:** Search engines may rate-limit or block SearXNG. The rate limiter configuration mitigates this but doesn't eliminate it.
|
||||
- **SearXNG image updates:** Using `latest` tag — may want to pin to a specific version for reproducibility.
|
||||
- **Redis not included:** SearXNG's rate limiter depends on Redis. For the initial setup the limiter is disabled (`server.limiter: false` in `settings.yml`); Redis can be added later to enable it if needed.
|
||||
- **Secret key:** Must be changed for production deployments. Using env var override.
|
||||
|
||||
## Deviation Log
|
||||
|
||||
_(Filled during implementation if deviations from plan occur)_
|
||||
|
||||
| Deviation | Reason |
|
||||
|---|---|
|
||||
Reference in New Issue
Block a user