---
# LLM Directory Configuration

# Request Configuration
timeout: 30.0  # Request timeout in seconds
max_retries: 3  # Maximum number of retries

# Retry Configuration
retry_config:
  max_attempts: 3  # Maximum retry attempts
  base_delay: 1.0  # Base delay between retries (seconds)
  max_delay: 60.0  # Maximum delay between retries (seconds)
  exponential_base: 2.0  # Exponential backoff base
  jitter: true  # Add random jitter to prevent thundering herd

# Rate Limiting Configuration
rate_limit_config:
  requests_per_minute: 600  # Maximum requests per minute
  tokens_per_minute: 150000  # Maximum tokens per minute
  burst_size: 10  # Burst capacity

# Advanced Configuration
fetch_models_on_init: false  # Fetch available models on initialization
# Model Configuration
# Define available models with their capabilities
models:
  # Qwen Models
  Qwen/Qwen3-235B-A22B-Instruct-2507:
    description: Qwen 3 235b A22B Instruct 2507 the updated version of Qwen3 235B
      A22B, with significant improvements in performance. This model is non-thinking.
    input_price_per_1m: 0.13
    max_input_tokens: 256000
    max_output_tokens: 64000
    name: Qwen/Qwen3-235B-A22B-Instruct-2507
    output_price_per_1m: 0.5
  Qwen/Qwen3-235B-A22B-Thinking-2507:
    description: The thinking version of Qwen 3 235b A22B 2507, with enhanced reasoning
      capabilities and step-by-step problem solving.
    input_price_per_1m: 0.3
    max_input_tokens: 256000
    max_output_tokens: 64000
    name: Qwen/Qwen3-235B-A22B-Thinking-2507
    output_price_per_1m: 0.5
  Qwen/Qwen3-Next-80B-A3B-Instruct:
    description: Based on the new Qwen3-Next architecture (hybrid attention, highly
      sparse MoE, training-stability optimizations, and multi-token prediction), the
      Qwen3-Next-80B-A3B-Instruct model delivers extreme efficiency with only 3B active
      parameters per pass. It performs comparably to Qwen3-235B-A22B-Instruct-2507
      and shows clear advantages on ultra-long context tasks (up to 256K tokens).
    input_price_per_1m: 0.15
    max_input_tokens: 256000
    max_output_tokens: 64000
    name: Qwen/Qwen3-Next-80B-A3B-Instruct
    output_price_per_1m: 0.65
  # DeepSeek Models
  deepseek-ai/deepseek-v3.2-exp:
    description: DeepSeek V3.2 Exp, DeepSeek's latest model offering far better performance
      especially on longer contexts than its predecessors. Current flagship model
      by DeepSeek. FP8.
    input_price_per_1m: 0.28
    max_input_tokens: 163800
    max_output_tokens: 40960
    name: deepseek-ai/deepseek-v3.2-exp
    output_price_per_1m: 0.42
  deepseek-ai/deepseek-v3.2-exp-thinking:
    description: DeepSeek V3.2 Exp Thinking, DeepSeek's latest model offering far
      better performance especially on longer contexts than its predecessors. Current
      flagship model by DeepSeek. FP8.
    input_price_per_1m: 0.28
    max_input_tokens: 163800
    max_output_tokens: 40960
    name: deepseek-ai/deepseek-v3.2-exp-thinking
    output_price_per_1m: 0.42
  # Minimax Models
  minimax/minimax-m2.1:
    description: MiniMax M2.1 builds on M2 with enhanced context understanding and
      improved complex tool use. 230B parameter MoE model (10B active) optimized for
      agentic workflows and long-horizon tasks. FP8.
    input_price_per_1m: 0.33
    max_input_tokens: 200000
    max_output_tokens: 50000
    name: minimax/minimax-m2.1
    output_price_per_1m: 1.32
  # Kimi Models
  moonshotai/Kimi-K2-Instruct-0905:
    description: Kimi K2 0905. Kimi-k2 is a Mixture-of-Experts (MoE) foundation model
      with exceptional coding and agent capabilities, featuring 1 trillion total parameters
      and 32 billion activated parameters. In benchmark evaluations covering general
      knowledge reasoning, programming, mathematics, and agent-related tasks, the
      K2 model outperforms other leading open-source models. Quantized at FP8.
    input_price_per_1m: 0.4
    max_input_tokens: 256000
    max_output_tokens: 64000
    name: moonshotai/Kimi-K2-Instruct-0905
    output_price_per_1m: 2.0
  moonshotai/kimi-k2-thinking:
    # NOTE(review): original entry pointed name at Kimi-K2-Instruct-0905 and reused
    # that model's description - fixed so this route resolves to the thinking model.
    description: Kimi K2 Thinking. Reasoning variant of Kimi-k2, a Mixture-of-Experts
      (MoE) foundation model with exceptional coding and agent capabilities, featuring
      1 trillion total parameters and 32 billion activated parameters. Quantized at FP8.
    input_price_per_1m: 0.4
    max_input_tokens: 256000
    max_output_tokens: 64000
    name: moonshotai/kimi-k2-thinking
    output_price_per_1m: 2.0
  moonshotai/kimi-k2.5:
    description: Kimi K2.5 is Moonshot AI's native multimodal model built on Kimi K2
      with ~15T mixed visual and text tokens, delivering strong general reasoning, visual
      coding, and agentic tool-calling. This route uses instant (non-thinking) mode
      for faster responses.
    input_price_per_1m: 0.6
    max_input_tokens: 256000
    max_output_tokens: 64000
    name: moonshotai/kimi-k2.5
    output_price_per_1m: 3.0
  # Key quoted because it contains a colon, which is risky in plain YAML scalars.
  'moonshotai/kimi-k2.5:thinking':
    description: Kimi K2.5 with thinking mode enabled. Built on Kimi K2 with ~15T mixed
      visual and text tokens, it excels at general reasoning, visual coding, and agentic
      tool-calling. Produces reasoning traces for complex multi-step workflows.
    input_price_per_1m: 0.6
    max_input_tokens: 256000
    max_output_tokens: 64000
    name: 'moonshotai/kimi-k2.5:thinking'
    output_price_per_1m: 3.0
  # Qwen Coder Models
  qwen/qwen3-coder:
    description: Qwen 3 Coder 480B, a 480 billion total parameter model with 35B active,
      and 160 total experts with 8 active. Performs similar to Claude 4 Sonnet in
      coding benchmarks, but does so at a much lower price.
    input_price_per_1m: 0.13
    max_input_tokens: 262000
    max_output_tokens: 65500
    name: qwen/qwen3-coder
    output_price_per_1m: 0.5
  qwen3-30b-a3b-instruct-2507:
    description: Qwen3-30B-A3B-Instruct-2507 is a 30.5B-parameter mixture-of-experts
      language model from Qwen, with 3.3B active parameters per inference. Significant
      improvements in general capabilities, including instruction following, logical
      reasoning, text comprehension, mathematics, science, coding and tool usage.
    input_price_per_1m: 0.2
    max_input_tokens: 256000
    max_output_tokens: 64000
    name: qwen3-30b-a3b-instruct-2507
    output_price_per_1m: 0.5
  # GLM Models
  z-ai/glm-4.6:
    description: Latest GLM series chat model with strong general performance. Quantized
      at FP8
    input_price_per_1m: 0.6
    max_input_tokens: 200000
    max_output_tokens: 50000
    name: z-ai/glm-4.6
    output_price_per_1m: 1.7
  # Key quoted because it contains a colon, which is risky in plain YAML scalars.
  'z-ai/glm-4.6:thinking':
    description: Thinking version of the latest GLM series chat model with strong
      general performance. Quantized at FP8
    input_price_per_1m: 0.6
    max_input_tokens: 200000
    max_output_tokens: 50000
    name: 'z-ai/glm-4.6:thinking'
    output_price_per_1m: 1.7
  zai-org/glm-4.7-flash:
    # NOTE(review): description appears copied from the glm-4.6:thinking entry; the
    # model id suggests a flash (lightweight) variant - confirm the correct description.
    description: Thinking version of the latest GLM series chat model with strong
      general performance. Quantized at FP8
    input_price_per_1m: 0.6
    max_input_tokens: 200000
    max_output_tokens: 50000
    name: zai-org/glm-4.7-flash
    output_price_per_1m: 1.7
  # Chutes Models
  zai-org/GLM-4.7-TEE:
    description: Latest GLM series chat model with strong general performance, served
      in a Trusted Execution Environment (TEE). Quantized at FP8
    input_price_per_1m: 0.6
    max_input_tokens: 200000
    max_output_tokens: 50000
    name: zai-org/GLM-4.7-TEE
    output_price_per_1m: 1.7
  deepseek-ai/DeepSeek-V3.2-Speciale-TEE:
    # NOTE(review): original description named "DeepSeek V3.2 Exp Thinking"; adjusted
    # to match the Speciale model id - confirm details.
    description: DeepSeek V3.2 Speciale served in a Trusted Execution Environment
      (TEE). Offers far better performance especially on longer contexts than its
      predecessors. FP8.
    input_price_per_1m: 0.28
    max_input_tokens: 163800
    max_output_tokens: 40960
    name: deepseek-ai/DeepSeek-V3.2-Speciale-TEE
    output_price_per_1m: 0.42
  openai/gpt-oss-120b-TEE:
    # NOTE(review): original description was a copy-paste of the DeepSeek entry;
    # rewritten from the model id - confirm details.
    description: OpenAI GPT-OSS 120B served in a Trusted Execution Environment (TEE).
    input_price_per_1m: 0.28
    max_input_tokens: 131000
    max_output_tokens: 40960
    name: openai/gpt-oss-120b-TEE
    output_price_per_1m: 0.42
  moonshotai/Kimi-K2-Thinking-TEE:
    # NOTE(review): original description was a copy-paste of the DeepSeek entry;
    # rewritten from the model id - confirm details.
    description: Kimi K2 Thinking served in a Trusted Execution Environment (TEE).
    input_price_per_1m: 0.28
    max_input_tokens: 131000
    max_output_tokens: 40960
    name: moonshotai/Kimi-K2-Thinking-TEE
    output_price_per_1m: 0.42
  nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16:
    # NOTE(review): original description was a copy-paste of a GLM entry and claimed
    # FP8, contradicting the BF16 model id; rewritten from the id - confirm details.
    description: NVIDIA Nemotron 3 Nano 30B A3B served in BF16 precision.
    input_price_per_1m: 0.6
    max_input_tokens: 256000
    max_output_tokens: 64000
    name: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16
    output_price_per_1m: 0.6
# Langfuse Observability Configuration (optional)
langfuse:
  enabled: true  # Enable/disable Langfuse tracking
  # public_key: pk-lf-...  # Set LANGFUSE_PUBLIC_KEY in .env
  # secret_key: sk-lf-...  # Set LANGFUSE_SECRET_KEY in .env
  # host: http://localhost:3000  # Set LANGFUSE_HOST in .env
  timeout: 5.0  # Request timeout
  enabled_for_litellm: true  # Enable LiteLLM auto-tracking
  enabled_for_langchain: true  # Enable LangChain callback tracking
  enabled_for_custom_client: true  # Enable custom client tracking
  default_tags: []  # Default tags for traces
  prompt_tracking:  # Prompt tracking configuration (optional)
    enabled: true  # Enable/disable prompt tracking in Langfuse (default: false to avoid breaking observability)
    store_in_langfuse: true  # Store prompts in Langfuse (requires enabled: true)
    store_in_postgresql: true  # Store prompts in PostgreSQL (always enabled as fallback)
  cost_tracking:  # Cost tracking configuration (optional)
    enabled: true  # Enable/disable cost aggregation and tracking
    # Model pricing is automatically read from models section above
    # Optional: override pricing here if needed
    # model_pricing:
    #   "qwen/qwen3-coder": 0.0001  # per 1K tokens (example)
  docker:  # Docker Compose service management
    compose_file: langfuse/docker-compose.yml  # Path to docker-compose.yml file
    services: []  # Optional: specific services to manage (empty = all services)
# Workflows Configuration (optional)
# This section configures LangGraph workflow behavior (checkpoints, streaming, etc.)
# For LangGraph workflow LOGGING, see the "Logging Configuration" section below
workflows:
  enabled: true  # Enable/disable workflows support
  checkpoint:  # Checkpoint persistence settings
    enabled: false  # Enable checkpoint persistence
    type: memory  # "memory" (in-memory) or "sqlite" (file-based)
    path: checkpoints.db  # Path for SQLite checkpoints (only used if type is "sqlite")
  human_in_loop:  # Human-in-the-loop settings
    enabled: false  # Enable human-in-the-loop nodes
    timeout: 300.0  # Timeout in seconds for human approval
    approval_required: true  # Require explicit approval (vs auto-approve)
    default_action: reject  # Default action if timeout: "approve" or "reject"
  streaming:  # Streaming settings
    enabled: false  # Enable streaming for graph execution
    stream_mode: values  # "values" (final state), "updates" (state updates), or "debug" (all events)
    include_events: true  # Include event details in stream
  swarm_analysis:  # Swarm Analysis configuration
    enable_tool_calling: true  # Enable tool calling for agents (default: true)
    enable_static_dependency_resolution: true  # Enable static dependency resolution (default: true)
    max_tool_calls_per_agent: 10  # Maximum tool calls per agent (default: 10)
# Role Validation Configuration
role_validation:
  # Safe phrases that contain suspicious words but are legitimate
  # These phrases will always be allowed even if they contain words like "command", "execute", etc.
  safe_phrases:
    - command pattern
    - command pattern validator
    - execution pattern
    - execution context
    - execution flow
    - execution path
    - execution model
    - execution environment
    - system architecture
    - system design
    - system analyst
    - system reviewer
    - architecture pattern
    - design pattern
    - pattern validator
    - pattern analyzer
    - tool execution
    - code execution
    - shell script
    - shell integration
    - root cause
    - root analysis
    - admin panel
    - admin interface
    - admin dashboard
  # Dangerous patterns that should always be rejected (regex patterns)
  # These are context-independent security risks
  # Quoted so regex backslashes and metacharacters stay literal.
  dangerous_patterns:
    - '(?i)\b(delete|drop|truncate|remove)\s+(all|everything|data|database|table|file|system)\b'
    - '(?i)\b(execute|exec|eval)\s+(arbitrary|system|shell|command|code)\b'
    - '(?i)\b(sudo|root)\s+(access|privilege|permission|execute)\b'
    - '(?i)\b(grant|revoke)\s+(all|admin|root|sudo)\b'
    - '(?i)\b(alter|update|insert)\s+(system|database|config|security)\b'
    - '(?i)^(admin|root|sudo|system|shell|command|execute|delete|drop|truncate|remove|alter|update|insert|grant|revoke|eval|exec)\s*$'
  # Character normalization map (special chars -> safe replacements)
  # All keys quoted uniformly so no punctuation is ever misread as YAML syntax.
  char_normalization:
    '!': exclamation
    '#': hash
    '$': dollar
    '%': percent
    '&': and
    '(': left_paren
    ')': right_paren
    '*': star
    '+': plus
    '/': slash
    '<': less
    '=': equals
    '>': greater
    '?': question
    '@': at
    '[': left_bracket
    '\': backslash
    ']': right_bracket
    '^': caret
    '{': left_brace
    '|': pipe
    '}': right_brace
    '~': tilde
  # Allowed character pattern (regex)
  # Expanded from original to allow common punctuation
  allowed_char_pattern: '^[a-zA-Z0-9\s\-_&()]+$'
  # Maximum role name length
  max_role_name_length: 100
# Prompt Validation Configuration
prompt_validation:
  # Minimum validity score threshold (0.0-1.0)
  # Prompts with overall_score >= this value are considered valid
  min_validity_score: 0.7
  # High confidence threshold (0.0-1.0)
  # Prompts with overall_score >= this value are considered high confidence
  high_confidence_threshold: 0.9
# Database Configuration (CodeLumen)
# All database services can be configured via environment variables or this config file
# Environment variables take precedence over config file values
database:
  postgresql:
    host: localhost  # PostgreSQL host (POSTGRES_HOST)
    port: 5432  # PostgreSQL port (POSTGRES_PORT)
    user: postgres  # PostgreSQL user (POSTGRES_USER)
    password: postgres  # PostgreSQL password (POSTGRES_PASSWORD) - CHANGE IN PRODUCTION
    database: codelumen  # Database name (POSTGRES_DB)
    schema: codelumen  # Main schema name (POSTGRES_SCHEMA)
    cache_schema: codelumen_cache  # Cache schema name (POSTGRES_CACHE_SCHEMA)
  redis:
    host: localhost  # Redis host (REDIS_HOST)
    port: 6379  # Redis port (REDIS_PORT)
    password: myredissecret  # Redis password (REDIS_AUTH or REDIS_PASSWORD) - CHANGE IN PRODUCTION
    db: 1  # Redis database number 0-15 (REDIS_DB) - Use DB 1 for CodeLumen (separate from Langfuse)
    key_prefix: 'codelumen:'  # Key prefix for all CodeLumen keys (REDIS_KEY_PREFIX)
    socket_timeout: 5  # Socket timeout in seconds (REDIS_SOCKET_TIMEOUT)
    socket_connect_timeout: 5  # Connection timeout in seconds (REDIS_SOCKET_CONNECT_TIMEOUT)
    health_check_interval: 30  # Health check interval in seconds (REDIS_HEALTH_CHECK_INTERVAL)
  clickhouse:
    host: localhost  # ClickHouse host (CLICKHOUSE_HOST)
    http_port: 8123  # ClickHouse HTTP port (CLICKHOUSE_HTTP_PORT)
    native_port: 9000  # ClickHouse native port (CLICKHOUSE_NATIVE_PORT)
    user: clickhouse  # ClickHouse user (CLICKHOUSE_USER)
    password: clickhouse  # ClickHouse password (CLICKHOUSE_PASSWORD) - CHANGE IN PRODUCTION
    database: codelumen_analytics  # Database name (CLICKHOUSE_DB)
    secure: false  # Use secure (HTTPS) connection (CLICKHOUSE_SECURE)
    verify: true  # Verify SSL certificates (CLICKHOUSE_VERIFY)
    timeout: 30  # Request timeout in seconds (CLICKHOUSE_TIMEOUT)
  minio:
    # Quoted so the host:port pair is always read as a string.
    endpoint: 'localhost:9090'  # MinIO endpoint host:port (MINIO_ENDPOINT)
    access_key: minio  # MinIO access key (MINIO_ACCESS_KEY or MINIO_ROOT_USER)
    secret_key: miniosecret  # MinIO secret key (MINIO_SECRET_KEY or MINIO_ROOT_PASSWORD) - CHANGE IN PRODUCTION
    bucket: codelumen  # Default bucket name (MINIO_BUCKET)
    region: us-east-1  # S3 region (MINIO_REGION)
    secure: false  # Use secure (HTTPS) connection (MINIO_SECURE)
    force_path_style: true  # Force path-style addressing (MINIO_FORCE_PATH_STYLE)
# Logging Configuration
logging:
  enabled: true  # Enable/disable logging
  log_dir: logs  # Directory for log files
  log_level: INFO  # DEBUG, INFO, WARNING, ERROR, CRITICAL
  enable_file_logging: true  # Enable file logging
  enable_console_logging: true  # Enable console logging
  use_json_format: false  # JSON for files (false = readable format)
  date_based_files: true  # app_YYYY-MM-DD.log vs app.log
  rotation:  # Log rotation settings
    max_bytes: 10485760  # 10MB (default 5MB in CodeLumen v2)
    backup_count: 5  # Number of backup files to keep per day
    retention_days: 30  # Keep logs for 30 days (old files will be automatically deleted)
    enable_time_rotation: true  # Enable daily rotation at midnight (works with date_based_files)
  module_loggers:  # Module-specific loggers
    enabled: true  # Enable module-specific loggers
    modules:  # Separate logs for these modules
      - langfuse  # Langfuse integration logs (logs/langfuse/)
      - workflows  # LangGraph workflow execution logs (logs/workflows/)