ai-agent/internal/config/qwen_router.go
admin 8dc496b626
Some checks failed
CI / test (push) Has been cancelled
Release / release (push) Failing after 4m36s
first commit
2026-03-08 15:40:34 +07:00

365 lines
10 KiB
Go

package config
import (
"context"
"strings"
"sync"
"time"
)
// QwenModelRouter chooses a Qwen model for a query. Queries are scored by
// classifyQwenTask into a QwenComplexity tier, and the tier (together with a
// ModeContext) determines which model name is returned.
type QwenModelRouter struct {
// config is the model configuration consulted by SelectModel,
// isModelAvailable and SelectAvailableModelForTask.
config *ModelConfig
// overrideLog holds the entries appended by RecordOverride, which keeps
// only the 100 most recent ones.
overrideLog []ModelOverride
// modeContext is the mode used when classifying queries. Routers built by
// NewQwenModelRouter start in ModeAskContext; SetModeContext changes it.
modeContext ModeContext
// mu is taken by SetModeContext, RecordOverride, GetLearnedPatterns and
// GetRecommendedModel around their access to modeContext and overrideLog.
mu sync.RWMutex
}
// ModeContext identifies the mode a query is issued in. It selects the
// per-mode model table in SelectModelForMode and biases the score computed by
// classifyQwenTask.
type ModeContext int
const (
// ModeAskContext lowers the classification score by 1.
ModeAskContext ModeContext = iota
// ModePlanContext leaves the classification score unchanged.
ModePlanContext
// ModeBuildContext raises the classification score by 1.
ModeBuildContext
)
// QwenComplexity is the complexity tier assigned to a query by
// classifyQwenTask. SelectModel passes its string value to
// ModelConfig.SelectModelForTask.
type QwenComplexity string
const (
// QwenTrivial is returned for a final score of -3 or lower.
QwenTrivial QwenComplexity = "trivial"
// QwenSimple is returned for a final score from -2 through 1.
QwenSimple QwenComplexity = "simple"
// QwenModerate is returned for a final score from 2 through 5.
QwenModerate QwenComplexity = "moderate"
// QwenAdvanced is returned for a final score of 6 or higher.
QwenAdvanced QwenComplexity = "advanced"
)
// Lookup tables used by classifyQwenTask. Every entry is lowercase because
// the query is lowercased before it is compared against them.
var (
// qwenTrivialIndicators each subtract 4 from the score when found in the
// query.
qwenTrivialIndicators = []string{
"what is", "who is", "when is", "where is",
"define", "meaning of", "synonym", "antonym",
"list files", "show me", "display",
"yes", "no", "ok", "thanks",
"hello", "hi", "hey",
}
// qwenSimpleIndicators each subtract 1 from the score when found in the
// query.
qwenSimpleIndicators = []string{
"how do i", "explain", "what does", "why does",
"find", "search", "get", "read",
"print", "echo", "cat", "ls", "grep",
"simple", "quick", "fast", "brief",
"check", "verify", "test",
"create file", "write file", "save",
}
// qwenModerateIndicators each add 2 to the score when found in the query.
// Note that "test" is also listed in qwenSimpleIndicators, so a query
// containing it receives both adjustments.
qwenModerateIndicators = []string{
"create", "generate", "add", "modify", "update",
"fix", "debug", "refactor", "optimize",
"function", "class", "method", "interface",
"test", "unit test", "integration test",
"script", "command", "pipeline",
"compare", "analyze", "review",
"multiple", "several", "across",
}
// qwenAdvancedIndicators each add 4 to the score when found in the query.
qwenAdvancedIndicators = []string{
"architecture", "design pattern", "system design",
"infrastructure", "deployment", "scaling",
"security audit", "performance optimization",
"multi-step", "complex", "comprehensive",
"build a", "implement", "develop", "engineer",
"full stack", "end-to-end", "production",
"migration", "refactor entire", "rewrite",
}
// qwenCodePatterns maps programming terms to a tier. When a term is found
// in the query, classifyQwenTask adjusts the score by -1 for QwenSimple,
// +2 for QwenModerate and +4 for QwenAdvanced. Several keys also appear in
// the indicator lists above, in which case both adjustments apply.
qwenCodePatterns = map[string]QwenComplexity{
"variable": QwenSimple,
"constant": QwenSimple,
"function": QwenSimple,
"loop": QwenSimple,
"condition": QwenSimple,
"array": QwenSimple,
"slice": QwenSimple,
"map": QwenSimple,
"struct": QwenModerate,
"interface": QwenModerate,
"generics": QwenModerate,
"concurrency": QwenModerate,
"goroutine": QwenModerate,
"channel": QwenModerate,
"mutex": QwenModerate,
"architecture": QwenAdvanced,
"pattern": QwenAdvanced,
"microservice": QwenAdvanced,
"distributed": QwenAdvanced,
"kubernetes": QwenAdvanced,
}
)
// NewQwenModelRouter returns a router backed by cfg. The router starts in
// ModeAskContext with an empty, non-nil override log.
func NewQwenModelRouter(cfg *ModelConfig) *QwenModelRouter {
	router := new(QwenModelRouter)
	router.config = cfg
	router.overrideLog = []ModelOverride{}
	router.modeContext = ModeAskContext
	return router
}
// SetModeContext replaces the router's current mode while holding the write
// lock.
func (r *QwenModelRouter) SetModeContext(mode ModeContext) {
	r.mu.Lock()
	r.modeContext = mode
	r.mu.Unlock()
}
// ClassifyTaskComplexity scores query with classifyQwenTask using the
// router's current mode context and returns the resulting tier.
//
// NOTE(review): r.modeContext is read here without holding r.mu, so this read
// is not synchronized with a concurrent SetModeContext. A lock cannot simply
// be added in this method: RecordOverride reaches it through SelectModel while
// already holding r.mu, and sync.RWMutex is not reentrant.
func (r *QwenModelRouter) ClassifyTaskComplexity(query string) QwenComplexity {
	currentMode := r.modeContext
	return classifyQwenTask(query, currentMode)
}
// SelectModel classifies query with ClassifyTaskComplexity and returns the
// model that the router's configuration maps to that tier.
func (r *QwenModelRouter) SelectModel(query string) string {
	task := string(r.ClassifyTaskComplexity(query))
	return r.config.SelectModelForTask(task)
}
// SelectModelForMode returns the model for query using the selection table of
// the given mode. A mode that is not one of the three known contexts falls
// back to SelectModel.
func (r *QwenModelRouter) SelectModelForMode(query string, mode ModeContext) string {
	// Start from the generic selector and swap in the mode-specific one.
	pick := r.SelectModel
	switch mode {
	case ModeAskContext:
		pick = r.selectAskModel
	case ModePlanContext:
		pick = r.selectPlanModel
	case ModeBuildContext:
		pick = r.selectBuildModel
	}
	return pick(query)
}
// selectAskModel picks the model for a query in ask mode: advanced queries
// get the 4b model, trivial and simple queries get the 0.8b model when the
// configuration lists it, and everything else gets the 2b model.
func (r *QwenModelRouter) selectAskModel(query string) string {
	const fallback = "qwen3.5:2b"

	switch classifyQwenTask(query, ModeAskContext) {
	case QwenAdvanced:
		return "qwen3.5:4b"
	case QwenTrivial, QwenSimple:
		// The smallest model is optional; use it only when configured.
		if smallest := "qwen3.5:0.8b"; r.isModelAvailable(smallest) {
			return smallest
		}
	}
	return fallback
}
// selectPlanModel picks the model for a query in plan mode: 2b for trivial
// and simple queries, 9b for advanced ones, and 4b otherwise.
func (r *QwenModelRouter) selectPlanModel(query string) string {
	level := classifyQwenTask(query, ModePlanContext)
	if level == QwenTrivial || level == QwenSimple {
		return "qwen3.5:2b"
	}
	if level == QwenAdvanced {
		return "qwen3.5:9b"
	}
	// Moderate queries and any unrecognised tier share the mid-size model.
	return "qwen3.5:4b"
}
// selectBuildModel picks the model for a query in build mode: 9b for advanced
// queries, 2b for trivial and simple ones, and 4b otherwise.
func (r *QwenModelRouter) selectBuildModel(query string) string {
	switch classifyQwenTask(query, ModeBuildContext) {
	case QwenAdvanced:
		return "qwen3.5:9b"
	case QwenSimple, QwenTrivial:
		return "qwen3.5:2b"
	}
	// Moderate queries and any unrecognised tier share the mid-size model.
	return "qwen3.5:4b"
}
// isModelAvailable reports whether the router's configuration lists a model
// whose Name equals name exactly.
func (r *QwenModelRouter) isModelAvailable(name string) bool {
	found := false
	for _, candidate := range r.config.Models {
		if candidate.Name == name {
			found = true
			break
		}
	}
	return found
}
// classifyQwenTask scores query and maps the score to a complexity tier.
//
// The score starts at 0 and is adjusted by:
//   - indicator lists: -4 per trivial, -1 per simple, +2 per moderate and +4
//     per advanced indicator found in the lowercased query;
//   - qwenCodePatterns: -1, +2 or +4 per pattern found, by the pattern's tier;
//   - length: +3 above 50 words, +1 above 30, and -2 below 5 words when the
//     score is not already positive;
//   - "why"/"reason" (+2), "how" in queries longer than 10 words (+1), and a
//     "?" in queries shorter than 10 words (-1);
//   - mode: -1 for ModeAskContext, +1 for ModeBuildContext.
//
// A final score of -3 or lower is trivial, up to 1 is simple, up to 5 is
// moderate and anything higher is advanced.
//
// Terms are matched on word boundaries rather than as raw substrings.
// Otherwise "hi" fires inside "this" and "architecture", "ok" inside "token"
// and "how" inside "show", which pulled ordinary engineering requests down
// into the trivial tier. Trivial indicators and "why"/"how" must match whole
// words; every other term only has to start at a word boundary so that
// inflected forms such as "implementing" or "microservices" still count.
func classifyQwenTask(query string, mode ModeContext) QwenComplexity {
	lowerQuery := strings.ToLower(query)
	wordCount := len(strings.Fields(lowerQuery))

	score := 0
	for _, indicator := range qwenTrivialIndicators {
		if qwenTermMatches(lowerQuery, indicator, true) {
			score -= 4
		}
	}
	for _, indicator := range qwenSimpleIndicators {
		if qwenTermMatches(lowerQuery, indicator, false) {
			score--
		}
	}
	for _, indicator := range qwenModerateIndicators {
		if qwenTermMatches(lowerQuery, indicator, false) {
			score += 2
		}
	}
	for _, indicator := range qwenAdvancedIndicators {
		if qwenTermMatches(lowerQuery, indicator, false) {
			score += 4
		}
	}
	// Map iteration order is irrelevant here: contributions are only summed.
	for pattern, complexity := range qwenCodePatterns {
		if !qwenTermMatches(lowerQuery, pattern, false) {
			continue
		}
		switch complexity {
		case QwenSimple:
			score--
		case QwenModerate:
			score += 2
		case QwenAdvanced:
			score += 4
		}
	}

	// Long queries lean complex; very short ones lean trivial unless an
	// indicator has already pushed the score above zero.
	switch {
	case wordCount > 50:
		score += 3
	case wordCount > 30:
		score++
	case wordCount < 5 && score <= 0:
		score -= 2
	}

	if qwenTermMatches(lowerQuery, "why", true) || qwenTermMatches(lowerQuery, "reason", false) {
		score += 2
	}
	if wordCount > 10 && qwenTermMatches(lowerQuery, "how", true) {
		score++
	}
	if wordCount < 10 && strings.Contains(lowerQuery, "?") {
		score--
	}

	switch mode {
	case ModeAskContext:
		score--
	case ModeBuildContext:
		score++
	}

	switch {
	case score <= -3:
		return QwenTrivial
	case score <= 1:
		return QwenSimple
	case score <= 5:
		return QwenModerate
	default:
		return QwenAdvanced
	}
}

// qwenTermMatches reports whether term occurs in text starting at a word
// boundary and, when wholeWord is set, also ending at one.
func qwenTermMatches(text, term string, wholeWord bool) bool {
	if term == "" {
		return false
	}
	for from := 0; from < len(text); {
		offset := strings.Index(text[from:], term)
		if offset < 0 {
			return false
		}
		start := from + offset
		end := start + len(term)
		startsWord := start == 0 || !isQwenWordByte(text[start-1])
		endsWord := end == len(text) || !isQwenWordByte(text[end])
		if startsWord && (endsWord || !wholeWord) {
			return true
		}
		from = start + 1
	}
	return false
}

// isQwenWordByte reports whether b can be part of a word: an ASCII letter,
// digit or underscore, or any byte of a multi-byte UTF-8 sequence (so that
// non-ASCII letters are never treated as separators).
func isQwenWordByte(b byte) bool {
	switch {
	case b >= 'a' && b <= 'z', b >= 'A' && b <= 'Z', b >= '0' && b <= '9':
		return true
	case b == '_', b >= 0x80:
		return true
	}
	return false
}
// RecordOverride logs that the user chose userModel for query, together with
// the model SelectModel would have picked and the current time. Only the 100
// most recent entries are kept.
//
// SelectModel is invoked while r.mu is write-locked, so nothing on that call
// path may acquire r.mu.
func (r *QwenModelRouter) RecordOverride(query, userModel string) {
	const maxOverrides = 100

	r.mu.Lock()
	defer r.mu.Unlock()

	entry := ModelOverride{
		Query:       query,
		UserModel:   userModel,
		RouterModel: r.SelectModel(query),
		Timestamp:   time.Now(),
	}
	r.overrideLog = append(r.overrideLog, entry)

	// Drop the oldest entries once the cap is exceeded.
	if excess := len(r.overrideLog) - maxOverrides; excess > 0 {
		r.overrideLog = r.overrideLog[excess:]
	}
}
// GetLearnedPatterns derives word-to-tier associations from the override log.
//
// Each logged override is mapped to a tier from the size marker in the model
// the user chose ("0.8b" trivial, "2b" simple, "4b" moderate, "9b" advanced);
// entries with an empty query, an empty model or no recognised marker are
// skipped. Every word of at least three bytes in the lowercased query then
// votes for that tier. A word is reported with its most-voted tier once that
// tier has at least two votes.
//
// Ties are resolved in favour of the lower tier, so the result is the same on
// every call; choosing the winner by ranging over the vote map would make
// tied words flip between tiers from one call to the next.
//
// It returns nil when fewer than three overrides have been recorded.
//
// NOTE(review): size markers are matched as substrings of the model name, so
// a name such as "…:32b" is counted as "2b" — confirm whether that matters
// for the models users can select.
func (r *QwenModelRouter) GetLearnedPatterns() map[string]QwenComplexity {
	r.mu.RLock()
	defer r.mu.RUnlock()

	if len(r.overrideLog) < 3 {
		return nil
	}

	wordCounts := make(map[string]map[QwenComplexity]int)
	for _, o := range r.overrideLog {
		if o.Query == "" || o.UserModel == "" {
			continue
		}

		var complexity QwenComplexity
		switch {
		case strings.Contains(o.UserModel, "0.8b"):
			complexity = QwenTrivial
		case strings.Contains(o.UserModel, "2b"):
			complexity = QwenSimple
		case strings.Contains(o.UserModel, "4b"):
			complexity = QwenModerate
		case strings.Contains(o.UserModel, "9b"):
			complexity = QwenAdvanced
		default:
			continue
		}

		for _, w := range strings.Fields(strings.ToLower(o.Query)) {
			if len(w) < 3 {
				continue
			}
			votes, ok := wordCounts[w]
			if !ok {
				votes = make(map[QwenComplexity]int)
				wordCounts[w] = votes
			}
			votes[complexity]++
		}
	}

	// Fixed evaluation order; with a strict ">" the earliest tier wins ties.
	tiers := [...]QwenComplexity{QwenTrivial, QwenSimple, QwenModerate, QwenAdvanced}

	wordComplexity := make(map[string]QwenComplexity)
	for word, votes := range wordCounts {
		maxCount := 0
		var dominant QwenComplexity
		for _, tier := range tiers {
			if cnt := votes[tier]; cnt > maxCount {
				maxCount, dominant = cnt, tier
			}
		}
		if maxCount >= 2 {
			wordComplexity[word] = dominant
		}
	}
	return wordComplexity
}
// SelectAvailableModelForTask returns the first model that answers a ping.
//
// The model chosen by SelectModel for query is tried first, followed by the
// fixed fallbacks 2b, 0.8b, 4b and 9b. When the preferred model is itself one
// of the fallbacks it is pinged only once, so a model that has just failed is
// not probed a second time. If no candidate responds, the configuration's
// DefaultModel is returned without being pinged.
//
// ctx is passed through to pinger.PingModel unchanged.
func (r *QwenModelRouter) SelectAvailableModelForTask(ctx context.Context, pinger ModelPinger, query string) string {
	preferred := r.SelectModel(query)
	candidates := [...]string{
		preferred,
		"qwen3.5:2b",
		"qwen3.5:0.8b",
		"qwen3.5:4b",
		"qwen3.5:9b",
	}
	for i, model := range candidates {
		// The fixed fallbacks are distinct, so the only possible duplicate
		// is the preferred model, which was already tried at index 0.
		if i > 0 && model == preferred {
			continue
		}
		if err := pinger.PingModel(ctx, model); err == nil {
			return model
		}
	}
	return r.config.DefaultModel
}
// GetRecommendedModel classifies query under the router's current mode and
// returns the model recommended for the resulting tier, a human-readable
// reason, and the tier itself. The reason ends with a note naming the mode.
// The mode is read under the read lock, which is released before the query is
// classified.
func (r *QwenModelRouter) GetRecommendedModel(query string) (model string, reason string, complexity QwenComplexity) {
	r.mu.RLock()
	mode := r.modeContext
	r.mu.RUnlock()

	complexity = classifyQwenTask(query, mode)
	if complexity == QwenTrivial {
		model, reason = "qwen3.5:0.8b", "trivial task - ultra-fast response"
	} else if complexity == QwenSimple {
		model, reason = "qwen3.5:2b", "simple task - balanced speed/capability"
	} else if complexity == QwenModerate {
		model, reason = "qwen3.5:4b", "moderate complexity - multi-step reasoning"
	} else if complexity == QwenAdvanced {
		model, reason = "qwen3.5:9b", "advanced task - complex reasoning required"
	}

	// An unrecognised mode contributes no note.
	var modeNote string
	if mode == ModeAskContext {
		modeNote = " (ASK mode - prefer speed)"
	} else if mode == ModePlanContext {
		modeNote = " (PLAN mode - prefer reasoning)"
	} else if mode == ModeBuildContext {
		modeNote = " (BUILD mode - prefer capability)"
	}
	reason += modeNote

	return model, reason, complexity
}