365 lines
10 KiB
Go
365 lines
10 KiB
Go
package config
|
|
|
|
import (
|
|
"context"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
type QwenModelRouter struct {
|
|
config *ModelConfig
|
|
overrideLog []ModelOverride
|
|
modeContext ModeContext
|
|
mu sync.RWMutex
|
|
}
|
|
|
|
// ModeContext identifies the interaction mode a query is routed under.
type ModeContext int

// Interaction modes. The numeric values start at zero and follow declaration
// order.
const (
	// ModeAskContext is the zero value and the mode NewQwenModelRouter starts
	// in; classifyQwenTask lowers the score by one in this mode.
	ModeAskContext ModeContext = iota
	// ModePlanContext leaves the classification score unchanged.
	ModePlanContext
	// ModeBuildContext raises the classification score by one.
	ModeBuildContext
)
|
|
|
|
// QwenComplexity is the complexity tier assigned to a query. Its string value
// is what SelectModel hands to the model configuration.
type QwenComplexity string

// Complexity tiers, from cheapest to most demanding. classifyQwenTask maps its
// final score onto them: <= -3 trivial, <= 1 simple, <= 5 moderate, otherwise
// advanced.
const (
	QwenTrivial  QwenComplexity = "trivial"
	QwenSimple   QwenComplexity = "simple"
	QwenModerate QwenComplexity = "moderate"
	QwenAdvanced QwenComplexity = "advanced"
)
|
|
|
|
var (
|
|
qwenTrivialIndicators = []string{
|
|
"what is", "who is", "when is", "where is",
|
|
"define", "meaning of", "synonym", "antonym",
|
|
"list files", "show me", "display",
|
|
"yes", "no", "ok", "thanks",
|
|
"hello", "hi", "hey",
|
|
}
|
|
qwenSimpleIndicators = []string{
|
|
"how do i", "explain", "what does", "why does",
|
|
"find", "search", "get", "read",
|
|
"print", "echo", "cat", "ls", "grep",
|
|
"simple", "quick", "fast", "brief",
|
|
"check", "verify", "test",
|
|
"create file", "write file", "save",
|
|
}
|
|
qwenModerateIndicators = []string{
|
|
"create", "generate", "add", "modify", "update",
|
|
"fix", "debug", "refactor", "optimize",
|
|
"function", "class", "method", "interface",
|
|
"test", "unit test", "integration test",
|
|
"script", "command", "pipeline",
|
|
"compare", "analyze", "review",
|
|
"multiple", "several", "across",
|
|
}
|
|
qwenAdvancedIndicators = []string{
|
|
"architecture", "design pattern", "system design",
|
|
"infrastructure", "deployment", "scaling",
|
|
"security audit", "performance optimization",
|
|
"multi-step", "complex", "comprehensive",
|
|
"build a", "implement", "develop", "engineer",
|
|
"full stack", "end-to-end", "production",
|
|
"migration", "refactor entire", "rewrite",
|
|
}
|
|
qwenCodePatterns = map[string]QwenComplexity{
|
|
"variable": QwenSimple,
|
|
"constant": QwenSimple,
|
|
"function": QwenSimple,
|
|
"loop": QwenSimple,
|
|
"condition": QwenSimple,
|
|
"array": QwenSimple,
|
|
"slice": QwenSimple,
|
|
"map": QwenSimple,
|
|
"struct": QwenModerate,
|
|
"interface": QwenModerate,
|
|
"generics": QwenModerate,
|
|
"concurrency": QwenModerate,
|
|
"goroutine": QwenModerate,
|
|
"channel": QwenModerate,
|
|
"mutex": QwenModerate,
|
|
"architecture": QwenAdvanced,
|
|
"pattern": QwenAdvanced,
|
|
"microservice": QwenAdvanced,
|
|
"distributed": QwenAdvanced,
|
|
"kubernetes": QwenAdvanced,
|
|
}
|
|
)
|
|
|
|
func NewQwenModelRouter(cfg *ModelConfig) *QwenModelRouter {
|
|
return &QwenModelRouter{
|
|
config: cfg,
|
|
overrideLog: make([]ModelOverride, 0),
|
|
modeContext: ModeAskContext,
|
|
}
|
|
}
|
|
|
|
func (r *QwenModelRouter) SetModeContext(mode ModeContext) {
|
|
r.mu.Lock()
|
|
defer r.mu.Unlock()
|
|
r.modeContext = mode
|
|
}
|
|
|
|
func (r *QwenModelRouter) ClassifyTaskComplexity(query string) QwenComplexity {
|
|
return classifyQwenTask(query, r.modeContext)
|
|
}
|
|
|
|
func (r *QwenModelRouter) SelectModel(query string) string {
|
|
complexity := r.ClassifyTaskComplexity(query)
|
|
return r.config.SelectModelForTask(string(complexity))
|
|
}
|
|
|
|
func (r *QwenModelRouter) SelectModelForMode(query string, mode ModeContext) string {
|
|
switch mode {
|
|
case ModeAskContext:
|
|
return r.selectAskModel(query)
|
|
case ModePlanContext:
|
|
return r.selectPlanModel(query)
|
|
case ModeBuildContext:
|
|
return r.selectBuildModel(query)
|
|
}
|
|
return r.SelectModel(query)
|
|
}
|
|
|
|
func (r *QwenModelRouter) selectAskModel(query string) string {
|
|
complexity := classifyQwenTask(query, ModeAskContext)
|
|
switch complexity {
|
|
case QwenTrivial, QwenSimple:
|
|
if r.isModelAvailable("qwen3.5:0.8b") {
|
|
return "qwen3.5:0.8b"
|
|
}
|
|
return "qwen3.5:2b"
|
|
case QwenModerate:
|
|
return "qwen3.5:2b"
|
|
case QwenAdvanced:
|
|
return "qwen3.5:4b"
|
|
default:
|
|
return "qwen3.5:2b"
|
|
}
|
|
}
|
|
|
|
func (r *QwenModelRouter) selectPlanModel(query string) string {
|
|
complexity := classifyQwenTask(query, ModePlanContext)
|
|
switch complexity {
|
|
case QwenTrivial, QwenSimple:
|
|
return "qwen3.5:2b"
|
|
case QwenModerate:
|
|
return "qwen3.5:4b"
|
|
case QwenAdvanced:
|
|
return "qwen3.5:9b"
|
|
default:
|
|
return "qwen3.5:4b"
|
|
}
|
|
}
|
|
|
|
func (r *QwenModelRouter) selectBuildModel(query string) string {
|
|
complexity := classifyQwenTask(query, ModeBuildContext)
|
|
switch complexity {
|
|
case QwenTrivial, QwenSimple:
|
|
return "qwen3.5:2b"
|
|
case QwenModerate:
|
|
return "qwen3.5:4b"
|
|
case QwenAdvanced:
|
|
return "qwen3.5:9b"
|
|
default:
|
|
return "qwen3.5:4b"
|
|
}
|
|
}
|
|
|
|
func (r *QwenModelRouter) isModelAvailable(name string) bool {
|
|
for _, m := range r.config.Models {
|
|
if m.Name == name {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func classifyQwenTask(query string, mode ModeContext) QwenComplexity {
|
|
lowerQuery := strings.ToLower(query)
|
|
words := strings.Fields(lowerQuery)
|
|
wordCount := len(words)
|
|
score := 0
|
|
for _, indicator := range qwenTrivialIndicators {
|
|
if strings.Contains(lowerQuery, indicator) {
|
|
score -= 4
|
|
}
|
|
}
|
|
for _, indicator := range qwenSimpleIndicators {
|
|
if strings.Contains(lowerQuery, indicator) {
|
|
score -= 1
|
|
}
|
|
}
|
|
for _, indicator := range qwenModerateIndicators {
|
|
if strings.Contains(lowerQuery, indicator) {
|
|
score += 2
|
|
}
|
|
}
|
|
for _, indicator := range qwenAdvancedIndicators {
|
|
if strings.Contains(lowerQuery, indicator) {
|
|
score += 4
|
|
}
|
|
}
|
|
for pattern, complexity := range qwenCodePatterns {
|
|
if strings.Contains(lowerQuery, pattern) {
|
|
switch complexity {
|
|
case QwenSimple:
|
|
score -= 1
|
|
case QwenModerate:
|
|
score += 2
|
|
case QwenAdvanced:
|
|
score += 4
|
|
}
|
|
}
|
|
}
|
|
if wordCount > 50 {
|
|
score += 3
|
|
} else if wordCount > 30 {
|
|
score += 1
|
|
} else if wordCount < 5 && score <= 0 {
|
|
score -= 2
|
|
}
|
|
if strings.Contains(lowerQuery, "why") || strings.Contains(lowerQuery, "reason") {
|
|
score += 2
|
|
}
|
|
if strings.Contains(lowerQuery, "how") && wordCount > 10 {
|
|
score += 1
|
|
}
|
|
if strings.Contains(lowerQuery, "?") && wordCount < 10 {
|
|
score -= 1
|
|
}
|
|
switch mode {
|
|
case ModeAskContext:
|
|
score -= 1
|
|
case ModeBuildContext:
|
|
score += 1
|
|
}
|
|
switch {
|
|
case score <= -3:
|
|
return QwenTrivial
|
|
case score <= 1:
|
|
return QwenSimple
|
|
case score <= 5:
|
|
return QwenModerate
|
|
default:
|
|
return QwenAdvanced
|
|
}
|
|
}
|
|
|
|
func (r *QwenModelRouter) RecordOverride(query, userModel string) {
|
|
r.mu.Lock()
|
|
defer r.mu.Unlock()
|
|
routerModel := r.SelectModel(query)
|
|
r.overrideLog = append(r.overrideLog, ModelOverride{
|
|
Query: query,
|
|
UserModel: userModel,
|
|
RouterModel: routerModel,
|
|
Timestamp: time.Now(),
|
|
})
|
|
if len(r.overrideLog) > 100 {
|
|
r.overrideLog = r.overrideLog[len(r.overrideLog)-100:]
|
|
}
|
|
}
|
|
|
|
func (r *QwenModelRouter) GetLearnedPatterns() map[string]QwenComplexity {
|
|
r.mu.RLock()
|
|
defer r.mu.RUnlock()
|
|
if len(r.overrideLog) < 3 {
|
|
return nil
|
|
}
|
|
wordCounts := make(map[string]map[QwenComplexity]int)
|
|
for _, o := range r.overrideLog {
|
|
if o.Query == "" || o.UserModel == "" {
|
|
continue
|
|
}
|
|
var complexity QwenComplexity
|
|
switch {
|
|
case strings.Contains(o.UserModel, "0.8b"):
|
|
complexity = QwenTrivial
|
|
case strings.Contains(o.UserModel, "2b"):
|
|
complexity = QwenSimple
|
|
case strings.Contains(o.UserModel, "4b"):
|
|
complexity = QwenModerate
|
|
case strings.Contains(o.UserModel, "9b"):
|
|
complexity = QwenAdvanced
|
|
default:
|
|
continue
|
|
}
|
|
words := strings.Fields(strings.ToLower(o.Query))
|
|
for _, w := range words {
|
|
if len(w) < 3 {
|
|
continue
|
|
}
|
|
if _, ok := wordCounts[w]; !ok {
|
|
wordCounts[w] = make(map[QwenComplexity]int)
|
|
}
|
|
wordCounts[w][complexity]++
|
|
}
|
|
}
|
|
wordComplexity := make(map[string]QwenComplexity)
|
|
for word, counts := range wordCounts {
|
|
var maxCount int
|
|
var dominant QwenComplexity
|
|
for c, cnt := range counts {
|
|
if cnt > maxCount {
|
|
maxCount = cnt
|
|
dominant = c
|
|
}
|
|
}
|
|
if maxCount >= 2 {
|
|
wordComplexity[word] = dominant
|
|
}
|
|
}
|
|
return wordComplexity
|
|
}
|
|
|
|
func (r *QwenModelRouter) SelectAvailableModelForTask(ctx context.Context, pinger ModelPinger, query string) string {
|
|
preferred := r.SelectModel(query)
|
|
fallbackOrder := []string{
|
|
preferred,
|
|
"qwen3.5:2b",
|
|
"qwen3.5:0.8b",
|
|
"qwen3.5:4b",
|
|
"qwen3.5:9b",
|
|
}
|
|
for _, model := range fallbackOrder {
|
|
if err := pinger.PingModel(ctx, model); err == nil {
|
|
return model
|
|
}
|
|
}
|
|
return r.config.DefaultModel
|
|
}
|
|
|
|
func (r *QwenModelRouter) GetRecommendedModel(query string) (model string, reason string, complexity QwenComplexity) {
|
|
r.mu.RLock()
|
|
mode := r.modeContext
|
|
r.mu.RUnlock()
|
|
complexity = classifyQwenTask(query, mode)
|
|
switch complexity {
|
|
case QwenTrivial:
|
|
model = "qwen3.5:0.8b"
|
|
reason = "trivial task - ultra-fast response"
|
|
case QwenSimple:
|
|
model = "qwen3.5:2b"
|
|
reason = "simple task - balanced speed/capability"
|
|
case QwenModerate:
|
|
model = "qwen3.5:4b"
|
|
reason = "moderate complexity - multi-step reasoning"
|
|
case QwenAdvanced:
|
|
model = "qwen3.5:9b"
|
|
reason = "advanced task - complex reasoning required"
|
|
}
|
|
switch mode {
|
|
case ModeAskContext:
|
|
reason += " (ASK mode - prefer speed)"
|
|
case ModePlanContext:
|
|
reason += " (PLAN mode - prefer reasoning)"
|
|
case ModeBuildContext:
|
|
reason += " (BUILD mode - prefer capability)"
|
|
}
|
|
return model, reason, complexity
|
|
}
|