diff --git a/config.example.yaml b/config.example.yaml
new file mode 100644
index 0000000..86f2b7c
--- /dev/null
+++ b/config.example.yaml
@@ -0,0 +1,47 @@
+concurrency: 1
+shutdown_timeout: 30s
+
+address: ":8080"
+
+stt:
+  url: "http://localhost:8178"
+  timeout: "30s"
+
+llm:
+  url: "http://localhost:8081/v1"
+  key: ${ODIDERE_LLM_KEY}
+  model: "default"
+  system_prompt: "You are a helpful voice assistant. Be concise."
+  timeout: "5m"
+
+tts:
+  url: "http://localhost:8880"
+  voice: "af_heart"
+  voice_map:
+    english: "af_heart"       # American English
+    chinese: "zf_xiaobei"     # Mandarin Chinese
+    japanese: "jf_alpha"      # Japanese
+    spanish: "ef_dora"        # Spanish
+    french: "ff_siwis"        # French
+    hindi: "hf_alpha"         # Hindi
+    italian: "if_sara"        # Italian
+    portuguese: "pf_dora"     # Brazilian Portuguese
+    korean: "kf_sarah"        # Korean
+  timeout: "60s"
+
+tools:
+  - name: get_weather
+    description: "Get current weather for a location"
+    command: "curl"
+    arguments:
+      - "-s"
+      - "https://wttr.in/{{.location}}?format=j1"
+    parameters:
+      type: object
+      properties:
+        location:
+          type: string
+          description: "City name or location"
+      required:
+        - location
+    timeout: "10s"
diff --git a/internal/config/config.go b/internal/config/config.go
new file mode 100644
index 0000000..fd7e851
--- /dev/null
+++ b/internal/config/config.go
@@ -0,0 +1,130 @@
+// Package config loads and validates YAML configuration for odidere.
+package config
+
+import (
+	"fmt"
+	"os"
+	"regexp"
+	"time"
+
+	"github.com/chimerical-llc/odidere/internal/llm"
+	"github.com/chimerical-llc/odidere/internal/stt"
+	"github.com/chimerical-llc/odidere/internal/tool"
+	"github.com/chimerical-llc/odidere/internal/tts"
+
+	"gopkg.in/yaml.v3"
+)
+
+// Config holds all application configuration sections.
+type Config struct {
+	// Address is the host:port to listen on.
+	Address string `yaml:"address"`
+
+	// Concurrency is the number of concurrent requests to allow.
+	// Defaults to 1.
+ Concurrency int `yaml:"concurrency"` + // ShutdownTimeout is the maximum time to wait for graceful shutdown. + // Defaults to "30s". + ShutdownTimeout string `yaml:"shutdown_timeout"` + // shutdownTimeout is the parsed duration, set during Load. + shutdownTimeout time.Duration `yaml:"-"` + + // LLM configures the language model client. + LLM llm.Config `yaml:"llm"` + // STT configures the speech-to-text client. + STT stt.Config `yaml:"stt"` + // Tools defines external commands available to the LLM. + Tools []tool.Tool `yaml:"tools"` + // TTS configures the text-to-speech client. + TTS tts.Config `yaml:"tts"` +} + +// ApplyDefaults sets default values for optional configuration fields. +// Called automatically by Load before validation. +func (cfg *Config) ApplyDefaults() { + if cfg.Concurrency == 0 { + cfg.Concurrency = 1 + } + if cfg.ShutdownTimeout == "" { + cfg.ShutdownTimeout = "30s" + } + if cfg.Address == "" { + cfg.Address = ":8080" + } +} + +// GetShutdownTimeout returns the parsed shutdown timeout duration. +func (cfg *Config) GetShutdownTimeout() time.Duration { + return cfg.shutdownTimeout +} + +// Validate checks that all required fields are present and valid. +// Delegates to each subsection's Validate method. 
+func (cfg *Config) Validate() error { + if cfg.Address == "" { + return fmt.Errorf("address required") + } + + if err := cfg.LLM.Validate(); err != nil { + return fmt.Errorf("invalid llm config: %w", err) + } + if err := cfg.STT.Validate(); err != nil { + return fmt.Errorf("invalid stt config: %w", err) + } + if err := cfg.TTS.Validate(); err != nil { + return fmt.Errorf("invalid tts config: %w", err) + } + if _, err := time.ParseDuration(cfg.ShutdownTimeout); err != nil { + return fmt.Errorf( + "invalid shutdown_timeout %q: %w", + cfg.ShutdownTimeout, err, + ) + } + for i, t := range cfg.Tools { + if err := t.Validate(); err != nil { + return fmt.Errorf("tools[%d] invalid: %w", i, err) + } + } + + return nil +} + +// Load reads a YAML configuration file, expands environment variables, +// applies defaults, and validates the result. Returns an error if the +// file cannot be read, parsed, or contains invalid configuration. +func Load(path string) (*Config, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("read file: %w", err) + } + + // Expand environment variables. + // Unset variables are replaced with empty strings. + re := regexp.MustCompile(`\$\{([^}]+)\}`) + expanded := re.ReplaceAllStringFunc( + string(data), + func(match string) string { + // Extract variable name from ${VAR}. 
+ v := match[2 : len(match)-1] + return os.Getenv(v) + }, + ) + + var cfg Config + if err := yaml.Unmarshal([]byte(expanded), &cfg); err != nil { + return nil, fmt.Errorf("parse config: %w", err) + } + cfg.ApplyDefaults() + + if err := cfg.Validate(); err != nil { + return nil, fmt.Errorf("invalid config: %v", err) + } + + d, err := time.ParseDuration(cfg.ShutdownTimeout) + if err != nil { + return nil, fmt.Errorf("parse shutdown timeout: %v", err) + } + cfg.shutdownTimeout = d + + return &cfg, nil +} diff --git a/internal/config/config_test.go b/internal/config/config_test.go new file mode 100644 index 0000000..21d2a88 --- /dev/null +++ b/internal/config/config_test.go @@ -0,0 +1,399 @@ +package config + +import ( + "os" + "path/filepath" + "testing" + "time" +) + +// validConfig returns a minimal valid YAML configuration. +func validConfig() string { + return ` +address: ":9090" +concurrency: 2 +shutdown_timeout: "60s" + +llm: + model: test-model + url: http://localhost:8080 + +stt: + url: http://localhost:8178 + +tts: + url: http://localhost:8880 + voice: af_heart +` +} + +func writeConfig(t *testing.T, content string) string { + t.Helper() + dir := t.TempDir() + path := filepath.Join(dir, "config.yaml") + if err := os.WriteFile(path, []byte(content), 0644); err != nil { + t.Fatalf("write config file: %v", err) + } + return path +} + +func TestLoad(t *testing.T) { + tests := []struct { + name string + config string + setup func(t *testing.T) + shouldErr bool + check func(t *testing.T, cfg *Config) + }{ + { + name: "valid config", + config: validConfig(), + check: func(t *testing.T, cfg *Config) { + if cfg.Address != ":9090" { + t.Errorf( + "Address = %q, want %q", + cfg.Address, ":9090", + ) + } + if cfg.Concurrency != 2 { + t.Errorf( + "Concurrency = %d, want 2", + cfg.Concurrency, + ) + } + if cfg.ShutdownTimeout != "60s" { + t.Errorf( + "ShutdownTimeout = %q, want %q", + cfg.ShutdownTimeout, "60s", + ) + } + if cfg.GetShutdownTimeout() != 60*time.Second { + 
t.Errorf( + "GetShutdownTimeout() = %v, want 60s", + cfg.GetShutdownTimeout(), + ) + } + }, + }, + { + name: "defaults applied", + config: ` +llm: + model: test-model + url: http://localhost:8080 + +stt: + url: http://localhost:8178 + +tts: + url: http://localhost:8880 + voice: af_heart +`, + check: func(t *testing.T, cfg *Config) { + if cfg.Address != ":8080" { + t.Errorf( + "Address = %q, want %q", + cfg.Address, ":8080", + ) + } + if cfg.Concurrency != 1 { + t.Errorf( + "Concurrency = %d, want 1", + cfg.Concurrency, + ) + } + if cfg.ShutdownTimeout != "30s" { + t.Errorf( + "ShutdownTimeout = %q, want %q", + cfg.ShutdownTimeout, "30s", + ) + } + if cfg.GetShutdownTimeout() != 30*time.Second { + t.Errorf( + "GetShutdownTimeout() = %v, want 30s", + cfg.GetShutdownTimeout(), + ) + } + }, + }, + { + name: "env expansion", + config: ` +llm: + model: test-model + url: http://localhost:8080 + key: ${TEST_LLM_KEY} + +stt: + url: http://localhost:8178 + +tts: + url: http://localhost:8880 + voice: af_heart +`, + setup: func(t *testing.T) { + t.Setenv("TEST_LLM_KEY", "secret-api-key") + }, + check: func(t *testing.T, cfg *Config) { + if cfg.LLM.Key != "secret-api-key" { + t.Errorf( + "LLM.Key = %q, want %q", + cfg.LLM.Key, "secret-api-key", + ) + } + }, + }, + { + name: "env expansion unset var becomes empty", + config: ` +llm: + model: test-model + url: http://localhost:8080 + key: ${TEST_UNSET_VAR} + +stt: + url: http://localhost:8178 + +tts: + url: http://localhost:8880 + voice: af_heart +`, + check: func(t *testing.T, cfg *Config) { + if cfg.LLM.Key != "" { + t.Errorf( + "LLM.Key = %q, want empty", + cfg.LLM.Key, + ) + } + }, + }, + { + name: "invalid yaml", + config: `llm: [invalid yaml`, + shouldErr: true, + }, + { + name: "invalid shutdown_timeout", + config: ` +shutdown_timeout: "not-a-duration" + +llm: + model: test-model + url: http://localhost:8080 + +stt: + url: http://localhost:8178 + +tts: + url: http://localhost:8880 + voice: af_heart +`, + shouldErr: true, + 
}, + { + name: "missing llm model", + config: ` +llm: + url: http://localhost:8080 + +stt: + url: http://localhost:8178 + +tts: + url: http://localhost:8880 + voice: af_heart +`, + shouldErr: true, + }, + { + name: "missing llm url", + config: ` +llm: + model: test-model + +stt: + url: http://localhost:8178 + +tts: + url: http://localhost:8880 + voice: af_heart +`, + shouldErr: true, + }, + { + name: "missing stt url", + config: ` +llm: + model: test-model + url: http://localhost:8080 + +stt: + timeout: "30s" + +tts: + url: http://localhost:8880 + voice: af_heart +`, + shouldErr: true, + }, + { + name: "missing tts url", + config: ` +llm: + model: test-model + url: http://localhost:8080 + +stt: + url: http://localhost:8178 + +tts: + voice: af_heart +`, + shouldErr: true, + }, + { + name: "missing tts voice", + config: ` +llm: + model: test-model + url: http://localhost:8080 + +stt: + url: http://localhost:8178 + +tts: + url: http://localhost:8880 +`, + shouldErr: true, + }, + { + name: "config with tools", + config: ` +llm: + model: test-model + url: http://localhost:8080 + +stt: + url: http://localhost:8178 + +tts: + url: http://localhost:8880 + voice: af_heart + +tools: + - name: echo + description: echoes input + command: echo + arguments: + - "{{.message}}" +`, + check: func(t *testing.T, cfg *Config) { + if len(cfg.Tools) != 1 { + t.Errorf( + "len(Tools) = %d, want 1", + len(cfg.Tools), + ) + } + if cfg.Tools[0].Name != "echo" { + t.Errorf( + "Tools[0].Name = %q, want %q", + cfg.Tools[0].Name, "echo", + ) + } + }, + }, + { + name: "invalid tool", + config: ` +llm: + model: test-model + url: http://localhost:8080 + +stt: + url: http://localhost:8178 + +tts: + url: http://localhost:8880 + voice: af_heart + +tools: + - name: "" + description: missing name + command: echo +`, + shouldErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.setup != nil { + tt.setup(t) + } + + path := writeConfig(t, tt.config) + cfg, err := 
Load(path) + + if tt.shouldErr { + if err == nil { + t.Fatal("Load() expected error") + } + return + } + + if err != nil { + t.Fatalf("Load() error = %v", err) + } + + if tt.check != nil { + tt.check(t, cfg) + } + }) + } +} + +func TestLoad_FileNotFound(t *testing.T) { + if _, err := Load("/nonexistent/path/config.yaml"); err == nil { + t.Fatal("Load() expected error for missing file") + } +} + +func TestApplyDefaults(t *testing.T) { + cfg := &Config{} + cfg.ApplyDefaults() + + if cfg.Address != ":8080" { + t.Errorf("Address = %q, want %q", cfg.Address, ":8080") + } + if cfg.Concurrency != 1 { + t.Errorf("Concurrency = %d, want 1", cfg.Concurrency) + } + if cfg.ShutdownTimeout != "30s" { + t.Errorf( + "ShutdownTimeout = %q, want %q", + cfg.ShutdownTimeout, "30s", + ) + } +} + +func TestApplyDefaults_NoOverwrite(t *testing.T) { + cfg := &Config{ + Address: ":9999", + Concurrency: 5, + ShutdownTimeout: "120s", + } + cfg.ApplyDefaults() + + if cfg.Address != ":9999" { + t.Errorf("Address = %q, want %q", cfg.Address, ":9999") + } + if cfg.Concurrency != 5 { + t.Errorf("Concurrency = %d, want 5", cfg.Concurrency) + } + if cfg.ShutdownTimeout != "120s" { + t.Errorf( + "ShutdownTimeout = %q, want %q", + cfg.ShutdownTimeout, "120s", + ) + } +} diff --git a/internal/service/service.go b/internal/service/service.go new file mode 100644 index 0000000..4101750 --- /dev/null +++ b/internal/service/service.go @@ -0,0 +1,856 @@ +// Package service orchestrates the odidere voice assistant server. +// It coordinates the HTTP server, STT/LLM/TTS clients, and handles +// graceful shutdown. 
+package service + +import ( + "context" + "embed" + "encoding/base64" + "encoding/json" + "fmt" + "html/template" + "io/fs" + "log/slog" + "net/http" + "os" + "os/signal" + "runtime/debug" + "strings" + "syscall" + "time" + + "github.com/chimerical-llc/odidere/internal/config" + "github.com/chimerical-llc/odidere/internal/llm" + "github.com/chimerical-llc/odidere/internal/service/templates" + "github.com/chimerical-llc/odidere/internal/stt" + "github.com/chimerical-llc/odidere/internal/tool" + "github.com/chimerical-llc/odidere/internal/tts" + + "github.com/google/uuid" + openai "github.com/sashabaranov/go-openai" + "golang.org/x/sync/semaphore" +) + +//go:embed all:static/* +var static embed.FS + +// Service is the main application coordinator. +// It owns the HTTP server and all processing clients. +type Service struct { + cfg *config.Config + llm *llm.Client + log *slog.Logger + mux *http.ServeMux + sem *semaphore.Weighted + server *http.Server + stt *stt.Client + tmpl *template.Template + tools *tool.Registry + tts *tts.Client +} + +// New creates a Service from the provided configuration. +// It initializes all clients and the HTTP server. +func New(cfg *config.Config, log *slog.Logger) (*Service, error) { + var svc = &Service{ + cfg: cfg, + log: log, + mux: http.NewServeMux(), + sem: semaphore.NewWeighted(int64(cfg.Concurrency)), + } + + // Setup tool registry. + registry, err := tool.NewRegistry(cfg.Tools) + if err != nil { + return nil, fmt.Errorf("load tools: %v", err) + } + svc.tools = registry + + // Create STT client. + sttClient, err := stt.NewClient(cfg.STT, log) + if err != nil { + return nil, fmt.Errorf("create STT client: %v", err) + } + svc.stt = sttClient + + // Create LLM client. + llmClient, err := llm.NewClient(cfg.LLM, registry, log) + if err != nil { + return nil, fmt.Errorf("create LLM client: %v", err) + } + svc.llm = llmClient + + // Create TTS client. 
+ ttsClient, err := tts.NewClient(cfg.TTS, log) + if err != nil { + return nil, fmt.Errorf("create TTS client: %v", err) + } + svc.tts = ttsClient + + // Parse templates. + tmpl, err := templates.Parse() + if err != nil { + return nil, fmt.Errorf("parse templates: %v", err) + } + svc.tmpl = tmpl + + // Setup static file server. + staticFS, err := fs.Sub(static, "static") + if err != nil { + return nil, fmt.Errorf("setup static fs: %v", err) + } + + // Register routes. + svc.mux.HandleFunc("GET /", svc.home) + svc.mux.HandleFunc("GET /status", svc.status) + svc.mux.Handle( + "GET /static/", + http.StripPrefix( + "/static/", http.FileServer(http.FS(staticFS)), + ), + ) + svc.mux.HandleFunc("POST /v1/chat/voice", svc.voice) + svc.mux.HandleFunc("POST /v1/chat/voice/stream", svc.voiceStream) + svc.mux.HandleFunc("GET /v1/voices", svc.voices) + svc.mux.HandleFunc("GET /v1/models", svc.models) + + svc.server = &http.Server{ + Addr: cfg.Address, + Handler: svc, + } + + return svc, nil +} + +// ServeHTTP implements http.Handler. It logs requests, assigns a UUID, +// sets context values, handles panics, and delegates to the mux. +func (svc *Service) ServeHTTP(w http.ResponseWriter, r *http.Request) { + var ( + start = time.Now() + id = uuid.NewString() + ip = func() string { + if ip := r.Header.Get("X-Forwarded-For"); ip != "" { + if idx := strings.Index(ip, ","); idx != -1 { + return strings.TrimSpace(ip[:idx]) + } + return ip + } + return r.RemoteAddr + }() + log = svc.log.With(slog.Group( + "request", + slog.String("id", id), + slog.String("ip", ip), + slog.String("method", r.Method), + slog.String("path", r.URL.Path), + )) + ) + + // Log completion time. + defer func() { + log.InfoContext( + r.Context(), + "completed", + slog.Duration("duration", time.Since(start)), + ) + }() + + // Panic recovery. 
+ defer func() { + if err := recover(); err != nil { + log.ErrorContext( + r.Context(), + "panic recovered", + slog.Any("error", err), + slog.String("stack", string(debug.Stack())), + ) + http.Error( + w, + http.StatusText( + http.StatusInternalServerError, + ), + http.StatusInternalServerError, + ) + } + }() + + // Enrich context with request-scoped values. + ctx := r.Context() + ctx = context.WithValue(ctx, "log", log) + ctx = context.WithValue(ctx, "id", id) + ctx = context.WithValue(ctx, "ip", ip) + r = r.WithContext(ctx) + + log.InfoContext(ctx, "handling") + + // Pass the request on to the multiplexer. + svc.mux.ServeHTTP(w, r) +} + +// Run starts the service and blocks until shutdown. +// Shutdown is triggered by SIGINT or SIGTERM. +func (svc *Service) Run(ctx context.Context) error { + svc.log.Info( + "starting odidere", + slog.Int("concurrency", svc.cfg.Concurrency), + slog.Group( + "llm", + slog.String("url", svc.cfg.LLM.URL), + slog.String("model", svc.cfg.LLM.Model), + ), + slog.Group( + "server", + slog.String("address", svc.cfg.Address), + ), + slog.Group( + "stt", + slog.String("url", svc.cfg.STT.URL), + ), + slog.Group( + "tools", + slog.Int("count", len(svc.tools.List())), + slog.Any( + "names", + strings.Join(svc.tools.List(), ","), + ), + ), + slog.Group( + "tts", + slog.String("url", svc.cfg.TTS.URL), + slog.String("default_voice", svc.cfg.TTS.Voice), + ), + slog.Any("shutdown_timeout", svc.cfg.GetShutdownTimeout()), + ) + + // Setup signal handling for graceful shutdown. + ctx, cancel := signal.NotifyContext( + ctx, + os.Interrupt, syscall.SIGTERM, + ) + defer cancel() + + // Start HTTP server in background. + var errs = make(chan error, 1) + go func() { + svc.log.Info( + "HTTP server listening", + slog.String("address", svc.cfg.Address), + ) + if err := svc.server.ListenAndServe(); err != nil && + err != http.ErrServerClosed { + errs <- err + } + close(errs) + }() + + // Wait for shutdown signal or server error. 
+ select { + case <-ctx.Done(): + svc.log.Info("shutdown signal received") + case err := <-errs: + if err != nil { + return fmt.Errorf("server error: %w", err) + } + } + + // Graceful shutdown with timeout. + shutdownCtx, shutdownCancel := context.WithTimeout( + context.Background(), + svc.cfg.GetShutdownTimeout(), + ) + defer shutdownCancel() + + svc.log.Info("shutting down HTTP server") + if err := svc.server.Shutdown(shutdownCtx); err != nil { + svc.log.Warn( + "shutdown timeout reached", + slog.Any("error", err), + ) + } + + svc.log.Info("terminating") + return nil +} + +func (svc *Service) home(w http.ResponseWriter, r *http.Request) { + var ( + ctx = r.Context() + log = ctx.Value("log").(*slog.Logger) + ) + + if r.URL.Path != "/" { + http.NotFound(w, r) + return + } + w.Header().Set("Content-Type", "text/html; charset=utf-8") + if err := svc.tmpl.ExecuteTemplate( + w, "index.gohtml", nil, + ); err != nil { + log.ErrorContext( + ctx, "template error", slog.Any("error", err), + ) + http.Error( + w, + http.StatusText(http.StatusInternalServerError), + http.StatusInternalServerError, + ) + } +} + +// status returns server status. +func (svc *Service) status(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) +} + +// Request is the incoming request format for chat and voice endpoints. +type Request struct { + // Audio is base64-encoded audio data (webm) for transcription. + Audio string `json:"audio,omitempty"` + // Messages is the conversation history. + Messages []openai.ChatCompletionMessage `json:"messages"` + // Model is the LLM model ID. If empty, the default model is used. + Model string `json:"model,omitempty"` + // Voice is the voice ID for TTS. + Voice string `json:"voice,omitempty"` +} + +// Response is the response format for chat and voice endpoints. +type Response struct { + // Audio is the base64-encoded WAV audio response. 
+ Audio string `json:"audio,omitempty"` + // DetectedLanguage is the language detected in the input speech. + DetectedLanguage string `json:"detected_language,omitempty"` + // Messages is the full list of messages generated during the query, + // including tool calls and tool results. + Messages []openai.ChatCompletionMessage `json:"messages,omitempty"` + // Model is the LLM model used for the response. + Model string `json:"used_model,omitempty"` + // Transcription is the transcribed user speech from the input audio. + Transcription string `json:"transcription,omitempty"` + // Voice is the voice used for TTS synthesis. + Voice string `json:"used_voice,omitempty"` +} + +// voice processes voice requests with audio input/output. +func (svc *Service) voice(w http.ResponseWriter, r *http.Request) { + var ( + ctx = r.Context() + log = ctx.Value("log").(*slog.Logger) + ) + + // Parse request. + r.Body = http.MaxBytesReader(w, r.Body, 32<<20) + var req Request + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + log.ErrorContext( + ctx, + "failed to decode request", + slog.Any("error", err), + ) + http.Error(w, "invalid request", http.StatusBadRequest) + return + } + + // Validate messages. + if len(req.Messages) == 0 { + http.Error(w, "messages required", http.StatusBadRequest) + return + } + log.InfoContext(ctx, "messages", + slog.Any("data", req.Messages), + ) + + var ( + messages = req.Messages + transcription string + detectedLang string + ) + + // If audio provided, transcribe and append to last message. 
+ if req.Audio != "" { + last := &messages[len(messages)-1] + if last.Role != openai.ChatMessageRoleUser { + http.Error( + w, + "last message must be role=user when audio is provided", + http.StatusBadRequest, + ) + return + } + + data, err := base64.StdEncoding.DecodeString(req.Audio) + if err != nil { + log.ErrorContext( + ctx, + "failed to decode audio", + slog.Any("error", err), + ) + http.Error(w, "invalid audio", http.StatusBadRequest) + return + } + + output, err := svc.stt.Transcribe(ctx, data) + if err != nil { + log.ErrorContext( + ctx, + "STT failed", + slog.Any("error", err), + ) + http.Error( + w, + "STT error", + http.StatusInternalServerError, + ) + return + } + + transcription = strings.TrimSpace(output.Text) + detectedLang = output.DetectedLanguage + if detectedLang == "" { + detectedLang = output.Language + } + log.InfoContext( + ctx, + "transcribed audio", + slog.String("text", transcription), + slog.String("language", detectedLang), + ) + + // Append transcription to last message's content. + switch { + // Already using MultiContent, append text part. + case len(last.MultiContent) > 0: + last.MultiContent = append(last.MultiContent, + openai.ChatMessagePart{ + Type: openai.ChatMessagePartTypeText, + Text: transcription, + }, + ) + last.Content = "" + + // Has string content, convert to MultiContent. + case last.Content != "": + last.MultiContent = []openai.ChatMessagePart{ + { + Type: openai.ChatMessagePartTypeText, + Text: last.Content, + }, + { + Type: openai.ChatMessagePartTypeText, + Text: transcription, + }, + } + last.Content = "" + // Empty message, just set content. + // Clear MultiContent, as required by the API spec. + default: + last.Content = transcription + last.MultiContent = nil + } + } + + // Get LLM response. 
+ var model = req.Model + if model == "" { + model = svc.llm.DefaultModel() + } + msgs, err := svc.llm.Query(ctx, messages, model) + if err != nil { + log.ErrorContext( + ctx, + "LLM request failed", + slog.Any("error", err), + ) + http.Error(w, "LLM error", http.StatusInternalServerError) + return + } + if len(msgs) == 0 { + http.Error( + w, + "no response from LLM", + http.StatusInternalServerError, + ) + return + } + final := msgs[len(msgs)-1] + log.InfoContext( + ctx, + "LLM response", + slog.String("text", final.Content), + slog.String("model", model), + ) + + // Determine voice to use. + var voice = req.Voice + if req.Voice == "" && detectedLang != "" { + if autoVoice := svc.tts.SelectVoice( + detectedLang, + ); autoVoice != "" { + voice = autoVoice + log.InfoContext(ctx, "auto-selected voice", + slog.String("language", detectedLang), + slog.String("voice", voice), + ) + } + } else if req.Voice == "" { + log.WarnContext( + ctx, + "auto-voice enabled but no language detected", + ) + } + + // Generate audio response with selected voice. + audio, err := svc.tts.Synthesize(ctx, final.Content, voice) + if err != nil { + log.ErrorContext(ctx, "TTS failed", slog.Any("error", err)) + http.Error(w, "TTS error", http.StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(Response{ + Audio: base64.StdEncoding.EncodeToString(audio), + DetectedLanguage: detectedLang, + Messages: msgs, + Model: model, + Transcription: transcription, + Voice: voice, + }); err != nil { + log.ErrorContext( + ctx, + "failed to json encode response", + slog.Any("error", err), + ) + } +} + +// StreamMessage is the SSE event payload for the streaming voice endpoint. +type StreamMessage struct { + // Audio is the base64-encoded WAV audio response. + Audio string `json:"audio,omitempty"` + // DetectedLanguage is the language detected in the input speech. 
+ DetectedLanguage string `json:"detected_language,omitempty"` + // Error is an error message, if any. + Error string `json:"error,omitempty"` + // Message is the chat completion message. + Message openai.ChatCompletionMessage `json:"message"` + // Model is the LLM model used for the response. + Model string `json:"model,omitempty"` + // Transcription is the transcribed user speech from the input audio. + Transcription string `json:"transcription,omitempty"` + // Voice is the voice used for TTS synthesis. + Voice string `json:"voice,omitempty"` +} + +// voiceStream processes voice requests with streaming SSE output. +func (svc *Service) voiceStream(w http.ResponseWriter, r *http.Request) { + var ( + ctx = r.Context() + log = ctx.Value("log").(*slog.Logger) + ) + + // Check that the response writer supports flushing. + flusher, ok := w.(http.Flusher) + if !ok { + http.Error( + w, + "streaming not supported", + http.StatusInternalServerError, + ) + return + } + + // Parse request. + r.Body = http.MaxBytesReader(w, r.Body, 32<<20) + var req Request + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + log.ErrorContext( + ctx, + "failed to decode request", + slog.Any("error", err), + ) + http.Error(w, "invalid request", http.StatusBadRequest) + return + } + + // Validate messages. + if len(req.Messages) == 0 { + http.Error(w, "messages required", http.StatusBadRequest) + return + } + + // Acquire semaphore. + if err := svc.sem.Acquire(ctx, 1); err != nil { + http.Error( + w, + "service unavailable", + http.StatusServiceUnavailable, + ) + return + } + defer svc.sem.Release(1) + + var ( + messages = req.Messages + transcription string + detectedLang string + ) + + // If audio provided, transcribe and append to last message. 
+ if req.Audio != "" { + last := &messages[len(messages)-1] + if last.Role != openai.ChatMessageRoleUser { + http.Error( + w, + "last message must be role=user when audio is provided", + http.StatusBadRequest, + ) + return + } + + data, err := base64.StdEncoding.DecodeString(req.Audio) + if err != nil { + log.ErrorContext( + ctx, + "failed to decode audio", + slog.Any("error", err), + ) + http.Error(w, "invalid audio", http.StatusBadRequest) + return + } + + output, err := svc.stt.Transcribe(ctx, data) + if err != nil { + log.ErrorContext( + ctx, + "STT failed", + slog.Any("error", err), + ) + http.Error( + w, + "STT error", + http.StatusInternalServerError, + ) + return + } + + transcription = strings.TrimSpace(output.Text) + detectedLang = output.DetectedLanguage + if detectedLang == "" { + detectedLang = output.Language + } + log.InfoContext( + ctx, + "transcribed audio", + slog.String("text", transcription), + slog.String("language", detectedLang), + ) + + // Append transcription to last message's content. + switch { + case len(last.MultiContent) > 0: + last.MultiContent = append(last.MultiContent, + openai.ChatMessagePart{ + Type: openai.ChatMessagePartTypeText, + Text: transcription, + }, + ) + last.Content = "" + case last.Content != "": + last.MultiContent = []openai.ChatMessagePart{ + { + Type: openai.ChatMessagePartTypeText, + Text: last.Content, + }, + { + Type: openai.ChatMessagePartTypeText, + Text: transcription, + }, + } + last.Content = "" + default: + last.Content = transcription + last.MultiContent = nil + } + } + + // Set SSE headers. + w.Header().Set("Cache-Control", "no-cache") + w.Header().Set("Connection", "keep-alive") + w.Header().Set("Content-Type", "text/event-stream") + + // Helper to send an SSE event. 
+ send := func(msg StreamMessage) { + data, err := json.Marshal(msg) + if err != nil { + log.ErrorContext(ctx, "failed to marshal SSE event", + slog.Any("error", err), + ) + return + } + fmt.Fprintf(w, "event: message\ndata: %s\n\n", data) + flusher.Flush() + } + + // If audio was transcribed, send user message with transcription. + if transcription != "" { + send(StreamMessage{ + Message: openai.ChatCompletionMessage{ + Role: openai.ChatMessageRoleUser, + Content: transcription, + }, + Transcription: transcription, + DetectedLanguage: detectedLang, + }) + } + + // Get model. + var model = req.Model + if model == "" { + model = svc.llm.DefaultModel() + } + + // Determine voice to use. + var voice = req.Voice + if req.Voice == "" && detectedLang != "" { + if autoVoice := svc.tts.SelectVoice( + detectedLang, + ); autoVoice != "" { + voice = autoVoice + log.InfoContext(ctx, "auto-selected voice", + slog.String("language", detectedLang), + slog.String("voice", voice), + ) + } + } + + // Start streaming LLM query. + var ( + events = make(chan llm.StreamEvent) + llmErr error + ) + go func() { + llmErr = svc.llm.QueryStream(ctx, messages, model, events) + }() + + // Consume events and send as SSE. + var last StreamMessage + for evt := range events { + msg := StreamMessage{Message: evt.Message} + + // Track the last assistant message for TTS. + if evt.Message.Role == openai.ChatMessageRoleAssistant && + len(evt.Message.ToolCalls) == 0 { + last = msg + continue + } + + send(msg) + } + // Check for LLM errors. + if llmErr != nil { + log.ErrorContext( + ctx, + "LLM stream failed", + slog.Any("error", llmErr), + ) + send(StreamMessage{ + Message: openai.ChatCompletionMessage{ + Role: openai.ChatMessageRoleAssistant, + }, + Error: fmt.Sprintf("LLM error: %v", llmErr), + }) + return + } + + // Synthesize TTS for the final assistant message. 
+ if last.Message.Content != "" { + audio, err := svc.tts.Synthesize( + ctx, last.Message.Content, voice, + ) + if err != nil { + log.ErrorContext( + ctx, "TTS failed", slog.Any("error", err), + ) + last.Error = fmt.Sprintf("TTS error: %v", err) + } else { + last.Audio = base64.StdEncoding.EncodeToString(audio) + } + } + last.Model = model + last.Voice = voice + send(last) +} + +// models returns available LLM models. +func (svc *Service) models(w http.ResponseWriter, r *http.Request) { + var ( + ctx = r.Context() + log = ctx.Value("log").(*slog.Logger) + ) + + models, err := svc.llm.ListModels(ctx) + if err != nil { + log.ErrorContext( + ctx, + "failed to list models", + slog.Any("error", err), + ) + http.Error( + w, + "failed to list models", + http.StatusInternalServerError, + ) + return + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(struct { + Models []openai.Model `json:"models"` + DefaultModel string `json:"default_model"` + }{ + Models: models, + DefaultModel: svc.llm.DefaultModel(), + }); err != nil { + log.ErrorContext( + ctx, + "failed to encode models response", + slog.Any("error", err), + ) + } +} + +// voices returns available TTS voices. 
+func (svc *Service) voices(w http.ResponseWriter, r *http.Request) { + var ( + ctx = r.Context() + log = ctx.Value("log").(*slog.Logger) + ) + + voices, err := svc.tts.ListVoices(ctx) + if err != nil { + log.ErrorContext( + ctx, + "failed to list voices", + slog.Any("error", err), + ) + http.Error( + w, + "failed to list voices", + http.StatusInternalServerError, + ) + return + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(map[string][]string{ + "voices": voices, + }); err != nil { + log.ErrorContext( + ctx, + "failed to encode voices response", + slog.Any("error", err), + ) + } +} diff --git a/internal/service/service_test.go b/internal/service/service_test.go new file mode 100644 index 0000000..e77f653 --- /dev/null +++ b/internal/service/service_test.go @@ -0,0 +1,24 @@ +package service + +import ( + "net/http" + "net/http/httptest" + "testing" +) + +func TestStatusHandler(t *testing.T) { + svc := &Service{} + + req := httptest.NewRequest(http.MethodGet, "/status", nil) + w := httptest.NewRecorder() + + svc.status(w, req) + + if w.Code != http.StatusOK { + t.Errorf( + "status handler returned %d, want %d", + w.Code, + http.StatusOK, + ) + } +} diff --git a/internal/service/static/icons.svg b/internal/service/static/icons.svg new file mode 100644 index 0000000..58f1447 --- /dev/null +++ b/internal/service/static/icons.svg @@ -0,0 +1,93 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/internal/service/static/main.css b/internal/service/static/main.css new file mode 100644 index 0000000..3525856 --- /dev/null +++ b/internal/service/static/main.css @@ -0,0 +1,692 @@ +:root { + --base-font-size: calc(1rem + 0.1618vw); + --ratio: 1.618; + --s-2: calc(var(--s-1) / var(--ratio)); + --s-1: calc(var(--s0) / var(--ratio)); + --s0: var(--base-font-size); + --s1: 
calc(var(--s0) * var(--ratio)); + --s2: calc(var(--s1) * var(--ratio)); + --s3: calc(var(--s2) * var(--ratio)); + + --font-sans: system-ui, -apple-system, sans-serif; + --font-mono: ui-monospace, monospace; + + --color-black: #1d1f21; + --color-blue: #4271ae; + --color-brown: #a3685a; + --color-cyan: #3e999f; + --color-gray0: #efefef; + --color-gray1: #e0e0e0; + --color-gray2: #d6d6d6; + --color-gray3: #8e908c; + --color-gray4: #969896; + --color-gray5: #4d4d4c; + --color-gray6: #282a2e; + --color-green: #718c00; + --color-orange: #f5871f; + --color-purple: #8959a8; + --color-red: #c82829; + --color-yellow: #eab700; + + --color-bg: var(--color-gray0); + --color-surface: white; + --color-text: var(--color-black); + --color-text-muted: var(--color-gray3); + --color-border: var(--color-black); + --color-border-light: var(--color-gray2); + --color-primary: var(--color-blue); + --color-primary-hover: var(--color-cyan); + --color-recording: var(--color-red); + --color-error: var(--color-yellow); + + --measure: 80ch; + --radius: 0.375rem; + --icon-size: 1.25rem; + --action-icon-size: 0.875rem; + --border-width: 2px; + --textarea-line-height: 1.5rem; +} + +/* ==================== */ +/* Reset */ +/* ==================== */ + +*, +*::before, +*::after { + box-sizing: border-box; + margin: 0; +} + +/* ==================== */ +/* Base */ +/* ==================== */ + +html { + font-size: 100%; + font-family: var(--font-sans); + line-height: 1.5; + color: var(--color-text); + background: var(--color-bg); +} + +body { + overflow: hidden; + min-height: 100dvh; +} + +/* ==================== */ +/* Layout */ +/* ==================== */ + +.container { + display: flex; + flex-direction: column; + min-height: 100dvh; + height: 100dvh; + max-width: calc(var(--measure) + var(--s3) * 2); + margin-inline: auto; + padding: var(--s0); + padding-bottom: 0; +} + +/* ==================== */ +/* Chat */ +/* ==================== */ + +.chat { + flex: 1 1 0; + min-height: 0; + overflow-y: auto; 
+ padding: var(--s1); + background: var(--color-surface); + border: 1px solid var(--color-border-light); + border-radius: var(--radius); +} + +.chat > * + * { + margin-top: var(--s0); +} + +/* ==================== */ +/* Collapsible */ +/* ==================== */ + +.collapsible { + border: 1px dashed var(--color-border-light); + border-radius: var(--radius); +} + +.collapsible[open] .collapsible__summary::before { + transform: rotate(90deg); +} + +.collapsible:last-of-type:has(~ .message__actions:empty) { + border-bottom: none; +} + +.collapsible__content { + padding: var(--s-2) var(--s-1); + border-top: 1px dashed var(--color-border-light); +} + +.collapsible__content > pre { + margin: 0; + font-family: var(--font-mono); + font-size: var(--s-1); + white-space: pre-wrap; + word-break: break-word; +} + +.collapsible__label { + font-size: var(--s-1); + color: var(--color-text-muted); +} + +.collapsible__pre { + margin: 0; + padding: var(--s-2); + font-family: var(--font-mono); + font-size: var(--s-1); + background: var(--color-bg); + border-radius: var(--radius); + overflow-x: auto; + white-space: pre-wrap; + word-break: break-word; +} + +.collapsible__section { + margin-bottom: var(--s-2); +} + +.collapsible__section:last-child { + margin-bottom: 0; +} + +.collapsible__section-label { + font-size: var(--s-2); + font-weight: 500; + color: var(--color-text-muted); + margin-bottom: 0.25rem; + text-transform: uppercase; + letter-spacing: 0.05em; +} + +.collapsible__summary { + display: flex; + align-items: center; + gap: var(--s-2); + padding: var(--s-2) var(--s-1); + cursor: pointer; + user-select: none; + list-style: none; +} + +.collapsible__summary::-webkit-details-marker { + display: none; +} + +.collapsible__summary::before { + content: "▶"; + font-size: 0.625em; + color: var(--color-text-muted); + transition: transform 0.15s ease; +} + +.collapsible__summary .icon { + width: var(--icon-size); + height: var(--icon-size); + color: var(--color-text-muted); +} + 
+.collapsible--reasoning .collapsible__summary .icon { + color: var(--color-purple); +} + +.collapsible--reasoning:has(+ .collapsible--tool) { + margin-bottom: 0; + border-bottom-left-radius: 0; + border-bottom-right-radius: 0; +} + +.collapsible--reasoning + .collapsible--tool { + margin-top: 0; + border-top: none; + border-top-left-radius: 0; + border-top-right-radius: 0; +} + +.collapsible--tool .collapsible__summary .icon { + color: var(--color-orange); +} + +/* ==================== */ +/* Compose */ +/* ==================== */ + +.compose { + background: var(--color-surface); + border: 1px solid var(--color-border-light); + border-radius: var(--radius); + overflow: hidden; + margin-bottom: var(--s0); +} + +.compose__action-btn { + display: flex; + align-items: center; + justify-content: center; + padding: 0.25rem; + background: transparent; + border: none; + border-radius: 0.25rem; + color: var(--color-text-muted); + cursor: pointer; + transition: + color 0.15s ease, + background-color 0.15s ease; +} + +@media (hover: hover) { + .compose__action-btn:hover { + color: var(--color-text); + } +} + +.compose__action-btn:focus-visible { + outline: 2px solid var(--color-primary); + outline-offset: 1px; +} + +.compose__action-btn:disabled { + opacity: 0.4; + cursor: not-allowed; +} + +.compose__action-btn .icon { + width: var(--action-icon-size); + height: var(--action-icon-size); +} + +.compose__action-btn--record.recording { + color: white; + background: var(--color-recording); + animation: pulse 1s ease-in-out infinite; +} + +.compose__action-btn--send.loading { + position: relative; + color: transparent; + pointer-events: none; +} + +.compose__action-btn--send.loading::after { + content: ""; + position: absolute; + width: 0.75rem; + height: 0.75rem; + border: 2px solid var(--color-gray2); + border-top-color: var(--color-primary); + border-radius: 50%; + animation: spin 0.8s linear infinite; +} + +.compose__actions { + display: flex; + align-items: center; + 
justify-content: space-between; + padding: 0.125rem var(--s-2); + border-top: 1px dotted var(--color-border-light); +} + +.compose__actions-right { + display: flex; + align-items: center; + gap: 0.125rem; +} + +.compose__attachment { + display: flex; + align-items: center; + gap: 0.25rem; + padding: 0.125rem 0.375rem; + font-size: var(--s-1); + background: var(--color-gray1); + border-radius: var(--radius); + color: var(--color-text); +} + +.compose__attachment-name { + max-width: 120px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.compose__attachment-remove { + display: flex; + align-items: center; + justify-content: center; + padding: 0; + background: none; + border: none; + color: var(--color-text-muted); + cursor: pointer; + transition: color 0.15s ease; +} + +@media (hover: hover) { + .compose__attachment-remove:hover { + color: var(--color-red); + } +} + +.compose__attachment-remove .icon { + width: 0.875rem; + height: 0.875rem; +} + +.compose__attachments { + display: flex; + flex-wrap: wrap; + gap: var(--s-2); + padding: var(--s-2) var(--s-1); + border-top: 1px dotted var(--color-border-light); +} + +.compose__attachments:empty { + display: none; +} + +.compose__textarea { + display: block; + width: 100%; + height: auto; + min-height: calc(var(--textarea-line-height) + var(--s-1) * 2); + max-height: calc(var(--textarea-line-height) * 5 + var(--s-1) * 2); + padding: var(--s-1); + font-family: inherit; + font-size: var(--s0); + line-height: var(--textarea-line-height); + color: var(--color-text); + background: var(--color-surface); + border: none; + resize: none; + overflow-y: auto; + field-sizing: content; +} + +.compose__textarea:focus { + outline: none; +} + +.compose__textarea::placeholder { + color: var(--color-text-muted); +} + +/* ==================== */ +/* Footer */ +/* ==================== */ + +.footer { + margin-top: var(--s0); + background: var(--color-bg); +} + +.footer__select { + height: 2rem; + padding: 0 
var(--s-1); + font-family: inherit; + font-size: var(--s-1); + line-height: 2rem; + color: var(--color-text); + background: var(--color-surface); + border: 1px solid var(--color-border-light); + border-radius: var(--radius); + cursor: pointer; + max-width: 150px; +} + +.footer__select:focus-visible { + outline: 2px solid var(--color-primary); + outline-offset: 2px; +} + +.footer__toolbar { + border-inline: none; + border-bottom: none; + display: flex; + align-items: center; + gap: var(--s-2); + padding: var(--s-2); + background: var(--color-surface); + border: 1px solid var(--color-border-light); + border-radius: var(--radius); +} + +.footer__toolbar-btn { + display: flex; + align-items: center; + justify-content: center; + height: 2rem; + width: 2rem; + padding: 0; + background: transparent; + border: 1px solid var(--color-border-light); + border-radius: var(--radius); + color: var(--color-text-muted); + cursor: pointer; + transition: all 0.15s ease; +} + +@media (hover: hover) { + .footer__toolbar-btn:hover { + color: var(--color-text); + border-color: var(--color-text-muted); + } +} + +.footer__toolbar-btn:focus-visible { + outline: 2px solid var(--color-primary); + outline-offset: 2px; +} + +.footer__toolbar-btn .icon { + width: 1rem; + height: 1rem; +} + +.footer__toolbar-btn--muted { + color: var(--color-red); + border-color: var(--color-red); +} + +@media (hover: hover) { + .footer__toolbar-btn--muted:hover { + color: var(--color-red); + border-color: var(--color-red); + } +} + +.footer__toolbar-spacer { + flex: 1; +} + +/* ==================== */ +/* Message */ +/* ==================== */ + +.message { + display: flex; + max-width: 100%; + border: var(--border-width) solid var(--color-border); + border-radius: var(--radius); + overflow: hidden; +} + +.message--assistant .message__icon { + color: var(--color-primary); +} + +.message--debug-open .message__debug { + display: block; +} + +.message--error { + border-color: var(--color-error); +} + 
+.message--error .message__icon { + border-color: var(--color-error); +} + +.message--user .message__icon { + color: var(--color-green); +} + +.message__action-btn { + display: flex; + align-items: center; + justify-content: center; + padding: 0.125rem; + background: transparent; + border: none; + border-radius: 0.125rem; + color: var(--color-text-muted); + cursor: pointer; + transition: color 0.15s ease; +} + +@media (hover: hover) { + .message__action-btn:hover { + color: var(--color-text); + } +} + +.message__action-btn:focus-visible { + outline: 2px solid var(--color-primary); + outline-offset: 1px; +} + +.message__action-btn svg { + width: var(--action-icon-size); + height: var(--action-icon-size); +} + +.message__action-btn--success { + color: var(--color-primary); +} + +.message__actions { + display: flex; + justify-content: flex-end; + gap: 0.125rem; + padding: 0.125rem 0.25rem; + border-top: 1px dotted var(--color-border-light); + background: var(--color-surface); +} + +.message__actions:empty { + display: none; +} + +.message__body { + flex: 1; + min-width: 0; + display: flex; + flex-direction: column; +} + +.message__content { + padding: var(--s-2) var(--s-1); + white-space: pre-wrap; + word-wrap: break-word; + overflow-wrap: break-word; +} + +.message__content img { + display: block; + max-width: 100%; + height: auto; + margin-block: var(--s-1); + border: 1px solid var(--color-border-light); + border-radius: var(--radius); +} + +.message__debug { + display: none; + padding: var(--s-2) var(--s-1); + border-top: 1px dotted var(--color-border-light); + background: var(--color-surface); +} + +.message__debug-list { + display: grid; + grid-template-columns: auto 1fr; + gap: 0.125rem 1rem; + margin: 0; + font-size: var(--s-1); +} + +.message__debug-list dt, +.message__debug-list dd { + margin: 0; + text-align: left; + color: var(--color-text); +} + +.message__debug-list dt { + white-space: nowrap; +} + +.message__icon { + display: flex; + align-items: center; 
+ justify-content: center; + padding: var(--s-2); + border-right: var(--border-width) solid var(--color-border); + background: var(--color-bg); + color: var(--color-text-muted); +} + +.message__icon svg { + width: var(--icon-size); + height: var(--icon-size); +} + +/* ==================== */ +/* Animations */ +/* ==================== */ + +@keyframes pulse { + 0%, + 100% { + opacity: 1; + } + 50% { + opacity: 0.7; + } +} + +@keyframes spin { + to { + transform: rotate(360deg); + } +} + +/* ==================== */ +/* Media: Mobile */ +/* ==================== */ + +@media (max-width: 639px) { + .container { + padding: 0; + padding-top: var(--s-1); + } + + .chat { + border-radius: 0; + border-inline: none; + } + + .footer { + margin-top: var(--s-1); + padding-bottom: 0; + } + + .compose { + border-radius: 0; + border-inline: none; + margin-bottom: var(--s-1); + } + + .compose__action-btn { + min-width: 44px; + min-height: 44px; + padding: var(--s-1); + } + + .compose__action-btn .icon { + width: var(--icon-size); + height: var(--icon-size); + } + + .footer__toolbar { + border-inline: none; + border-bottom: none; + border-radius: 0; + margin-top: 0; + } + + .footer__toolbar-btn { + height: 1.75rem; + width: 1.75rem; + } + + .footer__select { + height: 1.75rem; + line-height: 1.75rem; + max-width: 128px; + font-size: var(--s-1); + } +} diff --git a/internal/service/static/main.js b/internal/service/static/main.js new file mode 100644 index 0000000..7076bee --- /dev/null +++ b/internal/service/static/main.js @@ -0,0 +1,1372 @@ +const STREAM_ENDPOINT = '/v1/chat/voice/stream'; +const ICONS_URL = '/static/icons.svg'; +const MODELS_ENDPOINT = '/v1/models'; +const MODEL_KEY = 'odidere_model'; +const STORAGE_KEY = 'odidere_history'; +const VOICES_ENDPOINT = '/v1/voices'; +const VOICE_KEY = 'odidere_voice'; + +/** + * Odidere is the main application class for the voice assistant UI. + * It manages audio recording, chat history, and communication with the API. 
+ * Creates a new instance bound to the given document. + * @param {Object} options + * @param {Document} options.document + */ +class Odidere { + constructor({ document }) { + this.document = document; + + // State + this.attachments = []; + this.audioChunks = []; + this.currentAudio = null; + this.currentAudioUrl = null; + this.currentController = null; + this.history = []; + this.isProcessing = false; + this.isRecording = false; + this.isMuted = false; + this.mediaRecorder = null; + + // DOM Elements + this.$attach = document.getElementById('attach'); + this.$attachments = document.getElementById('attachments'); + this.$chat = document.getElementById('chat'); + this.$fileInput = document.getElementById('file-input'); + this.$model = document.getElementById('model'); + this.$ptt = document.getElementById('ptt'); + this.$reset = document.getElementById('reset'); + this.$send = document.getElementById('send'); + this.$textInput = document.getElementById('text-input'); + this.$voice = document.getElementById('voice'); + this.$mute = document.getElementById('mute'); + + // Templates + this.$tplAssistantMessage = document.getElementById( + 'tpl-assistant-message', + ); + this.$tplAttachmentChip = document.getElementById('tpl-attachment-chip'); + this.$tplCollapsible = document.getElementById('tpl-collapsible'); + this.$tplDebugRow = document.getElementById('tpl-debug-row'); + this.$tplErrorMessage = document.getElementById('tpl-error-message'); + this.$tplToolCall = document.getElementById('tpl-tool-call'); + this.$tplUserMessage = document.getElementById('tpl-user-message'); + + this.#init(); + } + + // ==================== + // PUBLIC API + // ==================== + /** + * destroy releases all resources including media streams and audio. + */ + destroy() { + for (const track of this.mediaRecorder?.stream?.getTracks() ?? 
[]) { + track.stop(); + } + this.#stopCurrentAudio(); + this.currentController?.abort(); + } + + /** + * reset clears all state including history, attachments, and pending + * requests. + */ + reset() { + this.#stopCurrentAudio(); + + if (this.currentController) { + this.currentController.abort(); + this.currentController = null; + } + + this.#setLoadingState(false); + this.history = []; + localStorage.removeItem(STORAGE_KEY); + this.$chat.innerHTML = ''; + this.#clearAttachments(); + this.$textInput.value = ''; + this.$textInput.style.height = 'auto'; + } + + // ==================== + // INITIALIZATION + // ==================== + /** + * #init initializes the application. + */ + #init() { + this.#loadHistory(); + this.#bindEvents(); + this.#fetchVoices(); + this.#fetchModels(); + } + + /** + * #bindEvents attaches all event listeners to DOM elements. + */ + #bindEvents() { + // PTT button: touch + this.$ptt.addEventListener('touchstart', this.#handlePttTouchStart, { + passive: false, + }); + this.$ptt.addEventListener('touchend', this.#handlePttTouchEnd); + this.$ptt.addEventListener('touchcancel', this.#handlePttTouchEnd); + // Prevent context menu on long press. 
+ this.$ptt.addEventListener('contextmenu', (e) => e.preventDefault()); + + // PTT button: mouse + this.$ptt.addEventListener('mousedown', this.#handlePttMouseDown); + this.$ptt.addEventListener('mouseup', this.#handlePttMouseUp); + this.$ptt.addEventListener('mouseleave', this.#handlePttMouseUp); + + // Keyboard: spacebar PTT (outside inputs) + this.document.addEventListener('keydown', this.#handleKeyDown); + this.document.addEventListener('keyup', this.#handleKeyUp); + + // Textarea: Enter to send, Shift+Enter for newline + this.$textInput.addEventListener('keydown', this.#handleTextareaKeyDown); + this.$textInput.addEventListener('input', this.#handleTextareaInput); + + // Send button + this.$send.addEventListener('click', () => this.#submitText()); + + // Reset button + this.$reset.addEventListener('click', () => this.reset()); + + // File attachment + this.$attach.addEventListener('click', () => this.$fileInput.click()); + this.$fileInput.addEventListener('change', (e) => + this.#handleAttachments(e.target.files), + ); + + // Save selections on change. + this.$model.addEventListener('change', () => { + localStorage.setItem(MODEL_KEY, this.$model.value); + }); + this.$voice.addEventListener('change', () => { + localStorage.setItem(VOICE_KEY, this.$voice.value); + }); + // Mute button + this.$mute.addEventListener('click', () => this.#toggleMute()); + } + + /** + * #loadHistory loads chat history from localStorage and renders it. + */ + #loadHistory() { + try { + const stored = localStorage.getItem(STORAGE_KEY); + if (!stored) return; + + this.history = JSON.parse(stored); + this.#renderMessages(this.history); + } catch (e) { + console.error('failed to load history:', e); + this.history = []; + } + } + + // ==================== + // EVENT HANDLERS + // ==================== + /** + * #handlePttTouchStart handles touch start on the PTT button. 
+ * @param {TouchEvent} event + */ + #handlePttTouchStart = (event) => { + event.preventDefault(); + event.stopPropagation(); + this.#startRecording(); + }; + + /** + * #handlePttTouchEnd handles touch end on the PTT button. + * @param {TouchEvent} event + */ + #handlePttTouchEnd = (event) => { + event.stopPropagation(); + this.#stopRecording(); + }; + + /** + * #handlePttMouseDown handles mouse down on the PTT button. + * Ignores events that originate from touch to avoid double-firing. + * @param {MouseEvent} event + */ + #handlePttMouseDown = (event) => { + if (event.sourceCapabilities?.firesTouchEvents) return; + event.preventDefault(); + event.stopPropagation(); + this.#startRecording(); + }; + + /** + * #handlePttMouseUp handles mouse up on the PTT button. + * @param {MouseEvent} event + */ + #handlePttMouseUp = (event) => { + if (event.sourceCapabilities?.firesTouchEvents) return; + event.stopPropagation(); + this.#stopRecording(); + }; + + /** + * #handleKeyDown handles keydown events for spacebar PTT. + * Only triggers when focus is not in an input element. + * @param {KeyboardEvent} event + */ + #handleKeyDown = (event) => { + if (event.code !== 'Space') return; + if (event.repeat) return; + + const isInput = + event.target.tagName === 'INPUT' || + event.target.tagName === 'TEXTAREA' || + event.target.tagName === 'SELECT'; + if (isInput) return; + + event.preventDefault(); + this.#startRecording(); + }; + + /** + * #handleKeyUp handles keyup events for spacebar PTT. + * @param {KeyboardEvent} event + */ + #handleKeyUp = (event) => { + if (event.code !== 'Space') return; + + const isInput = + event.target.tagName === 'INPUT' || + event.target.tagName === 'TEXTAREA' || + event.target.tagName === 'SELECT'; + if (isInput) return; + + event.preventDefault(); + this.#stopRecording(); + }; + + /** + * #handleTextareaKeyDown handles Enter key to submit text. + * Shift+Enter inserts a newline instead. 
+   * @param {KeyboardEvent} event
+   */
+  #handleTextareaKeyDown = (event) => {
+    if (event.code !== 'Enter') return;
+    if (event.shiftKey) return;
+
+    event.preventDefault();
+    this.#submitText();
+  };
+
+  /**
+   * #handleTextareaInput adjusts textarea height to fit content.
+   */
+  #handleTextareaInput = () => {
+    const textarea = this.$textInput;
+    textarea.style.height = 'auto';
+    const computed = getComputedStyle(textarea);
+    const maxHeight = parseFloat(computed.maxHeight);
+    const newHeight = Math.min(textarea.scrollHeight, maxHeight);
+    textarea.style.height = `${newHeight}px`;
+  };
+
+  /**
+   * #handleAttachments adds files to the attachment list.
+   * Duplicates are ignored based on name, size, and last-modified time.
+   * @param {FileList} files
+   */
+  #handleAttachments(files) {
+    for (const file of files) {
+      const isDuplicate = this.attachments.some(
+        (a) =>
+          a.name === file.name &&
+          a.size === file.size &&
+          a.lastModified === file.lastModified,
+      );
+      if (isDuplicate) continue;
+
+      this.attachments.push(file);
+    }
+    this.#renderAttachments();
+    this.$fileInput.value = '';
+  }
+
+  // ====================
+  // AUDIO RECORDING
+  // ====================
+  /**
+   * #startRecording begins audio recording if not already recording or
+   * processing.
+   * Initializes the MediaRecorder on first use.
+   */
+  async #startRecording() {
+    if (this.isRecording) return;
+    if (this.isProcessing) return;
+
+    this.#stopCurrentAudio();
+
+    // Initialize MediaRecorder on first use
+    if (!this.mediaRecorder) {
+      const success = await this.#initMediaRecorder();
+      if (!success) return;
+    }
+
+    this.audioChunks = [];
+    this.mediaRecorder.start();
+    this.#setRecordingState(true);
+  }
+
+  /**
+   * #stopRecording stops the current audio recording.
+   */
+  #stopRecording() {
+    if (!this.isRecording) return;
+    if (!this.mediaRecorder) return;
+
+    this.mediaRecorder.stop();
+    this.#setRecordingState(false);
+  }
+
+  /**
+   * #initMediaRecorder initializes the MediaRecorder with microphone access.
+ * Prefers opus codec for better compression if available. + * @returns {Promise} true if successful + */ + async #initMediaRecorder() { + try { + const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); + const mimeType = MediaRecorder.isTypeSupported('audio/webm;codecs=opus') + ? 'audio/webm;codecs=opus' + : 'audio/webm'; + + this.mediaRecorder = new MediaRecorder(stream, { mimeType }); + + this.mediaRecorder.addEventListener('dataavailable', (event) => { + if (event.data.size > 0) { + this.audioChunks.push(event.data); + } + }); + + this.mediaRecorder.addEventListener('stop', async () => { + if (this.audioChunks.length === 0) return; + + const audioBlob = new Blob(this.audioChunks, { + type: this.mediaRecorder.mimeType, + }); + this.audioChunks = []; + + // Capture and clear text input to send with audio. + const text = this.$textInput.value.trim(); + this.$textInput.value = ''; + this.$textInput.style.height = 'auto'; + + await this.#sendRequest({ audio: audioBlob, text }); + }); + + this.mediaRecorder.addEventListener('error', (event) => { + console.error('MediaRecorder error:', event.error); + this.#setRecordingState(false); + this.audioChunks = []; + this.#renderError( + `Recording error: ${event.error?.message || 'Unknown error'}`, + ); + }); + + return true; + } catch (e) { + console.error('failed to initialize media recorder:', e); + this.#renderError(`Microphone access denied: ${e.message}`); + return false; + } + } + + // ==================== + // AUDIO PLAYBACK + // ==================== + /** + * #playAudio decodes and plays base64-encoded WAV audio. 
+ * @param {string} base64Audio + */ + async #playAudio(base64Audio) { + try { + this.#stopCurrentAudio(); + + const audioData = Uint8Array.from(atob(base64Audio), (c) => + c.charCodeAt(0), + ); + const audioBlob = new Blob([audioData], { type: 'audio/wav' }); + const audioUrl = URL.createObjectURL(audioBlob); + + this.currentAudioUrl = audioUrl; + this.currentAudio = new Audio(audioUrl); + this.currentAudio.muted = !!this.isMuted; + + await new Promise((resolve, reject) => { + this.currentAudio.addEventListener( + 'ended', + () => { + this.#cleanupAudio(); + resolve(); + }, + { once: true }, + ); + this.currentAudio.addEventListener( + 'error', + (e) => { + this.#cleanupAudio(); + reject(e); + }, + { once: true }, + ); + this.currentAudio.play().catch(reject); + }); + } catch (e) { + console.error('failed to play audio:', e); + this.#cleanupAudio(); + } + } + + /** + * #stopCurrentAudio stops and cleans up any playing audio. + */ + #stopCurrentAudio() { + if (this.currentAudio) { + this.currentAudio.pause(); + this.currentAudio.currentTime = 0; + } + this.#cleanupAudio(); + } + + /** + * #cleanupAudio revokes the object URL and clears audio references. + */ + #cleanupAudio() { + if (this.currentAudioUrl) { + URL.revokeObjectURL(this.currentAudioUrl); + this.currentAudioUrl = null; + } + this.currentAudio = null; + } + + // ==================== + // API: FETCH + // ==================== + /** + * #fetchModels fetches available models from the API and populates selectors. + */ + async #fetchModels() { + try { + const res = await fetch(MODELS_ENDPOINT); + if (!res.ok) throw new Error(`${res.status}`); + const data = await res.json(); + this.#populateModels(data.models, data.default_model); + } catch (e) { + console.error('failed to fetch models:', e); + this.#populateModelsFallback(); + } + } + + /** + * #fetchVoices fetches available voices from the API and populates + * selectors. 
+ */ + async #fetchVoices() { + try { + const res = await fetch(VOICES_ENDPOINT); + if (!res.ok) throw new Error(`${res.status}`); + const data = await res.json(); + + // Filter out legacy v0 voices. + const filtered = data.voices.filter((v) => !v.includes('_v0')); + this.#populateVoiceSelect(filtered); + } catch (e) { + console.error('failed to fetch voices:', e); + this.$voice.innerHTML = ''; + + const $opt = this.document.createElement('option'); + $opt.value = ''; + $opt.disabled = true; + $opt.selected = true; + $opt.textContent = 'Failed to load'; + this.$voice.appendChild($opt); + } + } + + // ==================== + // API: CHAT + // ==================== + + /** + * #submitText submits the current text input and attachments. + */ + async #submitText() { + const text = this.$textInput.value.trim(); + const hasContent = text || this.attachments.length > 0; + + if (!hasContent) return; + if (this.isProcessing) return; + + this.#stopCurrentAudio(); + this.$textInput.value = ''; + this.$textInput.style.height = 'auto'; + + await this.#sendRequest({ text }); + } + + /** + * #sendRequest sends a chat request to the streaming SSE endpoint. + * + * For text-only input, renders the user message immediately for + * responsiveness. + * For audio input, waits for the server's transcription event before + * rendering the user message. + * + * Messages are rendered incrementally as SSE events arrive: + * - user (transcription), assistant (tool calls), tool (results), + * and a final assistant message with synthesized audio. + * + * @param {Object} options + * @param {Blob} [options.audio] - Recorded audio blob + * @param {string} [options.text] - Text input + */ + async #sendRequest({ audio = null, text = '' }) { + this.#setLoadingState(true); + this.currentController = new AbortController(); + + try { + // Build the user message with text and attachments. 
+ const userMessage = await this.#buildUserMessage(text, this.attachments); + const hasContent = text || this.attachments.length > 0 || audio; + if (!hasContent) { + this.#setLoadingState(false); + return; + } + + // Build messages array: history + current user message. + const messages = [ + ...this.history.map(({ id, meta, ...msg }) => msg), + userMessage, + ]; + + const payload = { + messages, + voice: this.$voice.value, + model: this.$model.value, + }; + if (audio) { + payload.audio = await this.#toBase64(audio); + } + + // Clear attachments after building payload (before async operations). + this.#clearAttachments(); + + // For text-only requests (no audio), add to history and render + // immediately. + if (!audio) { + this.#appendHistory([userMessage]); + this.#renderMessages([userMessage]); + } + + const res = await fetch(STREAM_ENDPOINT, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(payload), + signal: this.currentController.signal, + }); + if (!res.ok) { + const err = await res.text(); + throw new Error(`server error ${res.status}: ${err}`); + } + // Parse SSE stream from response body. + const reader = res.body.getReader(); + const decoder = new TextDecoder(); + let sseBuffer = ''; + + // Stash assistant messages with tool_calls until their results arrive. + let pendingTools = null; + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + sseBuffer += decoder.decode(value, { stream: true }); + + // Process complete SSE events (separated by double newlines). + const parts = sseBuffer.split('\n\n'); + sseBuffer = parts.pop(); + + for (const part of parts) { + if (!part.trim()) continue; + + // Extract data from SSE event lines. 
+ let data = ''; + for (const line of part.split('\n')) { + if (line.startsWith('data: ')) { + data += line.slice(6); + } + } + if (!data) continue; + + let event; + try { + event = JSON.parse(data); + } catch { + console.error('failed to parse SSE data:', data); + continue; + } + + // Handle errors sent mid-stream. + if (event.error) { + this.#renderError(event.error); + continue; + } + + const message = event.message; + // Handle user message from server (audio transcription). + if (message.role === 'user' && event.transcription) { + const displayParts = []; + if (text) displayParts.push(text); + if (event.transcription) displayParts.push(event.transcription); + const displayContent = displayParts.join('\n\n'); + + let historyContent; + if (typeof userMessage.content === 'string') { + historyContent = displayContent; + } else { + historyContent = [...userMessage.content]; + if (event.transcription) { + historyContent.push({ + type: 'text', + text: event.transcription, + }); + } + } + + const audioUserMessage = { + id: userMessage.id, + role: 'user', + content: historyContent, + meta: { language: event.detected_language }, + }; + + this.#appendHistory([audioUserMessage]); + this.#renderMessages([audioUserMessage]); + continue; + } + + // Stash assistant messages with tool calls; don't render yet. + if ( + message.role === 'assistant' && + message.tool_calls?.length > 0 && + !message.content + ) { + pendingTools = { + assistant: message, + results: [], + expected: message.tool_calls.length, + }; + // Add to history (server needs it) but don't render. + this.#appendHistory([message]); + continue; + } + + // Collect tool results and render once all have arrived. + if (message.role === 'tool' && pendingTools.assistant) { + pendingTools.results.push(message); + // Add to history (server needs it) but don't render yet. + this.#appendHistory([message]); + + // Once all tool results are in, render the combined message. 
+ if (pendingTools.results.length >= pendingTools.expected) { + this.#renderMessages([ + pendingTools.assistant, + ...pendingTools.results, + ]); + pendingTools = null; + } + continue; + } + + // Regular assistant message (final reply with content). + const meta = {}; + if (event.voice) meta.voice = event.voice; + + this.#appendHistory([message], meta); + this.#renderMessages([message], meta); + + // For the final assistant message with audio, play it. + if (event.audio) { + this.#setLoadingState(false); + await this.#playAudio(event.audio); + } + } + } + + this.#setLoadingState(false); + } catch (e) { + if (e.name === 'AbortError') { + console.debug('request aborted'); + return; + } + console.error('failed to send request:', e); + this.#renderError(`Error: ${e.message}`); + this.#setLoadingState(false); + } finally { + this.currentController = null; + } + } + + /** + * #buildUserMessage builds a user message object for the API. + * Text files are included as text, images as base64 data URLs. + * Returns either a simple string content or multipart array format + * depending on whether images are present. + * @param {string} text + * @param {File[]} attachments + * @returns {Promise<{id: string, role: 'user', content: string | Array}>} + */ + async #buildUserMessage(text, attachments) { + const textParts = []; + const imageParts = []; + + // Process attachments. + // Images become data URLs, text files become inline text. + for (const file of attachments) { + if (file.type.startsWith('image/')) { + const base64 = await this.#toBase64(file); + const dataUrl = `data:${file.type};base64,${base64}`; + imageParts.push({ + type: 'image_url', + image_url: { url: dataUrl }, + }); + } else { + const content = await file.text(); + textParts.push(`=== File: ${file.name} ===\n\n${content}`); + } + } + + // Add user's message text after files. 
+ if (text) { + textParts.push(text); + } + + const combinedText = textParts.join('\n\n'); + + // If no images, return simple string content. + if (imageParts.length === 0) { + return { + id: crypto.randomUUID(), + role: 'user', + content: combinedText || '', + }; + } + + // If images present, use multipart array format. + const parts = []; + if (combinedText) { + parts.push({ type: 'text', text: combinedText }); + } + parts.push(...imageParts); + + return { + id: crypto.randomUUID(), + role: 'user', + content: parts, + }; + } + + // ==================== + // STATE & HISTORY + // ==================== + /** + * #appendHistory adds multiple messages to history. + * Only the last message receives the metadata. + * @param {Object[]} messages + * @param {Object} [meta] + */ + #appendHistory(messages, meta = {}) { + for (let i = 0; i < messages.length; i++) { + const msg = { ...messages[i] }; + + // Generate ID if not present. + if (!msg.id) { + msg.id = crypto.randomUUID(); + } + + // Attach metadata only to the final message. + if (i === messages.length - 1) { + msg.meta = meta; + } + + this.history.push(msg); + } + + try { + localStorage.setItem(STORAGE_KEY, JSON.stringify(this.history)); + } catch (e) { + console.error('failed to save history:', e); + } + } + + /** + * #setLoadingState updates UI to reflect loading state. + * @param {boolean} loading + */ + #setLoadingState(loading) { + this.isProcessing = loading; + this.$send.classList.toggle('loading', loading); + this.$send.disabled = loading; + this.$ptt.disabled = loading; + } + + /** + * #setRecordingState updates UI to reflect recording state. + * @param {boolean} recording + */ + #setRecordingState(recording) { + this.isRecording = recording; + this.$ptt.classList.toggle('recording', recording); + this.$ptt.setAttribute('aria-pressed', String(recording)); + } + + /** + * #toggleMute toggles mute state and updates button label. 
+ */ + #toggleMute() { + this.isMuted = !this.isMuted; + if (this.$mute) { + const iconName = this.isMuted ? 'volume-off' : 'volume'; + this.$mute.replaceChildren(this.#icon(iconName)); + this.$mute.classList.toggle('footer__toolbar-btn--muted', this.isMuted); + this.$mute.setAttribute('aria-label', this.isMuted ? 'Unmute' : 'Mute'); + } + // Apply mute state to any currently playing audio. + if (this.currentAudio) { + this.currentAudio.muted = !!this.isMuted; + } + } + + // ==================== + // RENDER: SELECTS + // ==================== + /** + * #populateModels populates the model selector with available options. + * @param {Array<{id: string}>} models + * @param {string} defaultModel + */ + #populateModels(models, defaultModel) { + this.$model.innerHTML = ''; + + for (const m of models) { + const $opt = this.document.createElement('option'); + $opt.value = m.id; + $opt.textContent = m.id; + this.$model.appendChild($opt); + } + + this.#loadModel(defaultModel); + } + + /** + * #populateModelsFallback shows an error state when models fail to load. + */ + #populateModelsFallback() { + this.$model.innerHTML = ''; + + const $opt = this.document.createElement('option'); + $opt.value = ''; + $opt.disabled = true; + $opt.selected = true; + $opt.textContent = 'Failed to load'; + this.$model.appendChild($opt); + } + + /** + * #loadModel restores the model selection from localStorage or uses the + * default. + * @param {string} defaultModel + */ + #loadModel(defaultModel) { + const stored = localStorage.getItem(MODEL_KEY); + const escaped = stored ? CSS.escape(stored) : null; + + // Try stored value first, then default, then first available option. 
+ let selectedValue; + if (escaped && this.$model.querySelector(`option[value="${escaped}"]`)) { + selectedValue = stored; + } else if (defaultModel) { + selectedValue = defaultModel; + } else if (this.$model.options.length > 0) { + selectedValue = this.$model.options[0].value; + } + + if (selectedValue) { + this.$model.value = selectedValue; + } + } + + /** + * #populateVoiceSelect populates voice selector grouped by language. + * Voice IDs follow the pattern: {lang}{gender}_{name} (e.g., "af_bella"). + * @param {string[]} voices + */ + #populateVoiceSelect(voices) { + const langLabels = { + af: '🇺🇸 American English (F)', + am: '🇺🇸 American English (M)', + bf: '🇬🇧 British English (F)', + bm: '🇬🇧 British English (M)', + jf: '🇯🇵 Japanese (F)', + jm: '🇯🇵 Japanese (M)', + zf: '🇨🇳 Mandarin Chinese (F)', + zm: '🇨🇳 Mandarin Chinese (M)', + ef: '🇪🇸 Spanish (F)', + em: '🇪🇸 Spanish (M)', + ff: '🇫🇷 French (F)', + fm: '🇫🇷 French (M)', + hf: '🇮🇳 Hindi (F)', + hm: '🇮🇳 Hindi (M)', + if: '🇮🇹 Italian (F)', + im: '🇮🇹 Italian (M)', + pf: '🇧🇷 Brazilian Portuguese (F)', + pm: '🇧🇷 Brazilian Portuguese (M)', + kf: '🇰🇷 Korean (F)', + km: '🇰🇷 Korean (M)', + }; + + // Group voices by their two-character prefix (language + gender) + const groups = {}; + for (const voice of voices) { + const prefix = voice.substring(0, 2); + if (!groups[prefix]) groups[prefix] = []; + groups[prefix].push(voice); + } + + // Clear and rebuild selector. + this.$voice.innerHTML = ''; + + // Add "Auto" option for automatic language detection. + const $auto = this.document.createElement('option'); + $auto.value = ''; + $auto.textContent = '🌐 Auto'; + this.$voice.appendChild($auto); + + // Sort prefixes: by language code first, then by gender. + const sortedPrefixes = Object.keys(groups).sort((a, b) => { + if (a[0] !== b[0]) return a[0].localeCompare(b[0]); + return a[1].localeCompare(b[1]); + }); + + // Create optgroups for each language/gender combination. 
+ for (const prefix of sortedPrefixes) { + const label = langLabels[prefix] || prefix.toUpperCase(); + + const $optgroup = this.document.createElement('optgroup'); + $optgroup.label = label; + + for (const voice of groups[prefix].sort()) { + // Extract voice name from ID (e.g., "af_bella" -> "Bella"). + const name = voice.substring(3).replace(/_/g, ' '); + const displayName = name.charAt(0).toUpperCase() + name.slice(1); + + const $opt = this.document.createElement('option'); + $opt.value = voice; + $opt.textContent = displayName; + $optgroup.appendChild($opt); + } + + this.$voice.appendChild($optgroup); + } + + this.#loadVoice(); + } + + /** + * #loadVoice restores the voice selection from localStorage. + */ + #loadVoice() { + const stored = localStorage.getItem(VOICE_KEY); + + if (stored === null) { + this.$voice.value = ''; + return; + } + + // Empty string is valid ("Auto"). + const escaped = stored === '' ? '' : CSS.escape(stored); + const exists = + stored === '' || this.$voice.querySelector(`option[value="${escaped}"]`); + + if (!exists) { + this.$voice.value = ''; + return; + } + + this.$voice.value = stored; + } + + // ==================== + // RENDER: MESSAGES + // ==================== + /** + * #renderMessages renders messages to the chat container. + * Messages already in the DOM (matched by data-id) are skipped. + * Tool results are matched to their corresponding tool calls. + * @param {Object[]} messages + * @param {Object} [meta] - Default metadata for messages without their own + */ + #renderMessages(messages, meta = {}) { + // Build tool result map for matching tool calls to their results. + const toolResultMap = new Map(); + for (const msg of messages) { + if (msg.role === 'tool' && msg.tool_call_id) { + toolResultMap.set(msg.tool_call_id, msg); + } + } + + for (const msg of messages) { + // Skip if already rendered. + if (msg.id && this.$chat.querySelector(`[data-id="${msg.id}"]`)) { + continue; + } + // Render user message. 
+ if (msg.role === 'user') { + this.#renderUserMessage(msg, msg.meta ?? meta); + continue; + } + // Tool call -- already processed with the result map. + if (msg.role !== 'assistant') continue; + + // Skip assistant messages with no displayable content. + const hasContent = + msg.content || msg.reasoning_content || msg.tool_calls?.length; + if (!hasContent) continue; + + // Otherwise, render assistant message. + const toolResults = (msg.tool_calls ?? []) + .map((tc) => toolResultMap.get(tc.id)) + .filter(Boolean); + + this.#renderAssistantMessage(msg, msg.meta ?? meta, toolResults); + } + } + + /** + * #renderUserMessage renders a user message to the chat. + * @param {Object} message + * @param {Object} [meta] + */ + #renderUserMessage(message, meta = null) { + const content = (() => { + if (typeof message.content === 'string') { + return message.content; + } + if (Array.isArray(message.content)) { + return message.content + .filter((p) => p.type === 'text') + .map((p) => p.text) + .join('\n'); + } + return ''; + })(); + + const $msg = this.$tplUserMessage.content.cloneNode(true).firstElementChild; + if (message.id) $msg.dataset.id = message.id; + + $msg.querySelector('.message__content').textContent = content; + + // Populate debug panel. + const $dl = $msg.querySelector('.message__debug-list'); + if (meta?.language) this.#appendDebugRow($dl, 'Language', meta.language); + if (meta?.voice) this.#appendDebugRow($dl, 'Voice', meta.voice); + if ($dl.children.length === 0) this.#appendDebugRow($dl, 'No data', ''); + + // Bind action buttons. 
+ const $inspect = $msg.querySelector('[data-action="inspect"]'); + $inspect.addEventListener('click', () => { + const isOpen = $msg.classList.toggle('message--debug-open'); + $inspect.setAttribute('aria-expanded', String(isOpen)); + }); + + const $copy = $msg.querySelector('[data-action="copy"]'); + $copy.addEventListener('click', () => + this.#copyToClipboard($copy, content), + ); + + this.$chat.appendChild($msg); + this.$chat.scrollTop = this.$chat.scrollHeight; + } + + /** + * #renderAssistantMessage renders an assistant message with optional + * reasoning, tool calls, and their results. + * @param {Object} message + * @param {string} [message.content] + * @param {string} [message.reasoning_content] + * @param {Array} [message.tool_calls] + * @param {Object} [meta] + * @param {Object[]} [toolResults] + */ + #renderAssistantMessage(message, meta = null, toolResults = []) { + const $msg = + this.$tplAssistantMessage.content.cloneNode(true).firstElementChild; + if (message.id) $msg.dataset.id = message.id; + + const $body = $msg.querySelector('.message__body'); + const $content = $msg.querySelector('.message__content'); + + // Reasoning block (collapsible, shows LLM's chain-of-thought) + if (message.reasoning_content) { + const $reasoning = this.#createCollapsibleBlock( + 'reasoning', + 'Reasoning', + message.reasoning_content, + ); + $body.insertBefore($reasoning, $content); + } + + // Tool call blocks (collapsible, shows function name, args, and result) + if (message.tool_calls?.length > 0) { + for (const toolCall of message.tool_calls) { + const result = toolResults.find((r) => r.tool_call_id === toolCall.id); + const $toolBlock = this.#createToolCallBlock(toolCall, result); + $body.insertBefore($toolBlock, $content); + } + } + + // Main content + if (message.content) { + $content.textContent = message.content; + } else { + $content.remove(); + } + + // Populate debug panel. 
+ const $dl = $msg.querySelector('.message__debug-list'); + if (meta?.voice) this.#appendDebugRow($dl, 'Voice', meta.voice); + if ($dl.children.length === 0) this.#appendDebugRow($dl, 'No data', ''); + + // Bind action buttons. + const $inspect = $msg.querySelector('[data-action="inspect"]'); + $inspect.addEventListener('click', () => { + const isOpen = $msg.classList.toggle('message--debug-open'); + $inspect.setAttribute('aria-expanded', String(isOpen)); + }); + + // Assemble text from all available content. + const copyParts = []; + if (message.reasoning_content) copyParts.push(message.reasoning_content); + for (const tc of message.tool_calls ?? []) { + const name = tc.function?.name || 'unknown'; + const args = tc.function?.arguments || '{}'; + copyParts.push(`${name}(${args})`); + } + if (message.content) copyParts.push(message.content); + const copyText = copyParts.join('\n\n'); + + const $copy = $msg.querySelector('[data-action="copy"]'); + $copy.addEventListener('click', () => + this.#copyToClipboard($copy, copyText), + ); + + this.$chat.appendChild($msg); + this.$chat.scrollTop = this.$chat.scrollHeight; + } + + /** + * #renderError renders an error message in the chat. + * @param {string} message + */ + #renderError(message) { + const $msg = + this.$tplErrorMessage.content.cloneNode(true).firstElementChild; + $msg.querySelector('.message__content').textContent = message; + + this.$chat.appendChild($msg); + this.$chat.scrollTop = this.$chat.scrollHeight; + } + + // ==================== + // RENDER: ATTACHMENTS + // ==================== + /** + * #renderAttachments renders the attachment chips in the footer. 
+ */ + #renderAttachments() { + this.$attachments.innerHTML = ''; + + for (let i = 0; i < this.attachments.length; i++) { + const file = this.attachments[i]; + + const $chip = + this.$tplAttachmentChip.content.cloneNode(true).firstElementChild; + + const $name = $chip.querySelector('.compose__attachment-name'); + $name.textContent = file.name; + $name.title = file.name; + + const $remove = $chip.querySelector('.compose__attachment-remove'); + $remove.setAttribute('aria-label', `Remove ${file.name}`); + $remove.addEventListener('click', () => this.#removeAttachment(i)); + + this.$attachments.appendChild($chip); + } + } + + /** + * #removeAttachment removes the attachment at the given index. + * @param {number} index + */ + #removeAttachment(index) { + this.attachments.splice(index, 1); + this.#renderAttachments(); + } + + /** + * #clearAttachments removes all attachments. + */ + #clearAttachments() { + this.attachments = []; + this.#renderAttachments(); + } + + // ==================== + // RENDER: COMPONENTS + // ==================== + /** + * #createCollapsibleBlock creates a collapsible details element. + * @param {string} type - CSS modifier for styling + * @param {string} label - Summary text + * @param {string} content - Content to display when expanded + * @returns {HTMLDetailsElement} + */ + #createCollapsibleBlock(type, label, content) { + const $details = + this.$tplCollapsible.content.cloneNode(true).firstElementChild; + $details.classList.add(`collapsible--${type}`); + + const $summary = $details.querySelector('.collapsible__summary'); + $summary.insertBefore(this.#icon(type), $summary.firstChild); + + $details.querySelector('.collapsible__label').textContent = label; + $details.querySelector('pre').textContent = content; + + return $details; + } + + /** + * #createToolCallBlock creates a collapsible block for a tool call. + * Shows the tool name, arguments, and optionally the result. 
+ * @param {Object} toolCall + * @param {Object} [result] + * @returns {HTMLDetailsElement} + */ + #createToolCallBlock(toolCall, result = null) { + const $details = + this.$tplToolCall.content.cloneNode(true).firstElementChild; + + $details.querySelector('.collapsible__label').textContent = + toolCall.function?.name || 'Tool Call'; + + // Arguments section + const $args = $details.querySelector('[data-args]'); + try { + const args = JSON.parse(toolCall.function?.arguments || '{}'); + $args.textContent = JSON.stringify(args, null, 2); + } catch { + $args.textContent = toolCall.function?.arguments || ''; + } + + // Output section (only if result is available) + if (result) { + const $outputSection = $details.querySelector( + '.collapsible__section--output', + ); + $outputSection.hidden = false; + + const $output = $details.querySelector('[data-output]'); + try { + const output = JSON.parse(result.content || '{}'); + $output.textContent = JSON.stringify(output, null, 2); + } catch { + $output.textContent = result.content || ''; + } + } + + return $details; + } + + /** + * #appendDebugRow appends a label/value pair to a debug list. + * @param {HTMLDListElement} $dl + * @param {string} label + * @param {string} value + */ + #appendDebugRow($dl, label, value) { + const $row = this.$tplDebugRow.content.cloneNode(true); + $row.querySelector('dt').textContent = label; + $row.querySelector('dd').textContent = value; + $dl.appendChild($row); + } + + // ==================== + // UTILITIES + // ==================== + /** + * #icon creates an SVG icon element referencing the icon sprite. 
+ * @param {string} name + * @returns {SVGSVGElement} + */ + #icon(name) { + const svg = this.document.createElementNS( + 'http://www.w3.org/2000/svg', + 'svg', + ); + svg.setAttribute('class', 'icon'); + + const use = this.document.createElementNS( + 'http://www.w3.org/2000/svg', + 'use', + ); + use.setAttribute('href', `${ICONS_URL}#${name}`); + + svg.appendChild(use); + return svg; + } + + /** + * #toBase64 converts a Blob to a base64-encoded string. + * @param {Blob} blob + * @returns {Promise} + */ + async #toBase64(blob) { + const buffer = await blob.arrayBuffer(); + return new Uint8Array(buffer).toBase64(); + } + + /** + * #copyToClipboard copies text and shows a brief success indicator. + * @param {HTMLButtonElement} $button - The button to update with feedback + * @param {string} text - The text to copy + */ + async #copyToClipboard($button, text) { + try { + await navigator.clipboard.writeText(text); + $button.replaceChildren(this.#icon('check')); + $button.classList.add('message__action-btn--success'); + + setTimeout(() => { + $button.replaceChildren(this.#icon('copy')); + $button.classList.remove('message__action-btn--success'); + }, 1500); + } catch (e) { + console.error('failed to copy:', e); + } + } +} + +// Initialize +new Odidere({ document }); diff --git a/internal/service/templates/static/body.gohtml b/internal/service/templates/static/body.gohtml new file mode 100644 index 0000000..53223ed --- /dev/null +++ b/internal/service/templates/static/body.gohtml @@ -0,0 +1,6 @@ +{{ define "body" }} + + {{ template "main" . }} + {{ template "templates" . }} + +{{ end }} diff --git a/internal/service/templates/static/footer/compose.gohtml b/internal/service/templates/static/footer/compose.gohtml new file mode 100644 index 0000000..7243093 --- /dev/null +++ b/internal/service/templates/static/footer/compose.gohtml @@ -0,0 +1,41 @@ +{{ define "footer/compose" }} +
+ +
+
+ + +
+ + +
+
+
+{{ end }} diff --git a/internal/service/templates/static/footer/footer.gohtml b/internal/service/templates/static/footer/footer.gohtml new file mode 100644 index 0000000..6e21bee --- /dev/null +++ b/internal/service/templates/static/footer/footer.gohtml @@ -0,0 +1,6 @@ +{{ define "footer" }} +
+ {{ template "footer/compose" . }} + {{ template "footer/toolbar" . }} +
+{{ end }} diff --git a/internal/service/templates/static/footer/toolbar.gohtml b/internal/service/templates/static/footer/toolbar.gohtml new file mode 100644 index 0000000..bf52962 --- /dev/null +++ b/internal/service/templates/static/footer/toolbar.gohtml @@ -0,0 +1,31 @@ +{{ define "footer/toolbar" }} + +{{ end }} diff --git a/internal/service/templates/static/head.gohtml b/internal/service/templates/static/head.gohtml new file mode 100644 index 0000000..a5db0b5 --- /dev/null +++ b/internal/service/templates/static/head.gohtml @@ -0,0 +1,11 @@ +{{ define "head" }} + + + + + Odidere + + + +{{ end }} diff --git a/internal/service/templates/static/index.gohtml b/internal/service/templates/static/index.gohtml new file mode 100644 index 0000000..41fe79e --- /dev/null +++ b/internal/service/templates/static/index.gohtml @@ -0,0 +1,5 @@ + + + {{ template "head" . }} + {{ template "body" . }} + diff --git a/internal/service/templates/static/main.gohtml b/internal/service/templates/static/main.gohtml new file mode 100644 index 0000000..8f740e0 --- /dev/null +++ b/internal/service/templates/static/main.gohtml @@ -0,0 +1,7 @@ +{{ define "main" }} +
+
+
+ {{ template "footer" . }} +
+{{ end }}
diff --git a/internal/service/templates/static/templates.gohtml b/internal/service/templates/static/templates.gohtml
new file mode 100644
index 0000000..3e98a76
--- /dev/null
+++ b/internal/service/templates/static/templates.gohtml
@@ -0,0 +1,126 @@
+{{ define "templates" }}
+
+
+
+
+
+
+
+
+
+
+
+
+
+{{ end }}
diff --git a/internal/service/templates/templates.go b/internal/service/templates/templates.go
new file mode 100644
index 0000000..02f372c
--- /dev/null
+++ b/internal/service/templates/templates.go
@@ -0,0 +1,43 @@
+// Package templates embeds the static web assets and parses the
+// .gohtml templates they contain.
+package templates
+
+import (
+	"embed"
+	"fmt"
+	"html/template"
+	"io/fs"
+	"strings"
+)
+
+//go:embed static/*
+var static embed.FS
+
+// fileType is the file extension identifying template files.
+const fileType = ".gohtml"
+
+// Parse walks the embedded static directory and parses every .gohtml
+// template into a single template set.
+func Parse() (*template.Template, error) {
+	tmpl := template.New("")
+
+	parseFS := func(path string, d fs.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+		if d.IsDir() {
+			return nil
+		}
+		// Match on the suffix, not a substring, so stray files such as
+		// "x.gohtml.bak" are not picked up as templates.
+		if strings.HasSuffix(path, fileType) {
+			if _, err := tmpl.ParseFS(static, path); err != nil {
+				return fmt.Errorf(
+					"failed to parse template %s: %w",
+					path, err,
+				)
+			}
+		}
+		return nil
+	}
+
+	if err := fs.WalkDir(static, ".", parseFS); err != nil {
+		return nil, fmt.Errorf("failed to parse templates: %w", err)
+	}
+
+	return tmpl, nil
+}