69e70466fd
- Overall context timeout is now derived from the LLM timeout + 1 minute (no longer a hardcoded 3 minutes, which could conflict with longer LLM timeouts)
- Clarify concurrency docs: With* methods are setup-only and not safe for concurrent use
- Add ctx.Err() checks in the fetchFileContext and fetchPatterns loops (break early on cancellation instead of making unnecessary requests)
117 lines
2.9 KiB
Go
117 lines
2.9 KiB
Go
package llm
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// Client is a small client for an OpenAI-compatible chat completion API.
//
// Once configured, a Client may be shared by multiple goroutines. The
// WithTimeout and WithTemperature methods modify the Client in place, so they
// belong to the setup phase and must be called before any concurrent use.
type Client struct {
	// baseURL is the API root; NewClient stores it with trailing slashes removed.
	baseURL string
	// apiKey is sent as the bearer token in the Authorization header.
	apiKey string
	// model is the model identifier placed in every request.
	model string
	// temperature is the sampling temperature; a value of 0 is left out of
	// the request so the server default applies.
	temperature float64
	// http performs the requests; its Timeout bounds each call.
	http *http.Client
}
|
|
|
|
// NewClient creates a new LLM client.
|
|
func NewClient(baseURL, apiKey, model string) *Client {
|
|
return &Client{
|
|
baseURL: strings.TrimRight(baseURL, "/"),
|
|
apiKey: apiKey,
|
|
model: model,
|
|
http: &http.Client{Timeout: 5 * time.Minute},
|
|
}
|
|
}
|
|
|
|
// WithTimeout sets the HTTP request timeout for LLM calls (default 5 minutes).
|
|
func (c *Client) WithTimeout(d time.Duration) *Client {
|
|
c.http.Timeout = d
|
|
return c
|
|
}
|
|
|
|
// WithTemperature sets the temperature for LLM requests (0 = omit, uses server default).
|
|
func (c *Client) WithTemperature(t float64) *Client {
|
|
c.temperature = t
|
|
return c
|
|
}
|
|
|
|
// Message is one entry of a chat conversation as exchanged with the API.
type Message struct {
	// Role identifies the author of the message; encoded as "role".
	Role string `json:"role"`
	// Content is the text of the message; encoded as "content".
	Content string `json:"content"`
}
|
|
|
|
// ChatRequest is the request payload.
|
|
type ChatRequest struct {
|
|
Model string `json:"model"`
|
|
Messages []Message `json:"messages"`
|
|
Temperature float64 `json:"temperature,omitempty"`
|
|
}
|
|
|
|
// ChatResponse is the subset of the API's reply that this package decodes:
// the list of choices and, for each, the content of its message.
type ChatResponse struct {
	// Choices holds the candidate completions returned by the server.
	Choices []struct {
		// Message is the generated message of this choice.
		Message struct {
			// Content is the generated text.
			Content string `json:"content"`
		} `json:"message"`
	} `json:"choices"`
}
|
|
|
|
// Complete sends a chat completion request and returns the assistant's response content.
|
|
func (c *Client) Complete(ctx context.Context, messages []Message) (string, error) {
|
|
reqBody := ChatRequest{
|
|
Model: c.model,
|
|
Temperature: c.temperature,
|
|
Messages: messages,
|
|
}
|
|
|
|
data, err := json.Marshal(reqBody)
|
|
if err != nil {
|
|
return "", fmt.Errorf("marshal request: %w", err)
|
|
}
|
|
|
|
url := c.baseURL + "/chat/completions"
|
|
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(data))
|
|
if err != nil {
|
|
return "", fmt.Errorf("create request: %w", err)
|
|
}
|
|
req.Header.Set("Authorization", "Bearer "+c.apiKey)
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
resp, err := c.http.Do(req)
|
|
if err != nil {
|
|
return "", fmt.Errorf("LLM request: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
body, _ := io.ReadAll(resp.Body)
|
|
return "", fmt.Errorf("LLM API error (status %d): %s", resp.StatusCode, string(body))
|
|
}
|
|
|
|
body, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return "", fmt.Errorf("read response: %w", err)
|
|
}
|
|
|
|
var chatResp ChatResponse
|
|
if err := json.Unmarshal(body, &chatResp); err != nil {
|
|
return "", fmt.Errorf("parse response: %w", err)
|
|
}
|
|
|
|
if len(chatResp.Choices) == 0 {
|
|
return "", fmt.Errorf("no choices in LLM response")
|
|
}
|
|
|
|
return chatResp.Choices[0].Message.Content, nil
|
|
}
|