e3fb19fa1b
CI / test (push) Successful in 17s
CI / review (anthropic--claude-4.6-sonnet, sonnet, SONNET_REVIEW_TOKEN) (push) Has been skipped
CI / review (gpt-5, gpt, GPT_REVIEW_TOKEN) (push) Has been skipped
CI / review (gpt-5, security, ., rodin/security-patterns, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (push) Has been skipped
317 lines
8.8 KiB
Go
317 lines
8.8 KiB
Go
// Package llm provides clients for LLM chat completion APIs.
|
|
//
|
|
// Supports OpenAI-compatible (default), Anthropic Messages API, and SAP AI Core providers.
|
|
package llm
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// Provider names the API wire format a Client speaks.
type Provider string

// Providers understood by Client.
const (
	// ProviderOpenAI selects the OpenAI-compatible chat/completions endpoint.
	ProviderOpenAI = Provider("openai")

	// ProviderAnthropic selects the Anthropic Messages API endpoint.
	ProviderAnthropic = Provider("anthropic")

	// ProviderAICore selects SAP AI Core, which uses OAuth authentication.
	ProviderAICore = Provider("aicore")
)
|
|
|
|
// Client calls an LLM chat completion API.
|
|
// A Client is safe for concurrent use by multiple goroutines after construction.
|
|
// WithTimeout, WithTemperature, and WithProvider must be called during setup,
|
|
// before concurrent use.
|
|
type Client struct {
|
|
baseURL string
|
|
apiKey string
|
|
model string
|
|
temperature float64
|
|
provider Provider
|
|
http *http.Client
|
|
aicore *AICoreClient // Only set when provider is aicore
|
|
}
|
|
|
|
// NewClient creates a new LLM client. Default provider is OpenAI-compatible.
|
|
func NewClient(baseURL, apiKey, model string) *Client {
|
|
return &Client{
|
|
baseURL: strings.TrimRight(baseURL, "/"),
|
|
apiKey: apiKey,
|
|
model: model,
|
|
provider: ProviderOpenAI,
|
|
http: &http.Client{Timeout: 5 * time.Minute},
|
|
}
|
|
}
|
|
|
|
// WithTimeout sets the HTTP request timeout for LLM calls (default 5 minutes).
|
|
// When using AI Core, this also sets the timeout on the AI Core client.
|
|
func (c *Client) WithTimeout(d time.Duration) *Client {
|
|
c.http.Timeout = d
|
|
if c.aicore != nil {
|
|
c.aicore.WithTimeout(d)
|
|
}
|
|
return c
|
|
}
|
|
|
|
// WithTemperature sets the temperature for LLM requests (0 = omit, uses server default).
|
|
func (c *Client) WithTemperature(t float64) *Client {
|
|
c.temperature = t
|
|
return c
|
|
}
|
|
|
|
// WithProvider sets the API provider format (openai, anthropic, or aicore).
|
|
func (c *Client) WithProvider(p Provider) *Client {
|
|
c.provider = p
|
|
return c
|
|
}
|
|
|
|
// WithAICore configures the client to use SAP AI Core for authentication.
|
|
// This sets the provider to aicore automatically.
|
|
// The AI Core client inherits the current HTTP timeout from this client.
|
|
func (c *Client) WithAICore(cfg AICoreConfig) *Client {
|
|
c.provider = ProviderAICore
|
|
c.aicore = NewAICoreClient(cfg).WithTimeout(c.http.Timeout)
|
|
return c
|
|
}
|
|
|
|
// Message is one entry in a chat conversation.
type Message struct {
	// Role identifies the speaker. The role "system" is singled out by the
	// Anthropic code path; other values are forwarded unchanged.
	Role string `json:"role"`

	// Content is the message text.
	Content string `json:"content"`
}
|
|
|
|
// Complete sends a chat completion request and returns the assistant's response content.
|
|
// The first message with role "system" is treated as the system prompt.
|
|
func (c *Client) Complete(ctx context.Context, messages []Message) (string, error) {
|
|
var result string
|
|
var err error
|
|
|
|
for attempt := 0; attempt < 2; attempt++ {
|
|
switch c.provider {
|
|
case ProviderAnthropic:
|
|
result, err = c.completeAnthropic(ctx, messages)
|
|
case ProviderAICore:
|
|
result, err = c.completeAICore(ctx, messages)
|
|
default:
|
|
result, err = c.completeOpenAI(ctx, messages)
|
|
}
|
|
|
|
if err == nil {
|
|
return result, nil
|
|
}
|
|
|
|
// Only retry on response body read errors (transient network issues).
|
|
// Do not retry on context cancellation, status errors, or parse errors
|
|
// that indicate a structural API problem.
|
|
if !isRetryableError(err) {
|
|
return "", err
|
|
}
|
|
|
|
if attempt == 0 && ctx.Err() == nil {
|
|
// Brief pause before retry to allow transient issues to resolve.
|
|
time.Sleep(500 * time.Millisecond)
|
|
}
|
|
}
|
|
|
|
return "", err
|
|
}
|
|
|
|
// completeAICore routes to AI Core using the appropriate endpoint based on model type.
|
|
func (c *Client) completeAICore(ctx context.Context, messages []Message) (string, error) {
|
|
if c.aicore == nil {
|
|
return "", fmt.Errorf("AI Core client not configured")
|
|
}
|
|
|
|
if IsAnthropicModel(c.model) {
|
|
return c.aicore.CompleteAnthropic(ctx, c.model, messages, 8192, c.temperature)
|
|
}
|
|
return c.aicore.CompleteOpenAI(ctx, c.model, messages, c.temperature)
|
|
}
|
|
|
|
// isRetryableError reports whether err looks like a transient failure that
// is worth retrying. Classification is by message text: an error qualifies
// when its message contains one of the markers below. A nil error is never
// retryable.
func isRetryableError(err error) bool {
	if err == nil {
		return false
	}

	transientMarkers := [...]string{
		"read response",        // body read failures (connection reset, truncation)
		"body length mismatch", // fewer bytes received than Content-Length announced
	}

	msg := err.Error()
	for _, marker := range transientMarkers {
		if strings.Contains(msg, marker) {
			return true
		}
	}
	return false
}
|
|
|
|
// --- OpenAI-compatible implementation ---
|
|
|
|
// ChatRequest is the OpenAI request payload.
|
|
type ChatRequest struct {
|
|
Model string `json:"model"`
|
|
Messages []Message `json:"messages"`
|
|
Temperature float64 `json:"temperature,omitempty"`
|
|
}
|
|
|
|
// ChatResponse is the subset of the OpenAI-compatible chat/completions
// response that this package decodes: the message content of each choice.
type ChatResponse struct {
	// Choices holds the returned completions; completeOpenAI uses the first.
	Choices []struct {
		Message struct {
			Content string `json:"content"`
		} `json:"message"`
	} `json:"choices"`
}
|
|
|
|
func (c *Client) completeOpenAI(ctx context.Context, messages []Message) (string, error) {
|
|
reqBody := ChatRequest{
|
|
Model: c.model,
|
|
Temperature: c.temperature,
|
|
Messages: messages,
|
|
}
|
|
|
|
data, err := json.Marshal(reqBody)
|
|
if err != nil {
|
|
return "", fmt.Errorf("marshal request: %w", err)
|
|
}
|
|
|
|
url := c.baseURL + "/chat/completions"
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(data))
|
|
if err != nil {
|
|
return "", fmt.Errorf("create request: %w", err)
|
|
}
|
|
req.Header.Set("Authorization", "Bearer "+c.apiKey)
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
return c.doRequest(req, func(body []byte) (string, error) {
|
|
var resp ChatResponse
|
|
if err := json.Unmarshal(body, &resp); err != nil {
|
|
return "", fmt.Errorf("parse response: %w", err)
|
|
}
|
|
if len(resp.Choices) == 0 {
|
|
return "", fmt.Errorf("no choices in LLM response")
|
|
}
|
|
return resp.Choices[0].Message.Content, nil
|
|
})
|
|
}
|
|
|
|
// --- Anthropic Messages API implementation ---
|
|
|
|
type anthropicRequest struct {
|
|
AnthropicVersion string `json:"anthropic_version,omitempty"`
|
|
Model string `json:"model,omitempty"`
|
|
MaxTokens int `json:"max_tokens"`
|
|
System string `json:"system,omitempty"`
|
|
Messages []anthropicMsg `json:"messages"`
|
|
Temperature float64 `json:"temperature,omitempty"`
|
|
}
|
|
|
|
// anthropicMsg is one conversation entry in an anthropicRequest.
type anthropicMsg struct {
	// Role is copied from the originating Message.
	Role string `json:"role"`

	// Content is the message text.
	Content string `json:"content"`
}
|
|
|
|
// anthropicResponse is the subset of an Anthropic Messages API response that
// this package decodes: the list of content blocks.
type anthropicResponse struct {
	// Content holds the response blocks. completeAnthropic uses only the
	// blocks whose Type is "text".
	Content []struct {
		Type string `json:"type"`
		Text string `json:"text"`
	} `json:"content"`
}
|
|
|
|
func (c *Client) completeAnthropic(ctx context.Context, messages []Message) (string, error) {
|
|
// Extract system message (first message with role "system")
|
|
var system string
|
|
var userMessages []anthropicMsg
|
|
for _, m := range messages {
|
|
if m.Role == "system" {
|
|
system = m.Content
|
|
} else {
|
|
userMessages = append(userMessages, anthropicMsg{
|
|
Role: m.Role,
|
|
Content: m.Content,
|
|
})
|
|
}
|
|
}
|
|
|
|
reqBody := anthropicRequest{
|
|
Model: c.model,
|
|
MaxTokens: 8192,
|
|
System: system,
|
|
Messages: userMessages,
|
|
}
|
|
if c.temperature > 0 {
|
|
reqBody.Temperature = c.temperature
|
|
}
|
|
|
|
data, err := json.Marshal(reqBody)
|
|
if err != nil {
|
|
return "", fmt.Errorf("marshal request: %w", err)
|
|
}
|
|
|
|
url := c.baseURL + "/messages"
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(data))
|
|
if err != nil {
|
|
return "", fmt.Errorf("create request: %w", err)
|
|
}
|
|
req.Header.Set("x-api-key", c.apiKey)
|
|
req.Header.Set("anthropic-version", "2023-06-01")
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
return c.doRequest(req, func(body []byte) (string, error) {
|
|
var resp anthropicResponse
|
|
if err := json.Unmarshal(body, &resp); err != nil {
|
|
return "", fmt.Errorf("parse response: %w", err)
|
|
}
|
|
if len(resp.Content) == 0 {
|
|
return "", fmt.Errorf("no content in Anthropic response")
|
|
}
|
|
// Concatenate all text blocks
|
|
var sb strings.Builder
|
|
for _, block := range resp.Content {
|
|
if block.Type == "text" {
|
|
sb.WriteString(block.Text)
|
|
}
|
|
}
|
|
result := sb.String()
|
|
if result == "" {
|
|
return "", fmt.Errorf("no text content in Anthropic response")
|
|
}
|
|
return result, nil
|
|
})
|
|
}
|
|
|
|
// --- Shared HTTP execution ---
|
|
|
|
func (c *Client) doRequest(req *http.Request, parse func([]byte) (string, error)) (string, error) {
|
|
resp, err := c.http.Do(req)
|
|
if err != nil {
|
|
return "", fmt.Errorf("LLM request: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
body, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return "", fmt.Errorf("read response: %w", err)
|
|
}
|
|
|
|
// Validate body length against Content-Length header when present.
|
|
// A mismatch indicates the response was truncated in transit.
|
|
if cl := resp.ContentLength; cl > 0 && int64(len(body)) < cl {
|
|
return "", fmt.Errorf("body length mismatch: Content-Length=%d, received=%d", cl, len(body))
|
|
}
|
|
|
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
return "", fmt.Errorf("LLM API error (status %d): %s", resp.StatusCode, string(body))
|
|
}
|
|
|
|
return parse(body)
|
|
}
|