d13e062866
CI / test (pull_request) Failing after 10s
CI / review (anthropic--claude-4.6-sonnet, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Has been skipped
CI / review (gpt-5, gpt, GPT_REVIEW_TOKEN) (pull_request) Has been skipped
CI / review (gpt-5, security, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Has been skipped
AI Core deployment URL already specifies the model. Sending model in the body causes 'Extra inputs are not permitted' error.
382 lines
10 KiB
Go
382 lines
10 KiB
Go
package llm
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
// AICoreOpenAIAPIVersion is the API version used for OpenAI models through
// AI Core. It is appended as the "api-version" query parameter on
// chat-completion requests.
//
// Update this when SAP AI Core releases a new stable version.
const AICoreOpenAIAPIVersion = "2024-12-01-preview"
|
|
|
|
// AICoreConfig holds SAP AI Core authentication and connection settings.
type AICoreConfig struct {
	// ClientID is sent as "client_id" in the client-credentials token request.
	ClientID string

	// ClientSecret is sent as "client_secret" in the client-credentials token request.
	ClientSecret string

	// AuthURL is the base URL of the OAuth server; "/oauth/token" is appended to it.
	AuthURL string

	// APIURL is the base URL of the AI Core API; "/v2/lm/deployments" is appended to it.
	APIURL string

	// ResourceGroup is sent as the "AI-Resource-Group" header on AI Core requests.
	ResourceGroup string
}
|
|
|
|
// AICoreClient wraps AI Core authentication and deployment discovery.
|
|
// Thread-safe for concurrent use after construction.
|
|
type AICoreClient struct {
|
|
config AICoreConfig
|
|
http *http.Client
|
|
|
|
mu sync.RWMutex
|
|
token string
|
|
tokenExpiry time.Time
|
|
deployments map[string]deployment // model name -> deployment info
|
|
}
|
|
|
|
// deployment is one cached AI Core deployment entry.
type deployment struct {
	ID  string // "id" field of the deployments response
	URL string // "deploymentUrl" field of the deployments response
}
|
|
|
|
// NewAICoreClient creates a new AI Core client with the given configuration.
|
|
// The client uses a default 5-minute timeout; use WithTimeout to customize.
|
|
func NewAICoreClient(cfg AICoreConfig) *AICoreClient {
|
|
return &AICoreClient{
|
|
config: cfg,
|
|
http: &http.Client{Timeout: 5 * time.Minute},
|
|
deployments: make(map[string]deployment),
|
|
}
|
|
}
|
|
|
|
// WithTimeout sets the HTTP request timeout for AI Core calls.
|
|
// This should be called during construction, before concurrent use.
|
|
func (c *AICoreClient) WithTimeout(d time.Duration) *AICoreClient {
|
|
c.http.Timeout = d
|
|
return c
|
|
}
|
|
|
|
// getToken returns a valid OAuth token, refreshing if necessary.
|
|
func (c *AICoreClient) getToken(ctx context.Context) (string, error) {
|
|
c.mu.RLock()
|
|
if c.token != "" && time.Now().Add(5*time.Minute).Before(c.tokenExpiry) {
|
|
token := c.token
|
|
c.mu.RUnlock()
|
|
return token, nil
|
|
}
|
|
c.mu.RUnlock()
|
|
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
|
|
// Double-check after acquiring write lock
|
|
if c.token != "" && time.Now().Add(5*time.Minute).Before(c.tokenExpiry) {
|
|
return c.token, nil
|
|
}
|
|
|
|
token, expiry, err := c.fetchToken(ctx)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
c.token = token
|
|
c.tokenExpiry = expiry
|
|
return token, nil
|
|
}
|
|
|
|
func (c *AICoreClient) fetchToken(ctx context.Context) (string, time.Time, error) {
|
|
tokenURL := strings.TrimRight(c.config.AuthURL, "/") + "/oauth/token"
|
|
|
|
data := url.Values{}
|
|
data.Set("grant_type", "client_credentials")
|
|
data.Set("client_id", c.config.ClientID)
|
|
data.Set("client_secret", c.config.ClientSecret)
|
|
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, tokenURL, strings.NewReader(data.Encode()))
|
|
if err != nil {
|
|
return "", time.Time{}, fmt.Errorf("create token request: %w", err)
|
|
}
|
|
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
|
|
|
resp, err := c.http.Do(req)
|
|
if err != nil {
|
|
return "", time.Time{}, fmt.Errorf("token request: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
body, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return "", time.Time{}, fmt.Errorf("read token response: %w", err)
|
|
}
|
|
|
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
return "", time.Time{}, fmt.Errorf("token request failed (status %d): %s", resp.StatusCode, string(body))
|
|
}
|
|
|
|
var tokenResp struct {
|
|
AccessToken string `json:"access_token"`
|
|
ExpiresIn int `json:"expires_in"`
|
|
}
|
|
if err := json.Unmarshal(body, &tokenResp); err != nil {
|
|
return "", time.Time{}, fmt.Errorf("parse token response: %w", err)
|
|
}
|
|
|
|
if tokenResp.AccessToken == "" {
|
|
return "", time.Time{}, fmt.Errorf("empty access token in response")
|
|
}
|
|
|
|
expiry := time.Now().Add(time.Duration(tokenResp.ExpiresIn) * time.Second)
|
|
return tokenResp.AccessToken, expiry, nil
|
|
}
|
|
|
|
// getDeploymentURL returns the deployment URL for a model, fetching deployments if needed.
|
|
func (c *AICoreClient) getDeploymentURL(ctx context.Context, model string) (string, error) {
|
|
c.mu.RLock()
|
|
if d, ok := c.deployments[model]; ok {
|
|
c.mu.RUnlock()
|
|
return d.URL, nil
|
|
}
|
|
c.mu.RUnlock()
|
|
|
|
// Fetch token first (before acquiring write lock to avoid deadlock)
|
|
token, err := c.getToken(ctx)
|
|
if err != nil {
|
|
return "", fmt.Errorf("get token for deployments: %w", err)
|
|
}
|
|
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
|
|
// Double-check after acquiring write lock
|
|
if d, ok := c.deployments[model]; ok {
|
|
return d.URL, nil
|
|
}
|
|
|
|
if err := c.fetchDeployments(ctx, token); err != nil {
|
|
return "", err
|
|
}
|
|
|
|
if d, ok := c.deployments[model]; ok {
|
|
return d.URL, nil
|
|
}
|
|
return "", fmt.Errorf("no deployment found for model %q", model)
|
|
}
|
|
|
|
func (c *AICoreClient) fetchDeployments(ctx context.Context, token string) error {
|
|
deployURL := strings.TrimRight(c.config.APIURL, "/") + "/v2/lm/deployments"
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, deployURL, nil)
|
|
if err != nil {
|
|
return fmt.Errorf("create deployments request: %w", err)
|
|
}
|
|
req.Header.Set("Authorization", "Bearer "+token)
|
|
req.Header.Set("AI-Resource-Group", c.config.ResourceGroup)
|
|
|
|
resp, err := c.http.Do(req)
|
|
if err != nil {
|
|
return fmt.Errorf("deployments request: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
body, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return fmt.Errorf("read deployments response: %w", err)
|
|
}
|
|
|
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
return fmt.Errorf("deployments request failed (status %d): %s", resp.StatusCode, string(body))
|
|
}
|
|
|
|
var deployResp struct {
|
|
Resources []struct {
|
|
ID string `json:"id"`
|
|
DeploymentURL string `json:"deploymentUrl"`
|
|
Status string `json:"status"`
|
|
Details struct {
|
|
Resources struct {
|
|
BackendDetails struct {
|
|
Model struct {
|
|
Name string `json:"name"`
|
|
} `json:"model"`
|
|
} `json:"backend_details"`
|
|
} `json:"resources"`
|
|
} `json:"details"`
|
|
} `json:"resources"`
|
|
}
|
|
if err := json.Unmarshal(body, &deployResp); err != nil {
|
|
return fmt.Errorf("parse deployments response: %w", err)
|
|
}
|
|
|
|
for _, r := range deployResp.Resources {
|
|
if r.Status != "RUNNING" {
|
|
continue
|
|
}
|
|
modelName := r.Details.Resources.BackendDetails.Model.Name
|
|
if modelName == "" {
|
|
continue
|
|
}
|
|
c.deployments[modelName] = deployment{
|
|
ID: r.ID,
|
|
URL: r.DeploymentURL,
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// CompleteAnthropic sends a request to an Anthropic model via AI Core.
|
|
func (c *AICoreClient) CompleteAnthropic(ctx context.Context, model string, messages []Message, maxTokens int, temperature float64) (string, error) {
|
|
deployURL, err := c.getDeploymentURL(ctx, model)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
token, err := c.getToken(ctx)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
// Extract system message
|
|
var system string
|
|
var userMessages []anthropicMsg
|
|
for _, m := range messages {
|
|
if m.Role == "system" {
|
|
system = m.Content
|
|
} else {
|
|
userMessages = append(userMessages, anthropicMsg{
|
|
Role: m.Role,
|
|
Content: m.Content,
|
|
})
|
|
}
|
|
}
|
|
|
|
reqBody := anthropicRequest{
|
|
AnthropicVersion: "2023-06-01",
|
|
// Model omitted - AI Core deployment already specifies model
|
|
MaxTokens: maxTokens,
|
|
System: system,
|
|
Messages: userMessages,
|
|
}
|
|
if temperature > 0 {
|
|
reqBody.Temperature = temperature
|
|
}
|
|
|
|
data, err := json.Marshal(reqBody)
|
|
if err != nil {
|
|
return "", fmt.Errorf("marshal request: %w", err)
|
|
}
|
|
|
|
// AI Core uses /invoke for Anthropic models
|
|
invokeURL := strings.TrimRight(deployURL, "/") + "/invoke"
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, invokeURL, bytes.NewReader(data))
|
|
if err != nil {
|
|
return "", fmt.Errorf("create request: %w", err)
|
|
}
|
|
req.Header.Set("Authorization", "Bearer "+token)
|
|
req.Header.Set("AI-Resource-Group", c.config.ResourceGroup)
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
resp, err := c.http.Do(req)
|
|
if err != nil {
|
|
return "", fmt.Errorf("AI Core request: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
body, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return "", fmt.Errorf("read response: %w", err)
|
|
}
|
|
|
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
return "", fmt.Errorf("AI Core API error (status %d): %s", resp.StatusCode, string(body))
|
|
}
|
|
|
|
var anthropicResp anthropicResponse
|
|
if err := json.Unmarshal(body, &anthropicResp); err != nil {
|
|
return "", fmt.Errorf("parse response: %w", err)
|
|
}
|
|
|
|
if len(anthropicResp.Content) == 0 {
|
|
return "", fmt.Errorf("no content in response")
|
|
}
|
|
|
|
var sb strings.Builder
|
|
for _, block := range anthropicResp.Content {
|
|
if block.Type == "text" {
|
|
sb.WriteString(block.Text)
|
|
}
|
|
}
|
|
result := sb.String()
|
|
if result == "" {
|
|
return "", fmt.Errorf("no text content in response")
|
|
}
|
|
return result, nil
|
|
}
|
|
|
|
// CompleteOpenAI sends a request to an OpenAI model via AI Core.
|
|
func (c *AICoreClient) CompleteOpenAI(ctx context.Context, model string, messages []Message, temperature float64) (string, error) {
|
|
deployURL, err := c.getDeploymentURL(ctx, model)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
token, err := c.getToken(ctx)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
reqBody := ChatRequest{
|
|
Model: model,
|
|
Temperature: temperature,
|
|
Messages: messages,
|
|
}
|
|
|
|
data, err := json.Marshal(reqBody)
|
|
if err != nil {
|
|
return "", fmt.Errorf("marshal request: %w", err)
|
|
}
|
|
|
|
// AI Core uses /chat/completions?api-version=<version> for OpenAI models
|
|
chatURL := strings.TrimRight(deployURL, "/") + "/chat/completions?api-version=" + AICoreOpenAIAPIVersion
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, chatURL, bytes.NewReader(data))
|
|
if err != nil {
|
|
return "", fmt.Errorf("create request: %w", err)
|
|
}
|
|
req.Header.Set("Authorization", "Bearer "+token)
|
|
req.Header.Set("AI-Resource-Group", c.config.ResourceGroup)
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
resp, err := c.http.Do(req)
|
|
if err != nil {
|
|
return "", fmt.Errorf("AI Core request: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
body, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return "", fmt.Errorf("read response: %w", err)
|
|
}
|
|
|
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
return "", fmt.Errorf("AI Core API error (status %d): %s", resp.StatusCode, string(body))
|
|
}
|
|
|
|
var openaiResp ChatResponse
|
|
if err := json.Unmarshal(body, &openaiResp); err != nil {
|
|
return "", fmt.Errorf("parse response: %w", err)
|
|
}
|
|
|
|
if len(openaiResp.Choices) == 0 {
|
|
return "", fmt.Errorf("no choices in response")
|
|
}
|
|
return openaiResp.Choices[0].Message.Content, nil
|
|
}
|
|
|
|
// IsAnthropicModel reports whether the model name indicates an Anthropic model.
// SAP AI Core uses the "anthropic--" prefix for Anthropic models
// (e.g., "anthropic--claude-3-5-sonnet"). The match is case-sensitive.
func IsAnthropicModel(model string) bool {
	const anthropicPrefix = "anthropic--"
	return strings.HasPrefix(model, anthropicPrefix)
}
|