package llm

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"
	"sync"
	"time"
)

// AICoreOpenAIAPIVersion is the API version used for OpenAI models through AI Core.
// It is appended as the api-version query parameter of chat-completions requests.
// Update this when SAP AI Core releases a new stable version.
const AICoreOpenAIAPIVersion = "2024-12-01-preview"

// AICoreConfig holds SAP AI Core authentication and connection settings.
type AICoreConfig struct {
	ClientID      string // OAuth client ID; sent as client_id in the token request.
	ClientSecret  string // OAuth client secret; sent as client_secret in the token request.
	AuthURL       string // Base URL of the OAuth server; "/oauth/token" is appended.
	APIURL        string // Base URL of the AI Core API; "/v2/lm/deployments" is appended.
	ResourceGroup string // Value of the AI-Resource-Group header sent on AI Core requests.
}

// AICoreClient wraps AI Core authentication and deployment discovery.
// Thread-safe for concurrent use after construction.
type AICoreClient struct {
	config AICoreConfig
	http   *http.Client

	// mu guards token, tokenExpiry and deployments.
	mu          sync.RWMutex
	token       string
	tokenExpiry time.Time
	deployments map[string]deployment // model name -> deployment info
}

// deployment is one cached AI Core deployment: its ID and the base URL that
// model requests are sent to.
type deployment struct {
	ID  string
	URL string
}

// NewAICoreClient creates a new AI Core client with the given configuration.
// The client uses a default 5-minute timeout; use WithTimeout to customize.
// The deployment cache starts empty and no token is fetched here.
func NewAICoreClient(cfg AICoreConfig) *AICoreClient {
	return &AICoreClient{
		config:      cfg,
		http:        &http.Client{Timeout: 5 * time.Minute},
		deployments: make(map[string]deployment),
	}
}

// WithTimeout sets the HTTP request timeout for AI Core calls.
// This should be called during construction, before concurrent use.
// It mutates the client in place without taking c.mu and returns the same
// pointer so calls can be chained.
func (c *AICoreClient) WithTimeout(d time.Duration) *AICoreClient {
	c.http.Timeout = d
	return c
}

// getToken returns a valid OAuth token, refreshing if necessary.
func (c *AICoreClient) getToken(ctx context.Context) (string, error) { c.mu.RLock() if c.token != "" && time.Now().Add(5*time.Minute).Before(c.tokenExpiry) { token := c.token c.mu.RUnlock() return token, nil } c.mu.RUnlock() c.mu.Lock() defer c.mu.Unlock() // Double-check after acquiring write lock if c.token != "" && time.Now().Add(5*time.Minute).Before(c.tokenExpiry) { return c.token, nil } token, expiry, err := c.fetchToken(ctx) if err != nil { return "", err } c.token = token c.tokenExpiry = expiry return token, nil } func (c *AICoreClient) fetchToken(ctx context.Context) (string, time.Time, error) { tokenURL := strings.TrimRight(c.config.AuthURL, "/") + "/oauth/token" data := url.Values{} data.Set("grant_type", "client_credentials") data.Set("client_id", c.config.ClientID) data.Set("client_secret", c.config.ClientSecret) req, err := http.NewRequestWithContext(ctx, http.MethodPost, tokenURL, strings.NewReader(data.Encode())) if err != nil { return "", time.Time{}, fmt.Errorf("create token request: %w", err) } req.Header.Set("Content-Type", "application/x-www-form-urlencoded") resp, err := c.http.Do(req) if err != nil { return "", time.Time{}, fmt.Errorf("token request: %w", err) } defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { return "", time.Time{}, fmt.Errorf("read token response: %w", err) } if resp.StatusCode < 200 || resp.StatusCode >= 300 { return "", time.Time{}, fmt.Errorf("token request failed (status %d): %s", resp.StatusCode, string(body)) } var tokenResp struct { AccessToken string `json:"access_token"` ExpiresIn int `json:"expires_in"` } if err := json.Unmarshal(body, &tokenResp); err != nil { return "", time.Time{}, fmt.Errorf("parse token response: %w", err) } if tokenResp.AccessToken == "" { return "", time.Time{}, fmt.Errorf("empty access token in response") } expiry := time.Now().Add(time.Duration(tokenResp.ExpiresIn) * time.Second) return tokenResp.AccessToken, expiry, nil } // getDeploymentURL returns the 
deployment URL for a model, fetching deployments if needed. func (c *AICoreClient) getDeploymentURL(ctx context.Context, model string) (string, error) { c.mu.RLock() if d, ok := c.deployments[model]; ok { c.mu.RUnlock() return d.URL, nil } c.mu.RUnlock() // Fetch token first (before acquiring write lock to avoid deadlock) token, err := c.getToken(ctx) if err != nil { return "", fmt.Errorf("get token for deployments: %w", err) } c.mu.Lock() defer c.mu.Unlock() // Double-check after acquiring write lock if d, ok := c.deployments[model]; ok { return d.URL, nil } if err := c.fetchDeployments(ctx, token); err != nil { return "", err } if d, ok := c.deployments[model]; ok { return d.URL, nil } return "", fmt.Errorf("no deployment found for model %q", model) } func (c *AICoreClient) fetchDeployments(ctx context.Context, token string) error { deployURL := strings.TrimRight(c.config.APIURL, "/") + "/v2/lm/deployments" req, err := http.NewRequestWithContext(ctx, http.MethodGet, deployURL, nil) if err != nil { return fmt.Errorf("create deployments request: %w", err) } req.Header.Set("Authorization", "Bearer "+token) req.Header.Set("AI-Resource-Group", c.config.ResourceGroup) resp, err := c.http.Do(req) if err != nil { return fmt.Errorf("deployments request: %w", err) } defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { return fmt.Errorf("read deployments response: %w", err) } if resp.StatusCode < 200 || resp.StatusCode >= 300 { return fmt.Errorf("deployments request failed (status %d): %s", resp.StatusCode, string(body)) } var deployResp struct { Resources []struct { ID string `json:"id"` DeploymentURL string `json:"deploymentUrl"` Status string `json:"status"` Details struct { Resources struct { BackendDetails struct { Model struct { Name string `json:"name"` } `json:"model"` } `json:"backend_details"` } `json:"resources"` } `json:"details"` } `json:"resources"` } if err := json.Unmarshal(body, &deployResp); err != nil { return fmt.Errorf("parse 
deployments response: %w", err) } for _, r := range deployResp.Resources { if r.Status != "RUNNING" { continue } modelName := r.Details.Resources.BackendDetails.Model.Name if modelName == "" { continue } c.deployments[modelName] = deployment{ ID: r.ID, URL: r.DeploymentURL, } } return nil } // CompleteAnthropic sends a request to an Anthropic model via AI Core. func (c *AICoreClient) CompleteAnthropic(ctx context.Context, model string, messages []Message, maxTokens int, temperature float64) (string, error) { deployURL, err := c.getDeploymentURL(ctx, model) if err != nil { return "", err } token, err := c.getToken(ctx) if err != nil { return "", err } // Extract system message var system string var userMessages []anthropicMsg for _, m := range messages { if m.Role == "system" { system = m.Content } else { userMessages = append(userMessages, anthropicMsg{ Role: m.Role, Content: m.Content, }) } } reqBody := anthropicRequest{ AnthropicVersion: "2023-06-01", // Model omitted - AI Core deployment already specifies model MaxTokens: maxTokens, System: system, Messages: userMessages, } if temperature > 0 { reqBody.Temperature = temperature } data, err := json.Marshal(reqBody) if err != nil { return "", fmt.Errorf("marshal request: %w", err) } // AI Core uses /invoke for Anthropic models invokeURL := strings.TrimRight(deployURL, "/") + "/invoke" req, err := http.NewRequestWithContext(ctx, http.MethodPost, invokeURL, bytes.NewReader(data)) if err != nil { return "", fmt.Errorf("create request: %w", err) } req.Header.Set("Authorization", "Bearer "+token) req.Header.Set("AI-Resource-Group", c.config.ResourceGroup) req.Header.Set("Content-Type", "application/json") resp, err := c.http.Do(req) if err != nil { return "", fmt.Errorf("AI Core request: %w", err) } defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { return "", fmt.Errorf("read response: %w", err) } if resp.StatusCode < 200 || resp.StatusCode >= 300 { return "", fmt.Errorf("AI Core API error 
(status %d): %s", resp.StatusCode, string(body)) } var anthropicResp anthropicResponse if err := json.Unmarshal(body, &anthropicResp); err != nil { return "", fmt.Errorf("parse response: %w", err) } if len(anthropicResp.Content) == 0 { return "", fmt.Errorf("no content in response") } var sb strings.Builder for _, block := range anthropicResp.Content { if block.Type == "text" { sb.WriteString(block.Text) } } result := sb.String() if result == "" { return "", fmt.Errorf("no text content in response") } return result, nil } // CompleteOpenAI sends a request to an OpenAI model via AI Core. func (c *AICoreClient) CompleteOpenAI(ctx context.Context, model string, messages []Message, temperature float64) (string, error) { deployURL, err := c.getDeploymentURL(ctx, model) if err != nil { return "", err } token, err := c.getToken(ctx) if err != nil { return "", err } reqBody := ChatRequest{ Model: model, Temperature: temperature, Messages: messages, } data, err := json.Marshal(reqBody) if err != nil { return "", fmt.Errorf("marshal request: %w", err) } // AI Core uses /chat/completions?api-version= for OpenAI models chatURL := strings.TrimRight(deployURL, "/") + "/chat/completions?api-version=" + AICoreOpenAIAPIVersion req, err := http.NewRequestWithContext(ctx, http.MethodPost, chatURL, bytes.NewReader(data)) if err != nil { return "", fmt.Errorf("create request: %w", err) } req.Header.Set("Authorization", "Bearer "+token) req.Header.Set("AI-Resource-Group", c.config.ResourceGroup) req.Header.Set("Content-Type", "application/json") resp, err := c.http.Do(req) if err != nil { return "", fmt.Errorf("AI Core request: %w", err) } defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { return "", fmt.Errorf("read response: %w", err) } if resp.StatusCode < 200 || resp.StatusCode >= 300 { return "", fmt.Errorf("AI Core API error (status %d): %s", resp.StatusCode, string(body)) } var openaiResp ChatResponse if err := json.Unmarshal(body, &openaiResp); err != nil 
{ return "", fmt.Errorf("parse response: %w", err) } if len(openaiResp.Choices) == 0 { return "", fmt.Errorf("no choices in response") } return openaiResp.Choices[0].Message.Content, nil } // IsAnthropicModel returns true if the model name indicates an Anthropic model. // SAP AI Core uses "anthropic--" prefix for Anthropic models (e.g., "anthropic--claude-3-5-sonnet"). func IsAnthropicModel(model string) bool { return strings.HasPrefix(model, "anthropic--") }