abcb982599
PR Ready Gate / clear-labels (pull_request) Has been skipped
CI / test (pull_request) Successful in 14s
CI / review (anthropic--claude-4.6-sonnet, sonnet, SONNET_REVIEW_TOKEN) (pull_request) Successful in 39s
CI / review (gpt-5, gpt, GPT_REVIEW_TOKEN) (pull_request) Successful in 1m22s
CI / review (gpt-5, security, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Successful in 2m30s
Add native SAP AI Core provider that handles OAuth token management and deployment discovery automatically. This eliminates the need for the external LLM proxy when running in SAP environments. Changes: - Add AICoreClient with OAuth token caching and deployment URL discovery - Support both Anthropic and OpenAI models via AI Core deployments - Update CI to use native AI Core provider - Update action inputs to accept AI Core credentials - Update README with AI Core configuration examples Model names must match AI Core deployment names (e.g. anthropic--claude-4.6-sonnet, gpt-5).
392 lines
12 KiB
Go
392 lines
12 KiB
Go
package llm
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
const (
	// AICoreOpenAIAPIVersion is the value sent as the api-version query
	// parameter on OpenAI chat-completion requests routed through AI Core.
	// Bump it when SAP AI Core publishes a newer stable version.
	AICoreOpenAIAPIVersion = "2024-12-01-preview"

	// maxErrorBodyLen caps how many bytes of an upstream response body are
	// copied into an error message, so that logs do not end up carrying
	// potentially sensitive upstream details.
	maxErrorBodyLen = 200
)
|
|
|
|
// AICoreConfig carries the credentials and endpoints needed to reach SAP AI Core.
type AICoreConfig struct {
	// ClientID is sent as client_id in the OAuth client-credentials request.
	ClientID string

	// ClientSecret is sent as client_secret in the OAuth client-credentials request.
	ClientSecret string

	// AuthURL is the base URL of the OAuth server; "/oauth/token" is appended to it.
	AuthURL string

	// APIURL is the base URL of the AI Core API; "/v2/lm/deployments" is appended to it.
	APIURL string

	// ResourceGroup is sent as the AI-Resource-Group header on AI Core requests.
	ResourceGroup string
}
|
|
|
|
// AICoreClient wraps AI Core authentication and deployment discovery.
|
|
// Thread-safe for concurrent use after construction.
|
|
//
|
|
// Design: The deployment cache is populated once and never invalidated. This is
|
|
// acceptable for short-lived CI runner processes, but longer-lived deployments
|
|
// may want to add a TTL or re-fetch on errors.
|
|
type AICoreClient struct {
|
|
config AICoreConfig
|
|
http *http.Client
|
|
|
|
mu sync.RWMutex
|
|
token string
|
|
tokenExpiry time.Time
|
|
deployments map[string]string // model name -> deployment URL
|
|
}
|
|
|
|
// NewAICoreClient creates a new AI Core client with the given configuration.
|
|
// The client uses a default 5-minute timeout; use WithTimeout to customize.
|
|
func NewAICoreClient(cfg AICoreConfig) *AICoreClient {
|
|
return &AICoreClient{
|
|
config: cfg,
|
|
http: &http.Client{Timeout: 5 * time.Minute},
|
|
deployments: make(map[string]string),
|
|
}
|
|
}
|
|
|
|
// WithTimeout sets the HTTP request timeout for AI Core calls.
|
|
// This should be called during construction, before concurrent use.
|
|
func (c *AICoreClient) WithTimeout(d time.Duration) *AICoreClient {
|
|
c.http.Timeout = d
|
|
return c
|
|
}
|
|
|
|
// truncateBody truncates a response body for inclusion in error messages.
|
|
// This prevents leaking potentially sensitive upstream response details in logs.
|
|
func truncateBody(body []byte) string {
|
|
if len(body) <= maxErrorBodyLen {
|
|
return string(body)
|
|
}
|
|
return string(body[:maxErrorBodyLen]) + "..."
|
|
}
|
|
|
|
// getToken returns a valid OAuth token, refreshing if necessary.
|
|
func (c *AICoreClient) getToken(ctx context.Context) (string, error) {
|
|
c.mu.RLock()
|
|
if c.token != "" && time.Now().Add(5*time.Minute).Before(c.tokenExpiry) {
|
|
token := c.token
|
|
c.mu.RUnlock()
|
|
return token, nil
|
|
}
|
|
c.mu.RUnlock()
|
|
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
|
|
// Double-check after acquiring write lock
|
|
if c.token != "" && time.Now().Add(5*time.Minute).Before(c.tokenExpiry) {
|
|
return c.token, nil
|
|
}
|
|
|
|
token, expiry, err := c.fetchToken(ctx)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
c.token = token
|
|
c.tokenExpiry = expiry
|
|
return token, nil
|
|
}
|
|
|
|
func (c *AICoreClient) fetchToken(ctx context.Context) (string, time.Time, error) {
|
|
tokenURL := strings.TrimRight(c.config.AuthURL, "/") + "/oauth/token"
|
|
|
|
data := url.Values{}
|
|
data.Set("grant_type", "client_credentials")
|
|
data.Set("client_id", c.config.ClientID)
|
|
data.Set("client_secret", c.config.ClientSecret)
|
|
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, tokenURL, strings.NewReader(data.Encode()))
|
|
if err != nil {
|
|
return "", time.Time{}, fmt.Errorf("create token request: %w", err)
|
|
}
|
|
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
|
|
|
resp, err := c.http.Do(req)
|
|
if err != nil {
|
|
return "", time.Time{}, fmt.Errorf("token request: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
body, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return "", time.Time{}, fmt.Errorf("read token response: %w", err)
|
|
}
|
|
|
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
return "", time.Time{}, fmt.Errorf("token request failed (status %d): %s", resp.StatusCode, truncateBody(body))
|
|
}
|
|
|
|
var tokenResp struct {
|
|
AccessToken string `json:"access_token"`
|
|
ExpiresIn int `json:"expires_in"`
|
|
}
|
|
if err := json.Unmarshal(body, &tokenResp); err != nil {
|
|
return "", time.Time{}, fmt.Errorf("parse token response: %w", err)
|
|
}
|
|
|
|
if tokenResp.AccessToken == "" {
|
|
return "", time.Time{}, fmt.Errorf("empty access token in response")
|
|
}
|
|
|
|
expiry := time.Now().Add(time.Duration(tokenResp.ExpiresIn) * time.Second)
|
|
return tokenResp.AccessToken, expiry, nil
|
|
}
|
|
|
|
// getDeploymentURL returns the deployment URL for a model, fetching deployments if needed.
|
|
// getDeploymentURL returns the deployment URL for a model, fetching deployments if needed.
|
|
// Also returns a valid token for use by the caller, avoiding redundant getToken calls.
|
|
//
|
|
// Note: The token is fetched before acquiring the write lock to avoid holding the lock
|
|
// during network I/O. In rare cases where multiple goroutines race and one waits a long
|
|
// time for the write lock, the token could theoretically expire. The 5-minute refresh
|
|
// buffer in getToken makes this extremely unlikely in practice.
|
|
func (c *AICoreClient) getDeploymentURL(ctx context.Context, model string) (deployURL, token string, err error) {
|
|
c.mu.RLock()
|
|
if u, ok := c.deployments[model]; ok {
|
|
c.mu.RUnlock()
|
|
// Still need a token for the caller
|
|
token, err = c.getToken(ctx)
|
|
if err != nil {
|
|
return "", "", fmt.Errorf("get token: %w", err)
|
|
}
|
|
return u, token, nil
|
|
}
|
|
c.mu.RUnlock()
|
|
|
|
// Fetch token first (before acquiring write lock to avoid holding lock during I/O)
|
|
token, err = c.getToken(ctx)
|
|
if err != nil {
|
|
return "", "", fmt.Errorf("get token for deployments: %w", err)
|
|
}
|
|
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
|
|
// Double-check after acquiring write lock
|
|
if u, ok := c.deployments[model]; ok {
|
|
return u, token, nil
|
|
}
|
|
|
|
if err := c.fetchDeployments(ctx, token); err != nil {
|
|
return "", "", err
|
|
}
|
|
|
|
if u, ok := c.deployments[model]; ok {
|
|
return u, token, nil
|
|
}
|
|
return "", "", fmt.Errorf("no deployment found for model %q", model)
|
|
}
|
|
|
|
func (c *AICoreClient) fetchDeployments(ctx context.Context, token string) error {
|
|
deployURL := strings.TrimRight(c.config.APIURL, "/") + "/v2/lm/deployments"
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, deployURL, nil)
|
|
if err != nil {
|
|
return fmt.Errorf("create deployments request: %w", err)
|
|
}
|
|
req.Header.Set("Authorization", "Bearer "+token)
|
|
req.Header.Set("AI-Resource-Group", c.config.ResourceGroup)
|
|
|
|
resp, err := c.http.Do(req)
|
|
if err != nil {
|
|
return fmt.Errorf("deployments request: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
body, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return fmt.Errorf("read deployments response: %w", err)
|
|
}
|
|
|
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
return fmt.Errorf("deployments request failed (status %d): %s", resp.StatusCode, truncateBody(body))
|
|
}
|
|
|
|
var deployResp struct {
|
|
Resources []struct {
|
|
DeploymentURL string `json:"deploymentUrl"`
|
|
Status string `json:"status"`
|
|
Details struct {
|
|
Resources struct {
|
|
BackendDetails struct {
|
|
Model struct {
|
|
Name string `json:"name"`
|
|
} `json:"model"`
|
|
} `json:"backend_details"`
|
|
} `json:"resources"`
|
|
} `json:"details"`
|
|
} `json:"resources"`
|
|
}
|
|
if err := json.Unmarshal(body, &deployResp); err != nil {
|
|
return fmt.Errorf("parse deployments response: %w", err)
|
|
}
|
|
|
|
for _, r := range deployResp.Resources {
|
|
if r.Status != "RUNNING" {
|
|
continue
|
|
}
|
|
modelName := r.Details.Resources.BackendDetails.Model.Name
|
|
if modelName == "" {
|
|
continue
|
|
}
|
|
c.deployments[modelName] = r.DeploymentURL
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// CompleteAnthropic sends a request to an Anthropic model via AI Core.
|
|
func (c *AICoreClient) CompleteAnthropic(ctx context.Context, model string, messages []Message, maxTokens int, temperature float64) (string, error) {
|
|
deployURL, token, err := c.getDeploymentURL(ctx, model)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
// Extract system message
|
|
var system string
|
|
var userMessages []anthropicMsg
|
|
for _, m := range messages {
|
|
if m.Role == "system" {
|
|
system = m.Content
|
|
} else {
|
|
userMessages = append(userMessages, anthropicMsg{
|
|
Role: m.Role,
|
|
Content: m.Content,
|
|
})
|
|
}
|
|
}
|
|
|
|
reqBody := anthropicRequest{
|
|
AnthropicVersion: "bedrock-2023-05-31", // SAP AI Core uses Bedrock format
|
|
// Model omitted - AI Core deployment already specifies model
|
|
MaxTokens: maxTokens,
|
|
System: system,
|
|
Messages: userMessages,
|
|
}
|
|
if temperature > 0 {
|
|
reqBody.Temperature = temperature
|
|
}
|
|
|
|
data, err := json.Marshal(reqBody)
|
|
if err != nil {
|
|
return "", fmt.Errorf("marshal request: %w", err)
|
|
}
|
|
|
|
// AI Core uses /invoke for Anthropic models
|
|
invokeURL := strings.TrimRight(deployURL, "/") + "/invoke"
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, invokeURL, bytes.NewReader(data))
|
|
if err != nil {
|
|
return "", fmt.Errorf("create request: %w", err)
|
|
}
|
|
req.Header.Set("Authorization", "Bearer "+token)
|
|
req.Header.Set("AI-Resource-Group", c.config.ResourceGroup)
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
resp, err := c.http.Do(req)
|
|
if err != nil {
|
|
return "", fmt.Errorf("AI Core request: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
body, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return "", fmt.Errorf("read response: %w", err)
|
|
}
|
|
|
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
return "", fmt.Errorf("AI Core API error (status %d): %s", resp.StatusCode, truncateBody(body))
|
|
}
|
|
|
|
var anthropicResp anthropicResponse
|
|
if err := json.Unmarshal(body, &anthropicResp); err != nil {
|
|
return "", fmt.Errorf("parse response: %w", err)
|
|
}
|
|
|
|
if len(anthropicResp.Content) == 0 {
|
|
return "", fmt.Errorf("no content in response")
|
|
}
|
|
|
|
var sb strings.Builder
|
|
for _, block := range anthropicResp.Content {
|
|
if block.Type == "text" {
|
|
sb.WriteString(block.Text)
|
|
}
|
|
}
|
|
result := sb.String()
|
|
if result == "" {
|
|
return "", fmt.Errorf("no text content in response")
|
|
}
|
|
return result, nil
|
|
}
|
|
|
|
// CompleteOpenAI sends a request to an OpenAI model via AI Core.
|
|
func (c *AICoreClient) CompleteOpenAI(ctx context.Context, model string, messages []Message, temperature float64) (string, error) {
|
|
deployURL, token, err := c.getDeploymentURL(ctx, model)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
reqBody := ChatRequest{
|
|
Model: model,
|
|
Temperature: temperature,
|
|
Messages: messages,
|
|
}
|
|
|
|
data, err := json.Marshal(reqBody)
|
|
if err != nil {
|
|
return "", fmt.Errorf("marshal request: %w", err)
|
|
}
|
|
|
|
// AI Core uses /chat/completions?api-version=<version> for OpenAI models
|
|
chatURL := strings.TrimRight(deployURL, "/") + "/chat/completions?api-version=" + AICoreOpenAIAPIVersion
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, chatURL, bytes.NewReader(data))
|
|
if err != nil {
|
|
return "", fmt.Errorf("create request: %w", err)
|
|
}
|
|
req.Header.Set("Authorization", "Bearer "+token)
|
|
req.Header.Set("AI-Resource-Group", c.config.ResourceGroup)
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
resp, err := c.http.Do(req)
|
|
if err != nil {
|
|
return "", fmt.Errorf("AI Core request: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
body, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return "", fmt.Errorf("read response: %w", err)
|
|
}
|
|
|
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
return "", fmt.Errorf("AI Core API error (status %d): %s", resp.StatusCode, truncateBody(body))
|
|
}
|
|
|
|
var openaiResp ChatResponse
|
|
if err := json.Unmarshal(body, &openaiResp); err != nil {
|
|
return "", fmt.Errorf("parse response: %w", err)
|
|
}
|
|
|
|
if len(openaiResp.Choices) == 0 {
|
|
return "", fmt.Errorf("no choices in response")
|
|
}
|
|
return openaiResp.Choices[0].Message.Content, nil
|
|
}
|
|
|
|
// IsAnthropicModel reports whether model names an Anthropic model.
// SAP AI Core marks Anthropic models with an "anthropic--" name prefix
// (e.g., "anthropic--claude-3-5-sonnet"); the check is case-sensitive.
func IsAnthropicModel(model string) bool {
	const prefix = "anthropic--"
	if len(model) < len(prefix) {
		return false
	}
	return model[:len(prefix)] == prefix
}
|