fix: propagate LLM timeout to AI Core client
CI / test (pull_request) Successful in 15s
CI / review (/anthropic/v1, anthropic--claude-4.6-sonnet, sonnet, anthropic, SONNET_REVIEW_TOKEN) (pull_request) Successful in 32s
CI / review (/openai/v1, gpt-5, security, openai, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Successful in 1m58s
CI / review (/openai/v1, gpt-5, gpt, openai, GPT_REVIEW_TOKEN) (pull_request) Successful in 2m16s
CI / test (pull_request) Successful in 15s
CI / review (/anthropic/v1, anthropic--claude-4.6-sonnet, sonnet, anthropic, SONNET_REVIEW_TOKEN) (pull_request) Successful in 32s
CI / review (/openai/v1, gpt-5, security, openai, SECURITY_REVIEW.md, SECURITY_REVIEW_TOKEN) (pull_request) Successful in 1m58s
CI / review (/openai/v1, gpt-5, gpt, openai, GPT_REVIEW_TOKEN) (pull_request) Successful in 2m16s
Address review feedback: MAJOR: - AICoreClient now defaults to 5min timeout (matching Client) - Add AICoreClient.WithTimeout() for explicit timeout control - Client.WithAICore() inherits parent client's timeout - Client.WithTimeout() propagates to aicore client if set MINOR: - Extract AICoreOpenAIAPIVersion constant for the hardcoded api-version - Tighten IsAnthropicModel to only match 'anthropic--' prefix (SAP AI Core always uses this prefix for Anthropic models) NIT: - Use fmt.Sprintf for token generation in tests (robust for >9 calls) - Add error checking in TestAICoreClient_TokenExpiry - Add tests for WithTimeout propagation
This commit is contained in:
+21
-9
@@ -13,6 +13,10 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
// AICoreOpenAIAPIVersion is the API version used for OpenAI models through AI Core.
|
||||
// Update this when SAP AI Core releases a new stable version.
|
||||
const AICoreOpenAIAPIVersion = "2024-12-01-preview"
|
||||
|
||||
// AICoreConfig holds SAP AI Core authentication and connection settings.
|
||||
type AICoreConfig struct {
|
||||
ClientID string
|
||||
@@ -40,14 +44,22 @@ type deployment struct {
|
||||
}
|
||||
|
||||
// NewAICoreClient creates a new AI Core client with the given configuration.
|
||||
// The client uses a default 5-minute timeout; use WithTimeout to customize.
|
||||
func NewAICoreClient(cfg AICoreConfig) *AICoreClient {
|
||||
return &AICoreClient{
|
||||
config: cfg,
|
||||
http: &http.Client{Timeout: 30 * time.Second},
|
||||
http: &http.Client{Timeout: 5 * time.Minute},
|
||||
deployments: make(map[string]deployment),
|
||||
}
|
||||
}
|
||||
|
||||
// WithTimeout sets the HTTP request timeout for AI Core calls.
|
||||
// This should be called during construction, before concurrent use.
|
||||
func (c *AICoreClient) WithTimeout(d time.Duration) *AICoreClient {
|
||||
c.http.Timeout = d
|
||||
return c
|
||||
}
|
||||
|
||||
// getToken returns a valid OAuth token, refreshing if necessary.
|
||||
func (c *AICoreClient) getToken(ctx context.Context) (string, error) {
|
||||
c.mu.RLock()
|
||||
@@ -154,7 +166,6 @@ func (c *AICoreClient) getDeploymentURL(ctx context.Context, model string) (stri
|
||||
}
|
||||
|
||||
func (c *AICoreClient) fetchDeployments(ctx context.Context, token string) error {
|
||||
|
||||
deployURL := strings.TrimRight(c.config.APIURL, "/") + "/v2/lm/deployments"
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, deployURL, nil)
|
||||
if err != nil {
|
||||
@@ -180,10 +191,10 @@ func (c *AICoreClient) fetchDeployments(ctx context.Context, token string) error
|
||||
|
||||
var deployResp struct {
|
||||
Resources []struct {
|
||||
ID string `json:"id"`
|
||||
DeploymentURL string `json:"deploymentUrl"`
|
||||
Status string `json:"status"`
|
||||
Details struct {
|
||||
ID string `json:"id"`
|
||||
DeploymentURL string `json:"deploymentUrl"`
|
||||
Status string `json:"status"`
|
||||
Details struct {
|
||||
Resources struct {
|
||||
BackendDetails struct {
|
||||
Model struct {
|
||||
@@ -326,8 +337,8 @@ func (c *AICoreClient) CompleteOpenAI(ctx context.Context, model string, message
|
||||
return "", fmt.Errorf("marshal request: %w", err)
|
||||
}
|
||||
|
||||
// AI Core uses /chat/completions?api-version=2024-12-01-preview for OpenAI models
|
||||
chatURL := strings.TrimRight(deployURL, "/") + "/chat/completions?api-version=2024-12-01-preview"
|
||||
// AI Core uses /chat/completions?api-version=<version> for OpenAI models
|
||||
chatURL := strings.TrimRight(deployURL, "/") + "/chat/completions?api-version=" + AICoreOpenAIAPIVersion
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, chatURL, bytes.NewReader(data))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("create request: %w", err)
|
||||
@@ -363,6 +374,7 @@ func (c *AICoreClient) CompleteOpenAI(ctx context.Context, model string, message
|
||||
}
|
||||
|
||||
// IsAnthropicModel returns true if the model name indicates an Anthropic model.
|
||||
// SAP AI Core uses "anthropic--" prefix for Anthropic models (e.g., "anthropic--claude-3-5-sonnet").
|
||||
func IsAnthropicModel(model string) bool {
|
||||
return strings.HasPrefix(model, "anthropic--") || strings.Contains(model, "claude")
|
||||
return strings.HasPrefix(model, "anthropic--")
|
||||
}
|
||||
|
||||
+64
-6
@@ -3,6 +3,7 @@ package llm
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
@@ -275,8 +276,8 @@ func TestAICoreClient_CompleteOpenAI(t *testing.T) {
|
||||
})
|
||||
})
|
||||
mux.HandleFunc("/deployments/openai/chat/completions", func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Query().Get("api-version") != "2024-12-01-preview" {
|
||||
t.Errorf("expected api-version query param")
|
||||
if r.URL.Query().Get("api-version") != AICoreOpenAIAPIVersion {
|
||||
t.Errorf("expected api-version %s, got %s", AICoreOpenAIAPIVersion, r.URL.Query().Get("api-version"))
|
||||
}
|
||||
var req ChatRequest
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
@@ -327,12 +328,17 @@ func TestIsAnthropicModel(t *testing.T) {
|
||||
model string
|
||||
expected bool
|
||||
}{
|
||||
// SAP AI Core uses "anthropic--" prefix for Anthropic models
|
||||
{"anthropic--claude-4.6-sonnet", true},
|
||||
{"anthropic--claude-4.6-opus", true},
|
||||
{"claude-sonnet-4", true},
|
||||
{"anthropic--claude-3-5-sonnet", true},
|
||||
// Non-prefixed model names are not detected as Anthropic
|
||||
// (SAP AI Core always uses the prefix for Anthropic models)
|
||||
{"claude-sonnet-4", false},
|
||||
{"gpt-5", false},
|
||||
{"gpt-4.1", false},
|
||||
{"llama-3", false},
|
||||
{"my-claude-model", false}, // Avoid false positives on "claude" substring
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
@@ -350,7 +356,7 @@ func TestAICoreClient_TokenExpiry(t *testing.T) {
|
||||
call := atomic.AddInt32(&tokenCalls, 1)
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
json.NewEncoder(w).Encode(map[string]interface{}{
|
||||
"access_token": "token-" + string(rune('0'+call)),
|
||||
"access_token": fmt.Sprintf("token-%d", call),
|
||||
"expires_in": 1, // 1 second expiry
|
||||
})
|
||||
return
|
||||
@@ -367,7 +373,10 @@ func TestAICoreClient_TokenExpiry(t *testing.T) {
|
||||
})
|
||||
|
||||
// First call
|
||||
token1, _ := client.getToken(context.Background())
|
||||
token1, err := client.getToken(context.Background())
|
||||
if err != nil {
|
||||
t.Fatalf("first getToken: %v", err)
|
||||
}
|
||||
|
||||
// Force token expiry by manipulating expiry time
|
||||
client.mu.Lock()
|
||||
@@ -375,7 +384,10 @@ func TestAICoreClient_TokenExpiry(t *testing.T) {
|
||||
client.mu.Unlock()
|
||||
|
||||
// Should fetch new token
|
||||
token2, _ := client.getToken(context.Background())
|
||||
token2, err := client.getToken(context.Background())
|
||||
if err != nil {
|
||||
t.Fatalf("second getToken: %v", err)
|
||||
}
|
||||
|
||||
if token1 == token2 {
|
||||
t.Error("expected different tokens after expiry")
|
||||
@@ -385,6 +397,27 @@ func TestAICoreClient_TokenExpiry(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestAICoreClient_WithTimeout(t *testing.T) {
|
||||
client := NewAICoreClient(AICoreConfig{
|
||||
ClientID: "test-id",
|
||||
ClientSecret: "test-secret",
|
||||
AuthURL: "https://auth.example.com",
|
||||
APIURL: "https://api.example.com",
|
||||
ResourceGroup: "default",
|
||||
})
|
||||
|
||||
// Default timeout is 5 minutes
|
||||
if client.http.Timeout != 5*time.Minute {
|
||||
t.Errorf("expected default timeout 5m, got %v", client.http.Timeout)
|
||||
}
|
||||
|
||||
// WithTimeout should update the timeout
|
||||
client.WithTimeout(10 * time.Minute)
|
||||
if client.http.Timeout != 10*time.Minute {
|
||||
t.Errorf("expected timeout 10m, got %v", client.http.Timeout)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClient_WithAICore(t *testing.T) {
|
||||
client := NewClient("http://example.com", "key", "model")
|
||||
if client.provider != ProviderOpenAI {
|
||||
@@ -407,6 +440,31 @@ func TestClient_WithAICore(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestClient_WithTimeout_PropagatestoAICore(t *testing.T) {
|
||||
client := NewClient("http://example.com", "key", "model").
|
||||
WithAICore(AICoreConfig{
|
||||
ClientID: "id",
|
||||
ClientSecret: "secret",
|
||||
AuthURL: "https://auth.example.com",
|
||||
APIURL: "https://api.example.com",
|
||||
ResourceGroup: "default",
|
||||
})
|
||||
|
||||
// Default should be 5 minutes (inherited from parent client)
|
||||
if client.aicore.http.Timeout != 5*time.Minute {
|
||||
t.Errorf("expected aicore default timeout 5m, got %v", client.aicore.http.Timeout)
|
||||
}
|
||||
|
||||
// WithTimeout should propagate to AI Core client
|
||||
client.WithTimeout(15 * time.Minute)
|
||||
if client.http.Timeout != 15*time.Minute {
|
||||
t.Errorf("expected parent timeout 15m, got %v", client.http.Timeout)
|
||||
}
|
||||
if client.aicore.http.Timeout != 15*time.Minute {
|
||||
t.Errorf("expected aicore timeout 15m, got %v", client.aicore.http.Timeout)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClient_CompleteAICore(t *testing.T) {
|
||||
var baseURL string
|
||||
mux := http.NewServeMux()
|
||||
|
||||
+11
-6
@@ -52,8 +52,12 @@ func NewClient(baseURL, apiKey, model string) *Client {
|
||||
}
|
||||
|
||||
// WithTimeout sets the HTTP request timeout for LLM calls (default 5 minutes).
|
||||
// When using AI Core, this also sets the timeout on the AI Core client.
|
||||
func (c *Client) WithTimeout(d time.Duration) *Client {
|
||||
c.http.Timeout = d
|
||||
if c.aicore != nil {
|
||||
c.aicore.WithTimeout(d)
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
@@ -71,9 +75,10 @@ func (c *Client) WithProvider(p Provider) *Client {
|
||||
|
||||
// WithAICore configures the client to use SAP AI Core for authentication.
|
||||
// This sets the provider to aicore automatically.
|
||||
// The AI Core client inherits the current HTTP timeout from this client.
|
||||
func (c *Client) WithAICore(cfg AICoreConfig) *Client {
|
||||
c.provider = ProviderAICore
|
||||
c.aicore = NewAICoreClient(cfg)
|
||||
c.aicore = NewAICoreClient(cfg).WithTimeout(c.http.Timeout)
|
||||
return c
|
||||
}
|
||||
|
||||
@@ -201,11 +206,11 @@ func (c *Client) completeOpenAI(ctx context.Context, messages []Message) (string
|
||||
// --- Anthropic Messages API implementation ---
|
||||
|
||||
type anthropicRequest struct {
|
||||
Model string `json:"model"`
|
||||
MaxTokens int `json:"max_tokens"`
|
||||
System string `json:"system,omitempty"`
|
||||
Messages []anthropicMsg `json:"messages"`
|
||||
Temperature float64 `json:"temperature,omitempty"`
|
||||
Model string `json:"model"`
|
||||
MaxTokens int `json:"max_tokens"`
|
||||
System string `json:"system,omitempty"`
|
||||
Messages []anthropicMsg `json:"messages"`
|
||||
Temperature float64 `json:"temperature,omitempty"`
|
||||
}
|
||||
|
||||
type anthropicMsg struct {
|
||||
|
||||
Reference in New Issue
Block a user