fix: propagate LLM timeout to AI Core client

Address review feedback: MAJOR: - AICoreClient now defaults to 5min timeout (matching Client) - Add AICoreClient.WithTimeout() for explicit timeout control - Client.WithAICore() inherits parent client's timeout - Client.WithTimeout() propagates to aicore client if set MINOR: - Extract AICoreOpenAIAPIVersion constant for the hardcoded api-version - Tighten IsAnthropicModel to only match 'anthropic--' prefix (SAP AI Core always uses this prefix for Anthropic models) NIT: - Use fmt.Sprintf for token generation in tests (robust for >9 calls) - Add error checking in TestAICoreClient_TokenExpiry - Add tests for WithTimeout propagation
2026-05-09 22:29:19 -07:00
parent a62b791b9e
commit 34507dd9ff
3 changed files with 96 additions and 21 deletions
@@ -13,6 +13,10 @@ import (
 	"time"
 )
 // AICoreOpenAIAPIVersion is the API version used for OpenAI models through AI Core.
 // Update this when SAP AI Core releases a new stable version.
 const AICoreOpenAIAPIVersion = "2024-12-01-preview"
 // AICoreConfig holds SAP AI Core authentication and connection settings.
 type AICoreConfig struct {
 	ClientID      string
@@ -40,14 +44,22 @@ type deployment struct {
 }
 // NewAICoreClient creates a new AI Core client with the given configuration.
 // The client uses a default 5-minute timeout; use WithTimeout to customize.
 func NewAICoreClient(cfg AICoreConfig) *AICoreClient {
 	return &AICoreClient{
 		config:      cfg,
-		http:        &http.Client{Timeout: 30 * time.Second},
+		http:        &http.Client{Timeout: 5 * time.Minute},
 		deployments: make(map[string]deployment),
 	}
 }
 // WithTimeout sets the HTTP request timeout for AI Core calls.
 // This should be called during construction, before concurrent use.
 func (c *AICoreClient) WithTimeout(d time.Duration) *AICoreClient {
 	c.http.Timeout = d
 	return c
 }
 // getToken returns a valid OAuth token, refreshing if necessary.
 func (c *AICoreClient) getToken(ctx context.Context) (string, error) {
 	c.mu.RLock()
@@ -154,7 +166,6 @@ func (c *AICoreClient) getDeploymentURL(ctx context.Context, model string) (stri
 }
 func (c *AICoreClient) fetchDeployments(ctx context.Context, token string) error {
 	deployURL := strings.TrimRight(c.config.APIURL, "/") + "/v2/lm/deployments"
 	req, err := http.NewRequestWithContext(ctx, http.MethodGet, deployURL, nil)
 	if err != nil {
@@ -180,10 +191,10 @@ func (c *AICoreClient) fetchDeployments(ctx context.Context, token string) error
 	var deployResp struct {
 		Resources []struct {
-			ID               string `json:"id"`
+			ID            string `json:"id"`
-			DeploymentURL    string `json:"deploymentUrl"`
+			DeploymentURL string `json:"deploymentUrl"`
-			Status           string `json:"status"`
+			Status        string `json:"status"`
-			Details          struct {
+			Details       struct {
 				Resources struct {
 					BackendDetails struct {
 						Model struct {
@@ -326,8 +337,8 @@ func (c *AICoreClient) CompleteOpenAI(ctx context.Context, model string, message
 		return "", fmt.Errorf("marshal request: %w", err)
 	}
-	// AI Core uses /chat/completions?api-version=2024-12-01-preview for OpenAI models
+	// AI Core uses /chat/completions?api-version=<version> for OpenAI models
-	chatURL := strings.TrimRight(deployURL, "/") + "/chat/completions?api-version=2024-12-01-preview"
+	chatURL := strings.TrimRight(deployURL, "/") + "/chat/completions?api-version=" + AICoreOpenAIAPIVersion
 	req, err := http.NewRequestWithContext(ctx, http.MethodPost, chatURL, bytes.NewReader(data))
 	if err != nil {
 		return "", fmt.Errorf("create request: %w", err)
@@ -363,6 +374,7 @@ func (c *AICoreClient) CompleteOpenAI(ctx context.Context, model string, message
 }
 // IsAnthropicModel returns true if the model name indicates an Anthropic model.
 // SAP AI Core uses "anthropic--" prefix for Anthropic models (e.g., "anthropic--claude-3-5-sonnet").
 func IsAnthropicModel(model string) bool {
-	return strings.HasPrefix(model, "anthropic--") || strings.Contains(model, "claude")
+	return strings.HasPrefix(model, "anthropic--")
 }
@@ -3,6 +3,7 @@ package llm
 import (
 	"context"
 	"encoding/json"
 	"fmt"
 	"net/http"
 	"net/http/httptest"
 	"strings"
@@ -275,8 +276,8 @@ func TestAICoreClient_CompleteOpenAI(t *testing.T) {
 		})
 	})
 	mux.HandleFunc("/deployments/openai/chat/completions", func(w http.ResponseWriter, r *http.Request) {
-		if r.URL.Query().Get("api-version") != "2024-12-01-preview" {
+		if r.URL.Query().Get("api-version") != AICoreOpenAIAPIVersion {
-			t.Errorf("expected api-version query param")
+			t.Errorf("expected api-version %s, got %s", AICoreOpenAIAPIVersion, r.URL.Query().Get("api-version"))
 		}
 		var req ChatRequest
 		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
@@ -327,12 +328,17 @@ func TestIsAnthropicModel(t *testing.T) {
 		model    string
 		expected bool
 	}{
 		// SAP AI Core uses "anthropic--" prefix for Anthropic models
 		{"anthropic--claude-4.6-sonnet", true},
 		{"anthropic--claude-4.6-opus", true},
-		{"claude-sonnet-4", true},
+		{"anthropic--claude-3-5-sonnet", true},
 		// Non-prefixed model names are not detected as Anthropic
 		// (SAP AI Core always uses the prefix for Anthropic models)
 		{"claude-sonnet-4", false},
 		{"gpt-5", false},
 		{"gpt-4.1", false},
 		{"llama-3", false},
 		{"my-claude-model", false}, // Avoid false positives on "claude" substring
 	}
 	for _, tt := range tests {
@@ -350,7 +356,7 @@ func TestAICoreClient_TokenExpiry(t *testing.T) {
 			call := atomic.AddInt32(&tokenCalls, 1)
 			w.Header().Set("Content-Type", "application/json")
 			json.NewEncoder(w).Encode(map[string]interface{}{
-				"access_token": "token-" + string(rune('0'+call)),
+				"access_token": fmt.Sprintf("token-%d", call),
 				"expires_in":   1, // 1 second expiry
 			})
 			return
@@ -367,7 +373,10 @@ func TestAICoreClient_TokenExpiry(t *testing.T) {
 	})
 	// First call
-	token1, _ := client.getToken(context.Background())
+	token1, err := client.getToken(context.Background())
 	if err != nil {
 		t.Fatalf("first getToken: %v", err)
 	}
 	// Force token expiry by manipulating expiry time
 	client.mu.Lock()
@@ -375,7 +384,10 @@ func TestAICoreClient_TokenExpiry(t *testing.T) {
 	client.mu.Unlock()
 	// Should fetch new token
-	token2, _ := client.getToken(context.Background())
+	token2, err := client.getToken(context.Background())
 	if err != nil {
 		t.Fatalf("second getToken: %v", err)
 	}
 	if token1 == token2 {
 		t.Error("expected different tokens after expiry")
@@ -385,6 +397,27 @@ func TestAICoreClient_TokenExpiry(t *testing.T) {
 	}
 }
 func TestAICoreClient_WithTimeout(t *testing.T) {
 	client := NewAICoreClient(AICoreConfig{
 		ClientID:      "test-id",
 		ClientSecret:  "test-secret",
 		AuthURL:       "https://auth.example.com",
 		APIURL:        "https://api.example.com",
 		ResourceGroup: "default",
 	})
 	// Default timeout is 5 minutes
 	if client.http.Timeout != 5*time.Minute {
 		t.Errorf("expected default timeout 5m, got %v", client.http.Timeout)
 	}
 	// WithTimeout should update the timeout
 	client.WithTimeout(10 * time.Minute)
 	if client.http.Timeout != 10*time.Minute {
 		t.Errorf("expected timeout 10m, got %v", client.http.Timeout)
 	}
 }
 func TestClient_WithAICore(t *testing.T) {
 	client := NewClient("http://example.com", "key", "model")
 	if client.provider != ProviderOpenAI {
@@ -407,6 +440,31 @@ func TestClient_WithAICore(t *testing.T) {
 	}
 }
 func TestClient_WithTimeout_PropagatestoAICore(t *testing.T) {
 	client := NewClient("http://example.com", "key", "model").
 		WithAICore(AICoreConfig{
 			ClientID:      "id",
 			ClientSecret:  "secret",
 			AuthURL:       "https://auth.example.com",
 			APIURL:        "https://api.example.com",
 			ResourceGroup: "default",
 		})
 	// Default should be 5 minutes (inherited from parent client)
 	if client.aicore.http.Timeout != 5*time.Minute {
 		t.Errorf("expected aicore default timeout 5m, got %v", client.aicore.http.Timeout)
 	}
 	// WithTimeout should propagate to AI Core client
 	client.WithTimeout(15 * time.Minute)
 	if client.http.Timeout != 15*time.Minute {
 		t.Errorf("expected parent timeout 15m, got %v", client.http.Timeout)
 	}
 	if client.aicore.http.Timeout != 15*time.Minute {
 		t.Errorf("expected aicore timeout 15m, got %v", client.aicore.http.Timeout)
 	}
 }
 func TestClient_CompleteAICore(t *testing.T) {
 	var baseURL string
 	mux := http.NewServeMux()
@@ -52,8 +52,12 @@ func NewClient(baseURL, apiKey, model string) *Client {
 }
 // WithTimeout sets the HTTP request timeout for LLM calls (default 5 minutes).
 // When using AI Core, this also sets the timeout on the AI Core client.
 func (c *Client) WithTimeout(d time.Duration) *Client {
 	c.http.Timeout = d
 	if c.aicore != nil {
 		c.aicore.WithTimeout(d)
 	}
 	return c
 }
@@ -71,9 +75,10 @@ func (c *Client) WithProvider(p Provider) *Client {
 // WithAICore configures the client to use SAP AI Core for authentication.
 // This sets the provider to aicore automatically.
 // The AI Core client inherits the current HTTP timeout from this client.
 func (c *Client) WithAICore(cfg AICoreConfig) *Client {
 	c.provider = ProviderAICore
-	c.aicore = NewAICoreClient(cfg)
+	c.aicore = NewAICoreClient(cfg).WithTimeout(c.http.Timeout)
 	return c
 }
@@ -201,11 +206,11 @@ func (c *Client) completeOpenAI(ctx context.Context, messages []Message) (string
 // --- Anthropic Messages API implementation ---
 type anthropicRequest struct {
-	Model       string            `json:"model"`
+	Model       string         `json:"model"`
-	MaxTokens   int               `json:"max_tokens"`
+	MaxTokens   int            `json:"max_tokens"`
-	System      string            `json:"system,omitempty"`
+	System      string         `json:"system,omitempty"`
-	Messages    []anthropicMsg    `json:"messages"`
+	Messages    []anthropicMsg `json:"messages"`
-	Temperature float64           `json:"temperature,omitempty"`
+	Temperature float64        `json:"temperature,omitempty"`
 }
 type anthropicMsg struct {