feat: native SAP AI Core support

Add a native SAP AI Core provider that handles OAuth token management and deployment discovery automatically. This eliminates the need for the external LLM proxy when running in SAP environments.

Changes:
- Add AICoreClient with OAuth token caching and deployment URL discovery
- Support both Anthropic and OpenAI models via AI Core deployments
- Update CI to use the native AI Core provider
- Update action inputs to accept AI Core credentials
- Update README with AI Core configuration examples

Model names must match AI Core deployment names (e.g. anthropic--claude-4.6-sonnet, gpt-5).
2026-05-10 08:35:50 -07:00
2 changed files with 32 additions and 30 deletions
@@ -33,9 +33,9 @@ type AICoreConfig struct {
 // AICoreClient wraps AI Core authentication and deployment discovery.
 // Thread-safe for concurrent use after construction.
 //
-// Note: The deployment cache is populated once and never invalidated. This is
+// Design: The deployment cache is populated once and never invalidated. This is
 // acceptable for short-lived CI runner processes, but longer-lived deployments
-// may want to add a TTL or re-fetch on errors. See issue #54 review discussion.
+// may want to add a TTL or re-fetch on errors.
 type AICoreClient struct {
 	config AICoreConfig
 	http   *http.Client
@@ -145,36 +145,48 @@ func (c *AICoreClient) fetchToken(ctx context.Context) (string, time.Time, error
 }

 // getDeploymentURL returns the deployment URL for a model, fetching deployments if needed.
-func (c *AICoreClient) getDeploymentURL(ctx context.Context, model string) (string, error) {
+// It also returns a valid token for the caller, so CompleteAnthropic and CompleteOpenAI
+// no longer need a separate getToken call.
+//
+// Note: The token is fetched before acquiring the write lock so that the token request
+// does not run while the lock is held (the fetchDeployments call below still does). If a
+// goroutine then waits a long time for the write lock, the token could theoretically
+// expire; the 5-minute refresh buffer in getToken makes this extremely unlikely in practice.
+func (c *AICoreClient) getDeploymentURL(ctx context.Context, model string) (deployURL, token string, err error) {
 	c.mu.RLock()
-	if url, ok := c.deployments[model]; ok {
+	if u, ok := c.deployments[model]; ok {
 		c.mu.RUnlock()
-		return url, nil
+		// Still need a token for the caller
+		token, err = c.getToken(ctx)
+		if err != nil {
+			return "", "", fmt.Errorf("get token: %w", err)
+		}
+		return u, token, nil
 	}
 	c.mu.RUnlock()

-	// Fetch token first (before acquiring write lock to avoid deadlock)
-	token, err := c.getToken(ctx)
+	// Fetch token first (before acquiring write lock to avoid holding lock during I/O)
+	token, err = c.getToken(ctx)
 	if err != nil {
-		return "", fmt.Errorf("get token for deployments: %w", err)
+		return "", "", fmt.Errorf("get token for deployments: %w", err)
 	}

 	c.mu.Lock()
 	defer c.mu.Unlock()

 	// Double-check after acquiring write lock
-	if url, ok := c.deployments[model]; ok {
-		return url, nil
+	if u, ok := c.deployments[model]; ok {
+		return u, token, nil
 	}

 	if err := c.fetchDeployments(ctx, token); err != nil {
-		return "", err
+		return "", "", err
 	}

-	if url, ok := c.deployments[model]; ok {
-		return url, nil
+	if u, ok := c.deployments[model]; ok {
+		return u, token, nil
 	}
-	return "", fmt.Errorf("no deployment found for model %q", model)
+	return "", "", fmt.Errorf("no deployment found for model %q", model)
 }

 func (c *AICoreClient) fetchDeployments(ctx context.Context, token string) error {
@@ -236,12 +248,7 @@ func (c *AICoreClient) fetchDeployments(ctx context.Context, token string) error

 // CompleteAnthropic sends a request to an Anthropic model via AI Core.
 func (c *AICoreClient) CompleteAnthropic(ctx context.Context, model string, messages []Message, maxTokens int, temperature float64) (string, error) {
-	deployURL, err := c.getDeploymentURL(ctx, model)
-	if err != nil {
-		return "", err
-	}
-
-	token, err := c.getToken(ctx)
+	deployURL, token, err := c.getDeploymentURL(ctx, model)
 	if err != nil {
 		return "", err
 	}
@@ -325,12 +332,7 @@ func (c *AICoreClient) CompleteAnthropic(ctx context.Context, model string, mess

 // CompleteOpenAI sends a request to an OpenAI model via AI Core.
 func (c *AICoreClient) CompleteOpenAI(ctx context.Context, model string, messages []Message, temperature float64) (string, error) {
-	deployURL, err := c.getDeploymentURL(ctx, model)
-	if err != nil {
-		return "", err
-	}
-
-	token, err := c.getToken(ctx)
+	deployURL, token, err := c.getDeploymentURL(ctx, model)
 	if err != nil {
 		return "", err
 	}
@@ -142,7 +142,7 @@ func TestAICoreClient_DeploymentFetch(t *testing.T) {
 	})

 	// Should find running deployment
-	url, err := client.getDeploymentURL(context.Background(), "anthropic--claude-4.6-sonnet")
+	url, _, err := client.getDeploymentURL(context.Background(), "anthropic--claude-4.6-sonnet")
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
@@ -151,7 +151,7 @@ func TestAICoreClient_DeploymentFetch(t *testing.T) {
 	}

 	// Should find running gpt-5, not stopped one
-	url, err = client.getDeploymentURL(context.Background(), "gpt-5")
+	url, _, err = client.getDeploymentURL(context.Background(), "gpt-5")
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
@@ -160,14 +160,14 @@ func TestAICoreClient_DeploymentFetch(t *testing.T) {
 	}

 	// Should error on unknown model
-	_, err = client.getDeploymentURL(context.Background(), "unknown-model")
+	_, _, err = client.getDeploymentURL(context.Background(), "unknown-model")
 	if err == nil {
 		t.Error("expected error for unknown model")
 	}
 }

 func TestAICoreClient_CompleteAnthropic(t *testing.T) {
-	// Use a pointer to capture the server URL for use in the handler
+	// baseURL is set after server creation; captured by closure in handlers
 	var baseURL string
 	mux := http.NewServeMux()
 	mux.HandleFunc("/oauth/token", func(w http.ResponseWriter, r *http.Request) {