review-bot/.gitea/actions/review/action.yml

# This composite action supports both Gitea Actions and GitHub Actions runners.
# It detects the VCS host type using the github.api_url context (set only on
# GitHub/GHES runners) and uses the appropriate releases API for version
# resolution and binary download.
#
# Security notes:
# - On GitHub/GHES (VCS_TYPE=github), inputs.gitea-url is IGNORED to prevent
#   token exfiltration. API calls use github.api_url; downloads use
#   github.server_url. Tokens are never sent to user-supplied URLs.
# - On Gitea (VCS_TYPE=gitea), inputs.gitea-url is validated (https scheme,
#   no whitespace/newlines) before use.
# - action-repo is validated against owner/repo pattern.
# - Tokens are passed via masked environment variables, not step outputs.
#
# Requirements: python3, sha256sum, curl (all present on ubuntu-* runners).
name: 'AI Code Review'
description: 'Run AI-powered code review on a pull request using review-bot'

# Inputs accepted by the action. Every optional input defaults to a string;
# an empty string means "not provided" and the steps below fall back to a
# context value or leave the corresponding environment variable empty.
inputs:
  # --- Where to review and where to fetch the review-bot binary from ---
  gitea-url:
    description: 'Gitea instance URL (only used on Gitea runners; ignored on GitHub/GHES). Defaults to server_url.'
    required: false
    default: ''
  repo:
    description: 'Repository to review (owner/name, defaults to current)'
    required: false
    default: ''
  action-repo:
    description: 'Repository hosting review-bot releases (owner/name). Defaults to github.action_repository or rodin/review-bot.'
    required: false
    default: ''
  action-repo-token:
    description: 'Token for downloading release assets from action-repo (defaults to github.token on GitHub, reviewer-token on Gitea). Required for private repos.'
    required: false
    default: ''
  pr-number:
    description: 'Pull request number (defaults to current PR)'
    required: false
    default: ''
  # --- Reviewer identity (exported to the binary as REVIEWER_TOKEN / REVIEWER_NAME) ---
  reviewer-token:
    description: 'Token for posting the review'
    required: true
  reviewer-name:
    description: 'Display name for the reviewer'
    required: false
    default: ''
  # --- LLM endpoint (exported to the binary as LLM_* environment variables) ---
  llm-base-url:
    description: 'OpenAI-compatible LLM API base URL (not required for aicore provider)'
    required: false
    default: ''
  llm-api-key:
    description: 'LLM API key (not required for aicore provider)'
    required: false
    default: ''
  llm-model:
    description: 'LLM model name'
    required: true
  llm-provider:
    description: 'LLM API provider: openai, anthropic, or aicore (default openai)'
    required: false
    default: 'openai'
  # --- SAP AI Core credentials (exported to the binary as AICORE_* environment variables) ---
  aicore-client-id:
    description: 'SAP AI Core client ID (required for aicore provider)'
    required: false
    default: ''
  aicore-client-secret:
    description: 'SAP AI Core client secret (required for aicore provider)'
    required: false
    default: ''
  aicore-auth-url:
    description: 'SAP AI Core authentication URL (required for aicore provider)'
    required: false
    default: ''
  aicore-api-url:
    description: 'SAP AI Core API URL (required for aicore provider)'
    required: false
    default: ''
  aicore-resource-group:
    description: 'SAP AI Core resource group (default: default)'
    required: false
    default: 'default'
  # --- Extra review context handed to the binary ---
  conventions-file:
    description: 'Path to conventions file in the repo (e.g. CLAUDE.md)'
    required: false
    default: ''
  patterns-repo:
    description: 'Comma-separated repos with language patterns (e.g. rodin/elixir-patterns,rodin/phoenix-conventions)'
    required: false
    default: ''
  patterns-files:
    description: 'Comma-separated file paths or directories to fetch from patterns repos'
    required: false
    default: 'README.md'
  # --- LLM request tuning (quoted so the values stay strings) ---
  temperature:
    description: 'LLM temperature (0 = server default)'
    required: false
    default: '0'
  timeout:
    description: 'LLM request timeout in seconds (default 300)'
    required: false
    default: '300'
  # --- Installation and run behaviour ---
  version:
    description: 'review-bot version to install (e.g. v0.1.0, defaults to latest)'
    required: false
    default: 'latest'
  # Only the exact string 'true' adds --dry-run to the review-bot command line.
  dry-run:
    description: 'Print review to stdout instead of posting'
    required: false
    default: 'false'
  update-existing:
    description: 'Delete previous review from same bot after posting new one. Accepts: true/1/yes or false/0/no (default true)'
    required: false
    default: 'true'
  # --- Prompt customisation ---
  system-prompt-file:
    description: 'Local file with additional system prompt instructions (e.g. security review focus)'
    required: false
    default: ''
  persona:
    description: 'Built-in persona name (security, architect, docs)'
    required: false
    default: ''
  persona-file:
    description: 'Path to custom persona JSON file'
    required: false
    default: ''

runs:
  using: 'composite'
  steps:
    # Resolves the release repo, host type, server URL, auth token and release
    # tag used by the following steps.
    #
    # Step outputs: version, action_repo, server_url, vcs_type.
    # Side effect:  exports the (masked) release token as ACTION_TOKEN through
    #               GITHUB_ENV, which makes it visible to later steps of the job.
    - name: Determine version
      id: version
      shell: bash
      env:
        # Inputs and context values are handed to the script through the
        # environment instead of being interpolated into the script text, so a
        # value containing quotes or $(...) can never be executed as shell code.
        RB_ACTION_REPO: ${{ inputs.action-repo }}
        RB_RUNNING_ACTION_REPO: ${{ github.action_repository }}
        RB_API_URL: ${{ github.api_url }}
        RB_PLATFORM_SERVER_URL: ${{ github.server_url }}
        RB_GITEA_URL: ${{ inputs.gitea-url }}
        RB_ACTION_REPO_TOKEN: ${{ inputs.action-repo-token }}
        RB_GITHUB_TOKEN: ${{ github.token }}
        RB_REVIEWER_TOKEN: ${{ inputs.reviewer-token }}
        RB_VERSION: ${{ inputs.version }}
      run: |
        # --- Input validation ---

        # Repo hosting the review-bot releases (not the repo being reviewed):
        # explicit input first, then the repo containing the running action,
        # then a fixed fallback for hosts that do not set action_repository.
        ACTION_REPO="${RB_ACTION_REPO:-${RB_RUNNING_ACTION_REPO:-rodin/review-bot}}"

        # Validate ACTION_REPO against the owner/repo pattern (prevents path
        # traversal). bash's =~ matches the whole string, unlike line-oriented
        # grep, so a multi-line value cannot pass because one line looks valid.
        REPO_RE='^[a-zA-Z0-9._-]+/[a-zA-Z0-9._-]+$'
        if [[ ! "$ACTION_REPO" =~ $REPO_RE ]]; then
          echo "Error: action-repo '${ACTION_REPO}' does not match expected owner/repo format" >&2
          exit 1
        fi
        # "." and ".." satisfy the character class above but would let the
        # request path escape /repos/<owner>/<repo>.
        case "$ACTION_REPO" in
          ./*|../*|*/.|*/..)
            echo "Error: action-repo '${ACTION_REPO}' does not match expected owner/repo format" >&2
            exit 1
            ;;
        esac

        # Detect the VCS host type: a non-empty github.api_url selects the
        # GitHub/GHES code path, an empty one selects Gitea.
        # NOTE(review): confirm that the targeted Gitea runner versions really
        # leave github.api_url empty; if they populate it, every run is
        # treated as GitHub.
        if [ -n "$RB_API_URL" ]; then
          VCS_TYPE="github"
        else
          VCS_TYPE="gitea"
        fi

        # Determine SERVER_URL based on VCS type.
        # SECURITY: On GitHub/GHES, ALWAYS use github.server_url - never trust
        # inputs.gitea-url, to prevent token exfiltration to attacker-controlled hosts.
        if [ "$VCS_TYPE" = "github" ]; then
          SERVER_URL="$RB_PLATFORM_SERVER_URL"
          if [ -n "$RB_GITEA_URL" ]; then
            echo "::warning::inputs.gitea-url is ignored on GitHub/GHES runners (VCS_TYPE=github). Using github.server_url instead."
          fi
        else
          SERVER_URL="${RB_GITEA_URL:-$RB_PLATFORM_SERVER_URL}"
        fi
        # Strip one trailing slash if present
        SERVER_URL="${SERVER_URL%/}"

        # Gitea path: SERVER_URL must be https with no whitespace. Newlines are
        # whitespace and the match is whole-string, so no separate newline
        # check is needed. (The former `grep -q $'\n'` check matched every
        # input, because grep treats a newline as a separator between two
        # empty patterns.)
        if [ "$VCS_TYPE" = "gitea" ]; then
          URL_RE='^https://[^[:space:]]+$'
          if [[ ! "$SERVER_URL" =~ $URL_RE ]]; then
            echo "Error: SERVER_URL '${SERVER_URL}' must be an https:// URL with no whitespace" >&2
            exit 1
          fi
        fi

        # Determine the auth token for release API requests: explicit input,
        # otherwise github.token on GitHub or the reviewer token on Gitea.
        ACTION_TOKEN="$RB_ACTION_REPO_TOKEN"
        if [ -z "$ACTION_TOKEN" ]; then
          if [ "$VCS_TYPE" = "github" ]; then
            ACTION_TOKEN="$RB_GITHUB_TOKEN"
          else
            ACTION_TOKEN="$RB_REVIEWER_TOKEN"
          fi
        fi
        if [ -n "$ACTION_TOKEN" ]; then
          # A newline would let the value append arbitrary variables when it
          # is written to GITHUB_ENV below.
          if [[ "$ACTION_TOKEN" == *$'\n'* ]]; then
            echo "Error: release token must not contain newlines" >&2
            exit 1
          fi
          # Mask before the token is used anywhere, so it cannot show up in
          # log output produced by the commands below.
          echo "::add-mask::${ACTION_TOKEN}"
        fi

        # --- Version resolution ---

        if [ "$RB_VERSION" = "latest" ]; then
          if [ "$VCS_TYPE" = "github" ]; then
            # SECURITY: github.api_url is a trusted platform-provided value.
            # Never construct API URLs from user-supplied inputs on GitHub.
            API_URL="${RB_API_URL}/repos/${ACTION_REPO}/releases?per_page=1"
          else
            # Gitea API - SERVER_URL was validated above
            API_URL="${SERVER_URL}/api/v1/repos/${ACTION_REPO}/releases?limit=1"
          fi

          # One curl invocation; the auth header is appended only when a
          # token is available (public repos need none).
          CURL_ARGS=(-sSf --connect-timeout 10 --max-time 30)
          if [ -n "$ACTION_TOKEN" ]; then
            if [ "$VCS_TYPE" = "github" ]; then
              CURL_ARGS+=(-H "Authorization: Bearer ${ACTION_TOKEN}")
            else
              CURL_ARGS+=(-H "Authorization: token ${ACTION_TOKEN}")
            fi
          fi

          # Take the tag of the first release in the returned list; print an
          # empty string when the list is empty.
          VERSION=$(curl "${CURL_ARGS[@]}" "$API_URL" \
            | python3 -c "import sys, json; releases = json.load(sys.stdin); print(releases[0]['tag_name'] if releases else '')")

          if [ -z "$VERSION" ]; then
            echo "Failed to determine latest version from ${API_URL}" >&2
            exit 1
          fi
        else
          VERSION="$RB_VERSION"
        fi

        # Validate VERSION: no newlines, no slashes, no whitespace (prevents
        # path traversal and output-file injection). A glob match is used
        # because inside a grep bracket expression "\n" and "\r" are the
        # literal characters backslash, n and r, which wrongly rejected tags
        # such as v1.0.0-rc1.
        if [[ "$VERSION" == *[[:space:]/]* ]]; then
          echo "Error: VERSION '${VERSION}' contains invalid characters (newline, slash, or whitespace)" >&2
          exit 1
        fi
        case "$VERSION" in
          ''|.|..)
            echo "Error: VERSION '${VERSION}' is not a valid release tag" >&2
            exit 1
            ;;
        esac

        {
          echo "version=${VERSION}"
          echo "action_repo=${ACTION_REPO}"
          echo "server_url=${SERVER_URL}"
          echo "vcs_type=${VCS_TYPE}"
        } >> "$GITHUB_OUTPUT"

        # SECURITY: Pass the token via a masked environment variable instead
        # of a step output. Step outputs can leak in debug logs; GITHUB_ENV
        # with masking is safer.
        if [ -n "$ACTION_TOKEN" ]; then
          echo "ACTION_TOKEN=${ACTION_TOKEN}" >> "$GITHUB_ENV"
        fi

    # Restores a previously installed review-bot binary for the resolved
    # version. The install step below is skipped when this step reports
    # cache-hit == 'true'.
    - name: Cache review-bot binary
      id: cache
      uses: actions/cache@v4
      with:
        # Same path the install step writes the binary to.
        path: ${{ runner.temp }}/review-bot
        # NOTE(review): the key contains only the platform and the version tag,
        # not the action-repo, so two release repos that use the same tag
        # would share one cache entry - confirm this is intended.
        key: review-bot-linux-amd64-${{ steps.version.outputs.version }}

    # Downloads the release binary and checksums.txt for the resolved version,
    # verifies the SHA-256 checksum and only then installs the binary at
    # <runner.temp>/review-bot. Skipped when the cache step restored a binary.
    - name: Install review-bot
      if: steps.cache.outputs.cache-hit != 'true'
      shell: bash
      env:
        # Values are passed through the environment rather than interpolated
        # into the script text, so they are always treated as data and paths
        # containing spaces stay intact.
        RB_SERVER_URL: ${{ steps.version.outputs.server_url }}
        RB_ACTION_REPO: ${{ steps.version.outputs.action_repo }}
        RB_VERSION: ${{ steps.version.outputs.version }}
        RB_VCS_TYPE: ${{ steps.version.outputs.vcs_type }}
        RB_EXPECTED_SERVER: ${{ github.server_url }}
        RB_INSTALL_DIR: ${{ runner.temp }}
      run: |
        SERVER_URL="$RB_SERVER_URL"
        ACTION_REPO="$RB_ACTION_REPO"
        VERSION="$RB_VERSION"
        VCS_TYPE="$RB_VCS_TYPE"
        # Read token from masked environment variable (set in Determine version step)
        # Falls back to empty if not set (public repos don't need auth)
        ACTION_TOKEN="${ACTION_TOKEN:-}"
        BINARY="review-bot-linux-amd64"

        TARGET="${RB_INSTALL_DIR}/review-bot"
        # The download is staged under a different name so an unverified file
        # never sits at the path that gets cached and executed.
        STAGED="${TARGET}.download"
        CHECKSUMS="${RB_INSTALL_DIR}/checksums.txt"

        # SECURITY: On GitHub/GHES, downloads must go to github.server_url.
        # SERVER_URL was already chosen per VCS_TYPE in the Determine version
        # step; re-check it here against the platform-provided value.
        if [ "$VCS_TYPE" = "github" ]; then
          EXPECTED_SERVER="${RB_EXPECTED_SERVER%/}"
          if [ "$SERVER_URL" != "$EXPECTED_SERVER" ]; then
            echo "Error: SERVER_URL mismatch on GitHub runner (possible tampering)" >&2
            exit 1
          fi
        fi

        # Download URL format is the same on both Gitea and GitHub:
        # {server}/{owner}/{repo}/releases/download/{tag}/{asset}
        DOWNLOAD_URL="${SERVER_URL}/${ACTION_REPO}/releases/download/${VERSION}"

        # Auth header: "Bearer" on GitHub, "token" on Gitea, none without a token.
        AUTH_ARGS=()
        if [ -n "$ACTION_TOKEN" ]; then
          if [ "$VCS_TYPE" = "github" ]; then
            AUTH_ARGS=(-H "Authorization: Bearer ${ACTION_TOKEN}")
          else
            AUTH_ARGS=(-H "Authorization: token ${ACTION_TOKEN}")
          fi
        fi

        curl -sSfL --connect-timeout 10 --max-time 120 "${AUTH_ARGS[@]}" \
          "${DOWNLOAD_URL}/${BINARY}" -o "$STAGED"
        curl -sSfL --connect-timeout 10 --max-time 30 "${AUTH_ARGS[@]}" \
          "${DOWNLOAD_URL}/checksums.txt" -o "$CHECKSUMS"

        # Verify SHA-256 checksum
        # NOTE: This verifies integrity (download wasn't corrupted) but not
        # authenticity - both binary and checksums come from the same server.
        # For stronger guarantees, consider GPG signature verification.
        #
        # The file name column is compared exactly (with or without the "*"
        # binary-mode marker), so entries such as "<binary>.sig" are not
        # picked up. awk also exits 0 when nothing matches, which lets the
        # explicit error below run instead of the shell aborting on a failed
        # pipeline.
        EXPECTED=$(awk -v name="$BINARY" \
          '$2 == name || $2 == "*" name { print $1; exit }' "$CHECKSUMS")

        if [ -z "$EXPECTED" ]; then
          rm -f "$STAGED"
          echo "Error: no checksum found for ${BINARY}" >&2
          exit 1
        fi

        ACTUAL=$(sha256sum "$STAGED" | awk '{print $1}')
        if [ "$EXPECTED" != "$ACTUAL" ]; then
          rm -f "$STAGED"
          echo "Error: checksum mismatch!" >&2
          echo "  Expected: $EXPECTED" >&2
          echo "  Actual:   $ACTUAL" >&2
          exit 1
        fi

        # Verified: move into place and make executable.
        mv -f "$STAGED" "$TARGET"
        chmod +x "$TARGET"
        echo "Installed review-bot ${VERSION} (checksum verified)"

    # Runs the installed review-bot binary. All configuration is passed to it
    # through environment variables; the only command-line flag is --dry-run.
    - name: Run review
      shell: bash
      env:
        # SECURITY: use the server URL resolved by the "Determine version"
        # step. On GitHub/GHES that is github.server_url (inputs.gitea-url is
        # ignored there); on Gitea it is the validated https URL. This keeps
        # REVIEWER_TOKEN from being handed to an unvalidated user-supplied URL.
        GITEA_URL: ${{ steps.version.outputs.server_url }}
        GITEA_REPO: ${{ inputs.repo || github.repository }}
        PR_NUMBER: ${{ inputs.pr-number || github.event.pull_request.number }}
        REVIEWER_TOKEN: ${{ inputs.reviewer-token }}
        REVIEWER_NAME: ${{ inputs.reviewer-name }}
        LLM_BASE_URL: ${{ inputs.llm-base-url }}
        LLM_API_KEY: ${{ inputs.llm-api-key }}
        LLM_MODEL: ${{ inputs.llm-model }}
        CONVENTIONS_FILE: ${{ inputs.conventions-file }}
        PATTERNS_REPO: ${{ inputs.patterns-repo }}
        PATTERNS_FILES: ${{ inputs.patterns-files }}
        LLM_TEMPERATURE: ${{ inputs.temperature }}
        LLM_TIMEOUT: ${{ inputs.timeout }}
        LLM_PROVIDER: ${{ inputs.llm-provider }}
        UPDATE_EXISTING: ${{ inputs.update-existing }}
        SYSTEM_PROMPT_FILE: ${{ inputs.system-prompt-file }}
        PERSONA: ${{ inputs.persona }}
        PERSONA_FILE: ${{ inputs.persona-file }}
        AICORE_CLIENT_ID: ${{ inputs.aicore-client-id }}
        AICORE_CLIENT_SECRET: ${{ inputs.aicore-client-secret }}
        AICORE_AUTH_URL: ${{ inputs.aicore-auth-url }}
        AICORE_API_URL: ${{ inputs.aicore-api-url }}
        AICORE_RESOURCE_GROUP: ${{ inputs.aicore-resource-group }}
        # Wrapper-only values, passed via env so they are never interpolated
        # into the script text. The prefix avoids clashing with variable names
        # the binary itself may read.
        RB_ACTION_DRY_RUN: ${{ inputs.dry-run }}
        RB_ACTION_BIN: ${{ runner.temp }}/review-bot
      run: |
        # Only the exact string "true" enables dry-run mode.
        ARGS=()
        if [ "$RB_ACTION_DRY_RUN" = "true" ]; then
          ARGS+=(--dry-run)
        fi
        # Quoted so a temp directory containing spaces still resolves.
        "$RB_ACTION_BIN" "${ARGS[@]}"