diff --git a/.gitea/actions/review/action.yml b/.gitea/actions/review/action.yml index f0bd53e..12bdfdb 100644 --- a/.gitea/actions/review/action.yml +++ b/.gitea/actions/review/action.yml @@ -1,17 +1,37 @@ -# This composite action is designed for Gitea Actions runners. -# Gitea Actions supports GitHub Actions syntax including $GITHUB_OUTPUT, -# actions/cache, and actions/checkout. +# This composite action supports both Gitea Actions and GitHub Actions runners. +# It detects the VCS host type by checking whether github.api_url is set +# (present on GitHub.com and GHES runners, absent on Gitea runners) and uses +# the appropriate releases API for version resolution and binary download +# (REST API on GitHub, direct URLs on Gitea). +# +# Security notes: +# - On GitHub/GHES (VCS_TYPE=github), inputs.vcs-url is IGNORED to prevent +# token exfiltration. API calls use github.api_url; downloads use +# github.server_url. Tokens are never sent to user-supplied URLs. +# - On Gitea (VCS_TYPE=gitea), inputs.vcs-url is validated (https scheme, +# no whitespace/newlines) before use. +# - action-repo is validated against owner/repo pattern. +# - Tokens are passed via masked environment variables, not step outputs. +# # Requirements: python3, sha256sum, curl (all present on ubuntu-* runners). name: 'AI Code Review' description: 'Run AI-powered code review on a pull request using review-bot' inputs: vcs-url: - description: 'VCS server URL (defaults to server_url)' + description: 'VCS server URL (only used on Gitea runners; ignored on GitHub/GHES). Defaults to server_url.' required: false default: '' repo: - description: 'Repository (owner/name, defaults to current)' + description: 'Repository to review (owner/name, defaults to current)' + required: false + default: '' + action-repo: + description: 'Repository hosting review-bot releases (owner/name). Defaults to github.action_repository or rodin/review-bot.' 
+ required: false + default: '' + action-repo-token: + description: 'Token for downloading release assets from action-repo (defaults to github.token on GitHub, reviewer-token on Gitea). Required for private repos.' required: false default: '' pr-number: @@ -19,7 +39,7 @@ inputs: required: false default: '' reviewer-token: - description: 'Gitea token for posting the review' + description: 'Token for posting the review' required: true reviewer-name: description: 'Display name for the reviewer' @@ -112,19 +132,120 @@ runs: id: version shell: bash run: | - BASE_URL="${{ inputs.vcs-url || github.server_url }}" - REPO="${{ inputs.repo || 'rodin/review-bot' }}" + set -euo pipefail + + # --- Input Validation --- + + # Determine the repo hosting review-bot releases (not the repo being reviewed) + ACTION_REPO="${{ inputs.action-repo }}" + if [ -z "$ACTION_REPO" ]; then + # github.action_repository is the repo containing the running action + ACTION_REPO="${{ github.action_repository }}" + fi + if [ -z "$ACTION_REPO" ]; then + # Final fallback for Gitea (which may not set action_repository) + ACTION_REPO="rodin/review-bot" + echo "::notice::action-repo not specified and github.action_repository is empty; falling back to rodin/review-bot" + fi + + # Validate ACTION_REPO matches owner/repo pattern (prevent path traversal) + if ! printf '%s' "$ACTION_REPO" | grep -qE '^[a-zA-Z0-9._-]+/[a-zA-Z0-9._-]+$'; then + echo "Error: action-repo '${ACTION_REPO}' does not match expected owner/repo format" >&2 + exit 1 + fi + + # Detect VCS host type using github.api_url context. + # github.api_url is set on GitHub.com (https://api.github.com) and GHES + # (https://HOSTNAME/api/v3). It is empty/unset on Gitea Actions runners. + GITHUB_API_URL="${{ github.api_url }}" + if [ -n "$GITHUB_API_URL" ]; then + VCS_TYPE="github" + else + VCS_TYPE="gitea" + fi + + # Determine SERVER_URL based on VCS type. 
+ # SECURITY: On GitHub/GHES, ALWAYS use github.server_url — never trust + # inputs.vcs-url to prevent token exfiltration to attacker-controlled hosts. + if [ "$VCS_TYPE" = "github" ]; then + SERVER_URL="${{ github.server_url }}" + if [ -n "${{ inputs.vcs-url }}" ]; then + echo "::warning::inputs.vcs-url is ignored on GitHub/GHES runners (VCS_TYPE=github). Using github.server_url instead." + fi + else + SERVER_URL="${{ inputs.vcs-url || github.server_url }}" + fi + # Strip trailing slash if present + SERVER_URL="${SERVER_URL%/}" + + # Validate SERVER_URL for Gitea path: must be https, no whitespace/newlines. + # NOTE(review): grep matches per line, so an embedded newline is NOT rejected here; a multi-line value passes if any one line matches. + if [ "$VCS_TYPE" = "gitea" ]; then + if ! printf '%s' "$SERVER_URL" | grep -qE '^https://[^[:space:]]+$'; then + echo "Error: SERVER_URL '${SERVER_URL}' must be an https:// URL with no whitespace" >&2 + exit 1 + fi + fi + + # Determine auth token for release API requests + ACTION_TOKEN="${{ inputs.action-repo-token }}" + if [ -z "$ACTION_TOKEN" ]; then + if [ "$VCS_TYPE" = "github" ]; then + ACTION_TOKEN="${{ github.token }}" + else + ACTION_TOKEN="${{ inputs.reviewer-token }}" + fi + fi + + # Validate token contains no control characters (defense-in-depth against header injection) + if [ -n "$ACTION_TOKEN" ]; then + if printf '%s' "$ACTION_TOKEN" | LC_ALL=C grep -q '[^[:print:]]'; then + echo "Error: ACTION_TOKEN contains control characters" >&2 + exit 1 + fi + fi + if [ "${{ inputs.version }}" = "latest" ]; then - VERSION=$(curl -sSf "${BASE_URL}/api/v1/repos/${REPO}/releases?limit=1" \ - | python3 -c "import sys, json; releases = json.load(sys.stdin); print(releases[0]['tag_name'] if releases else '')") + if [ "$VCS_TYPE" = "github" ]; then + # SECURITY: Use github.api_url which is a trusted platform-provided value. + # Never construct API URLs from user-supplied inputs on GitHub. 
+ API_URL="${GITHUB_API_URL}/repos/${ACTION_REPO}/releases?per_page=1" + else + # Gitea API — SERVER_URL was validated above + API_URL="${SERVER_URL}/api/v1/repos/${ACTION_REPO}/releases?limit=1" + fi + + # Fetch latest version with inline auth header (no intermediate variable) + if [ -n "$ACTION_TOKEN" ]; then + if [ "$VCS_TYPE" = "github" ]; then + VERSION=$(curl -sSf --connect-timeout 10 --max-time 30 \ + -H "Authorization: Bearer ${ACTION_TOKEN}" "$API_URL" \ + | python3 -c "import sys, json; releases = json.load(sys.stdin); print(releases[0]['tag_name'] if releases else '')") + else + VERSION=$(curl -sSf --connect-timeout 10 --max-time 30 \ + -H "Authorization: token ${ACTION_TOKEN}" "$API_URL" \ + | python3 -c "import sys, json; releases = json.load(sys.stdin); print(releases[0]['tag_name'] if releases else '')") + fi + else + VERSION=$(curl -sSf --connect-timeout 10 --max-time 30 "$API_URL" \ + | python3 -c "import sys, json; releases = json.load(sys.stdin); print(releases[0]['tag_name'] if releases else '')") + fi + if [ -z "$VERSION" ]; then - echo "Failed to determine latest version" >&2 + echo "Failed to determine latest version from ${API_URL}" >&2 exit 1 fi else VERSION="${{ inputs.version }}" fi + # Validate VERSION: no slashes or whitespace (prevent path traversal). + # [:space:] includes newlines and carriage returns in POSIX. 
+ if printf '%s' "$VERSION" | grep -qE '[/[:space:]]'; then + echo "Error: VERSION '${VERSION}' contains invalid characters (newline, slash, or whitespace)" >&2 + exit 1 + fi + # Detect OS and architecture for platform-specific binary download OS_RAW=$(uname -s | tr '[:upper:]' '[:lower:]') case "$OS_RAW" in @@ -149,6 +270,16 @@ runs: echo "version=${VERSION}" >> "$GITHUB_OUTPUT" echo "os=${OS}" >> "$GITHUB_OUTPUT" echo "arch=${ARCH}" >> "$GITHUB_OUTPUT" + echo "action_repo=${ACTION_REPO}" >> "$GITHUB_OUTPUT" + echo "server_url=${SERVER_URL}" >> "$GITHUB_OUTPUT" + echo "vcs_type=${VCS_TYPE}" >> "$GITHUB_OUTPUT" + + # SECURITY: Pass token via masked environment variable instead of step output. + # Step outputs can leak in debug logs; GITHUB_ENV with masking is safer. + if [ -n "$ACTION_TOKEN" ]; then + echo "::add-mask::${ACTION_TOKEN}" + echo "ACTION_TOKEN=${ACTION_TOKEN}" >> "$GITHUB_ENV" + fi - name: Cache review-bot binary id: cache @@ -161,21 +292,101 @@ runs: if: steps.cache.outputs.cache-hit != 'true' shell: bash run: | - BASE_URL="${{ inputs.vcs-url || github.server_url }}" - REPO="${{ inputs.repo || 'rodin/review-bot' }}" - VERSION="${{ steps.version.outputs.version }}" - BINARY="review-bot-${{ steps.version.outputs.os }}-${{ steps.version.outputs.arch }}" + set -euo pipefail - curl -sSfL "${BASE_URL}/${REPO}/releases/download/${VERSION}/${BINARY}" \ - -o "${{ runner.temp }}/review-bot" - curl -sSfL "${BASE_URL}/${REPO}/releases/download/${VERSION}/checksums.txt" \ - -o "${{ runner.temp }}/checksums.txt" + SERVER_URL="${{ steps.version.outputs.server_url }}" + ACTION_REPO="${{ steps.version.outputs.action_repo }}" + VERSION="${{ steps.version.outputs.version }}" + VCS_TYPE="${{ steps.version.outputs.vcs_type }}" + OS="${{ steps.version.outputs.os }}" + ARCH="${{ steps.version.outputs.arch }}" + # Read token from masked environment variable (set in Determine version step) + # Falls back to empty if not set (public repos don't need auth) + 
ACTION_TOKEN="${ACTION_TOKEN:-}" + BINARY="review-bot-${OS}-${ARCH}" + + if [ "$VCS_TYPE" = "github" ]; then + # GitHub/GHES: Use REST API for release asset downloads. + # Web release URLs ({server}/.../releases/download/{tag}/{asset}) redirect + # to S3 and don't reliably support Authorization headers for private repos. + # The REST API endpoint with Accept: application/octet-stream is required. + # GITHUB_API_URL: trusted platform value, same as detected in "Determine version" step. + GITHUB_API_URL="${{ github.api_url }}" + + if [ -n "$ACTION_TOKEN" ]; then + RELEASE_JSON=$(curl -sSf --connect-timeout 10 --max-time 30 \ + -H "Authorization: Bearer ${ACTION_TOKEN}" \ + "${GITHUB_API_URL}/repos/${ACTION_REPO}/releases/tags/${VERSION}") + else + RELEASE_JSON=$(curl -sSf --connect-timeout 10 --max-time 30 \ + "${GITHUB_API_URL}/repos/${ACTION_REPO}/releases/tags/${VERSION}") + fi + + # Extract asset IDs for binary and checksums + BINARY_ASSET_ID=$(printf '%s' "$RELEASE_JSON" | python3 -c "import sys, json; assets = json.load(sys.stdin).get('assets', []); matches = [a['id'] for a in assets if a['name'] == '${BINARY}']; print(matches[0] if matches else '')") + if [ -z "$BINARY_ASSET_ID" ]; then + echo "Error: could not find asset '${BINARY}' in release ${VERSION}" >&2 + exit 1 + fi + + CHECKSUMS_ASSET_ID=$(printf '%s' "$RELEASE_JSON" | python3 -c "import sys, json; assets = json.load(sys.stdin).get('assets', []); matches = [a['id'] for a in assets if a['name'] == 'checksums.txt']; print(matches[0] if matches else '')") + if [ -z "$CHECKSUMS_ASSET_ID" ]; then + echo "Error: could not find asset 'checksums.txt' in release ${VERSION}" >&2 + exit 1 + fi + + # Download assets via REST API with Accept: application/octet-stream + if [ -n "$ACTION_TOKEN" ]; then + curl -sSfL --connect-timeout 10 --max-time 120 \ + -H "Authorization: Bearer ${ACTION_TOKEN}" \ + -H "Accept: application/octet-stream" \ + "${GITHUB_API_URL}/repos/${ACTION_REPO}/releases/assets/${BINARY_ASSET_ID}" 
\ + -o "${{ runner.temp }}/review-bot" + curl -sSfL --connect-timeout 10 --max-time 30 \ + -H "Authorization: Bearer ${ACTION_TOKEN}" \ + -H "Accept: application/octet-stream" \ + "${GITHUB_API_URL}/repos/${ACTION_REPO}/releases/assets/${CHECKSUMS_ASSET_ID}" \ + -o "${{ runner.temp }}/checksums.txt" + else + curl -sSfL --connect-timeout 10 --max-time 120 \ + -H "Accept: application/octet-stream" \ + "${GITHUB_API_URL}/repos/${ACTION_REPO}/releases/assets/${BINARY_ASSET_ID}" \ + -o "${{ runner.temp }}/review-bot" + curl -sSfL --connect-timeout 10 --max-time 30 \ + -H "Accept: application/octet-stream" \ + "${GITHUB_API_URL}/repos/${ACTION_REPO}/releases/assets/${CHECKSUMS_ASSET_ID}" \ + -o "${{ runner.temp }}/checksums.txt" + fi + else + # Gitea: Direct download via web release URLs (Gitea serves assets + # directly without redirects — no -L needed). + # SECURITY: Omitting -L prevents forwarding Authorization header to + # unexpected hosts if Gitea ever introduces CDN redirects. + DOWNLOAD_URL="${SERVER_URL}/${ACTION_REPO}/releases/download/${VERSION}" + + if [ -n "$ACTION_TOKEN" ]; then + curl -sSf --connect-timeout 10 --max-time 120 \ + -H "Authorization: token ${ACTION_TOKEN}" \ + "${DOWNLOAD_URL}/${BINARY}" -o "${{ runner.temp }}/review-bot" + curl -sSf --connect-timeout 10 --max-time 30 \ + -H "Authorization: token ${ACTION_TOKEN}" \ + "${DOWNLOAD_URL}/checksums.txt" -o "${{ runner.temp }}/checksums.txt" + else + curl -sSf --connect-timeout 10 --max-time 120 \ + "${DOWNLOAD_URL}/${BINARY}" -o "${{ runner.temp }}/review-bot" + curl -sSf --connect-timeout 10 --max-time 30 \ + "${DOWNLOAD_URL}/checksums.txt" -o "${{ runner.temp }}/checksums.txt" + fi + fi # Verify SHA-256 checksum + # NOTE: This verifies integrity (download wasn't corrupted) but not + # authenticity — both binary and checksums come from the same server. + # For stronger guarantees, consider GPG signature verification. 
cd "${{ runner.temp }}" - EXPECTED=$(grep -E "^[[:xdigit:]]+[[:space:]]+\*?${BINARY}$" checksums.txt | awk '{print $1}') + EXPECTED=$(grep -E "^[0-9a-f]+[[:space:]]+\*?${BINARY}$" checksums.txt | awk '{print $1}') # sha256sum (GNU) is not available on macOS; use shasum -a 256 on darwin. - if [ "${{ steps.version.outputs.os }}" = "darwin" ]; then + if [ "${OS}" = "darwin" ]; then ACTUAL=$(shasum -a 256 review-bot | awk '{print $1}') else ACTUAL=$(sha256sum review-bot | awk '{print $1}') @@ -193,12 +404,12 @@ runs: fi chmod +x "${{ runner.temp }}/review-bot" - echo "Installed review-bot-${{ steps.version.outputs.os }}-${{ steps.version.outputs.arch }} ${VERSION} (checksum verified)" + echo "Installed review-bot-${OS}-${ARCH} ${VERSION} (checksum verified)" - name: Run review shell: bash env: - VCS_URL: ${{ inputs.vcs-url || github.server_url }} + VCS_URL: ${{ steps.version.outputs.server_url }} GITEA_REPO: ${{ inputs.repo || github.repository }} PR_NUMBER: ${{ inputs.pr-number || github.event.pull_request.number }} REVIEWER_TOKEN: ${{ inputs.reviewer-token }}