#!/usr/bin/env bash
#
# .github/scripts/track-en-changes.sh
#
# Track changes in php/doc-en and create issues in doc-de
# for files that need translation updates.
#
# How it works:
# - Fetches all commits on doc-en master from the last 7 days
# - For each commit, checks if a matching issue already exists in doc-de
# - If not, creates one listing the DE files to update
#
# Why 7 days: the action runs daily, so 7 days gives us 6 days of margin.
# Even if the action fails for a whole week, nothing is missed.
# Duplicates are prevented by searching existing issues for the commit SHA
# (or the associated PR URL, for backward compat); both are written into the
# body of every issue this script creates.
#
# Required environment:
#   GH_REPO   owner/name of the repository in which the issues are created
#   GH_TOKEN  credentials for the gh CLI (provided by the workflow)
#
# Must be run from the root of the translation checkout: whether a changed
# path already exists is tested relative to the current directory.
#
# Exit status:
#   0  every commit was handled (issue created or deliberately skipped)
#   1  at least one commit, or the commit listing itself, failed and is left
#      for the next run
#
set -euo pipefail

# Fail early and clearly instead of hitting an "unbound variable" error in the
# middle of the loop.
: "${GH_REPO:?GH_REPO must be set to the owner/name of the translation repository}"

# Print the number of issues (any state) in $GH_REPO matching the search query
# given as $1. Prints nothing when the lookup fails, so that callers can tell
# "no match" (0) apart from "unknown" (empty output).
count_issues() {
  gh issue list --repo "$GH_REPO" --search "$1" \
    --state all --json number --jq 'length' 2>/dev/null || true
}

# Ensure the sync-en label exists (idempotent; ignored if already present).
gh label create sync-en --color "0e8a16" --description "Sync from upstream doc-en" 2>/dev/null || true

# Note: "-d '7 days ago'" is GNU date syntax.
SINCE=$(date -u -d '7 days ago' '+%Y-%m-%dT%H:%M:%SZ')
echo "Checking doc-en commits on master since $SINCE"

# Fetch all commits on master from the last 7 days (paginate if needed).
PAGE=1
ALL_SHAS=()
LIST_FAILED=0
while :; do
  # A failed listing must not be mistaken for an empty page: remember it so the
  # run ends red, but still process whatever was collected so far.
  if ! BATCH=$(gh api "repos/php/doc-en/commits?sha=master&since=$SINCE&per_page=100&page=$PAGE" \
    --jq '.[].sha' 2>/dev/null); then
    echo "  Page $PAGE: fetch failed, will retry on the next run." >&2
    LIST_FAILED=1
    break
  fi

  # Count in the shell rather than with "grep -c . || echo 0": for an empty
  # page grep prints 0 *and* exits non-zero, so that form yields "0" twice, the
  # numeric test below errors out, and the loop never terminates.
  COUNT=0
  while IFS= read -r LINE; do
    [ -n "$LINE" ] || continue
    ALL_SHAS+=("$LINE")
    COUNT=$((COUNT + 1))
  done <<< "$BATCH"

  echo "  Page $PAGE: $COUNT commit(s)"
  # A short (or empty) page is the last one.
  if [ "$COUNT" -lt 100 ]; then
    break
  fi
  PAGE=$((PAGE + 1))
done

TOTAL=${#ALL_SHAS[@]}
echo "Total: $TOTAL commit(s) on master in the last 7 days"

if [ "$TOTAL" -eq 0 ]; then
  if [ "$LIST_FAILED" -ne 0 ]; then
    echo "Could not list doc-en commits." >&2
    exit 1
  fi
  echo "Nothing to do."
  exit 0
fi

CREATED=0
SKIPPED=0
FAILED=0

for SHA in "${ALL_SHAS[@]}"; do
  SHORT_SHA=${SHA:0:7}

  # Get commit info (single API call). A transient failure here must skip this
  # commit, not abort the whole run (set -e would otherwise exit on the failing
  # command substitution): the commit is picked up on the next run.
  COMMIT_DATA=$(gh api "repos/php/doc-en/commits/$SHA" \
    --jq '{msg: (.commit.message | split("\n")[0]), author: (.author.login // .commit.author.name), files: [.files[].filename]}' 2>/dev/null || true)
  if [ -z "$COMMIT_DATA" ]; then
    echo ""
    echo "Commit $SHORT_SHA: metadata fetch failed, will retry on the next run."
    FAILED=$((FAILED + 1))
    continue
  fi
  # msg is only the first line of the commit message (see the jq filter above).
  COMMIT_MSG=$(jq -r '.msg' <<< "$COMMIT_DATA")
  COMMIT_AUTHOR=$(jq -r '.author' <<< "$COMMIT_DATA")

  echo ""
  echo "Commit $SHORT_SHA by $COMMIT_AUTHOR: $COMMIT_MSG"

  # Skip commits marked with [skip-revcheck] (case-insensitive match).
  if [[ "${COMMIT_MSG,,}" == *'[skip-revcheck]'* ]]; then
    echo "  -> [skip-revcheck], skipping."
    SKIPPED=$((SKIPPED + 1))
    continue
  fi

  # Deduplication: search for the full commit SHA in existing issues.
  # If the lookup itself fails, skip this commit rather than create a possible
  # duplicate on unknown state (and rather than abort the whole run).
  EXISTING=$(count_issues "\"$SHA\"")
  if ! [[ "$EXISTING" =~ ^[0-9]+$ ]]; then
    echo "  -> dedup lookup failed for $SHORT_SHA, will retry on the next run."
    FAILED=$((FAILED + 1))
    continue
  fi
  if [ "$EXISTING" -gt 0 ]; then
    echo "  -> Issue already exists (by SHA), skipping."
    SKIPPED=$((SKIPPED + 1))
    continue
  fi

  # Backward compat: if commit is associated with a PR, also check by PR number.
  # A failed PR lookup is treated like "no associated PR".
  PR_NUMBER=$(gh api "repos/php/doc-en/commits/$SHA/pulls" \
    --jq '.[0].number // empty' 2>/dev/null || true)
  if [ -n "$PR_NUMBER" ]; then
    EXISTING=$(count_issues "\"doc-en/pull/$PR_NUMBER\"")
    if ! [[ "$EXISTING" =~ ^[0-9]+$ ]]; then
      echo "  -> dedup lookup failed for $SHORT_SHA, will retry on the next run."
      FAILED=$((FAILED + 1))
      continue
    fi
    if [ "$EXISTING" -gt 0 ]; then
      echo "  -> Issue already exists (by PR #$PR_NUMBER), skipping."
      SKIPPED=$((SKIPPED + 1))
      continue
    fi
  fi

  # Get files changed in this commit
  FILES=$(jq -r '.files[]' <<< "$COMMIT_DATA")

  if [ -z "$FILES" ]; then
    echo "  -> No files found, skipping."
    SKIPPED=$((SKIPPED + 1))
    continue
  fi

  # Categorize files:
  #   - any versions.xml is ignored
  #   - a path that exists in the current checkout goes to the update list
  #   - a missing path ending in .xml goes to the "new" list
  #   - anything else is dropped
  UPDATE_LIST=""
  NEW_LIST=""

  while IFS= read -r FILE; do
    if [[ "$FILE" == */versions.xml ]]; then
      continue
    fi
    if [ -f "$FILE" ]; then
      UPDATE_LIST="${UPDATE_LIST}- \`${FILE}\`"$'\n'
    elif [[ "$FILE" == *.xml ]]; then
      NEW_LIST="${NEW_LIST}- \`${FILE}\`"$'\n'
    fi
  done <<< "$FILES"

  # Skip if no DE-relevant files
  if [ -z "$UPDATE_LIST" ] && [ -z "$NEW_LIST" ]; then
    echo "  -> No DE-relevant files, skipping."
    SKIPPED=$((SKIPPED + 1))
    continue
  fi

  # Build issue body (URLs in backticks to avoid crosslinks). The commit URL
  # and PR URL written here are what the dedup searches above match against.
  BODY="Commit: \`https://github.com/php/doc-en/commit/$SHA\`"$'\n'
  if [ -n "$PR_NUMBER" ]; then
    BODY+="PR: \`https://github.com/php/doc-en/pull/$PR_NUMBER\`"$'\n'
  fi

  if [ -n "$UPDATE_LIST" ]; then
    BODY+=$'\n'"**Zu aktualisierende DE-Dateien**"$'\n'
    BODY+="$UPDATE_LIST"
  fi

  if [ -n "$NEW_LIST" ]; then
    BODY+=$'\n'"**Neue EN-Dateien (noch nicht übersetzt)**"$'\n'
    BODY+="$NEW_LIST"
  fi

  # Create the issue with a single REST POST. Using the API directly avoids the
  # label-name pre-fetch that "gh issue create --label" performs before posting;
  # that pre-fetch is the call that failed with a transient 401. The write is not
  # retried in-process (a retry could create a duplicate if a POST succeeded
  # server-side but reported an error): a single failure does not abort the run,
  # and the commit is picked up on the next daily run thanks to the 7-day window
  # and the SHA-based dedup.
  ISSUE_TITLE="[Sync EN] $COMMIT_MSG"
  if gh api "repos/$GH_REPO/issues" \
    --method POST \
    -f "title=$ISSUE_TITLE" \
    -f "body=$BODY" \
    -f "labels[]=sync-en" \
    --jq '.html_url'; then
    echo "  -> Issue created."
    CREATED=$((CREATED + 1))
  else
    echo "  -> Issue creation failed for $SHORT_SHA, will retry on the next run."
    FAILED=$((FAILED + 1))
  fi
done

echo ""
echo "Done. Created: $CREATED, Skipped: $SKIPPED, Failed: $FAILED"

# Surface failures so the run is visibly red, while still having processed
# every commit that could be listed (the failed ones are retried on the next
# run).
if [ "$FAILED" -gt 0 ] || [ "$LIST_FAILED" -ne 0 ]; then
  exit 1
fi

# -----------------------------------------------------------------------------
# Companion workflow from the same change set, reproduced unchanged for
# reference. It is YAML, not part of this script, and belongs in
# .github/workflows/track-en-changes.yml
# -----------------------------------------------------------------------------
#   name: "Track doc-en changes"
#
#   on:
#     schedule:
#       - cron: "0 6 * * *"
#     workflow_dispatch:
#
#   permissions:
#     issues: write
#     contents: read
#
#   jobs:
#     track:
#       name: "Track doc-en changes"
#       runs-on: "ubuntu-latest"
#
#       steps:
#         - name: "Checkout doc-de"
#           uses: "actions/checkout@v4"
#
#         - name: "Check doc-en merges and create issues"
#           env:
#             GH_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
#             GH_REPO: "${{ github.repository }}"
#           run: bash .github/scripts/track-en-changes.sh