diff --git a/.github/workflows/sqlancer-sqlite-fuzz.yml b/.github/workflows/sqlancer-sqlite-fuzz.yml new file mode 100644 index 000000000..07209061d --- /dev/null +++ b/.github/workflows/sqlancer-sqlite-fuzz.yml @@ -0,0 +1,241 @@ +name: SQLancer SQLite Fuzz + +on: + push: + branches: + - codex/sqlancer-sqlite-fuzz + schedule: + # Bounded recurring fuzzing. GitHub Actions cannot run forever, so this + # workflow rotates deterministic seeds/oracles on a schedule. + - cron: '17 */6 * * *' + workflow_dispatch: + inputs: + oracle: + description: 'SQLancer MySQL oracle to run, or auto to rotate by run number.' + required: false + default: 'auto' + type: choice + options: + - auto + - FUZZER + - TLP_WHERE + - PQS + - DQP + - DQE + seed: + description: 'Optional deterministic SQLancer seed.' + required: false + default: '' + num_queries: + description: 'Optional query count. Defaults are chosen per oracle.' + required: false + default: '' + max_generated_databases: + description: 'Maximum generated databases.' + required: false + default: '1' + append_findings: + description: 'Append replay failures to the SQLancer findings issue.' + required: false + default: true + type: boolean + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: {} + +jobs: + fuzz: + name: SQLancer ${{ github.event_name == 'workflow_dispatch' && inputs.oracle || 'auto' }} + runs-on: ubuntu-latest + timeout-minutes: 120 + permissions: + contents: read + issues: write # Required to create/update the findings issue. 
+ + env: + FINDINGS_ISSUE_TITLE: SQLancer SQLite replay findings + FINDINGS_LIMIT: 1000 + INPUT_ORACLE: ${{ github.event_name == 'workflow_dispatch' && inputs.oracle || 'auto' }} + INPUT_SEED: ${{ github.event_name == 'workflow_dispatch' && inputs.seed || '' }} + INPUT_NUM_QUERIES: ${{ github.event_name == 'workflow_dispatch' && inputs.num_queries || '' }} + INPUT_MAX_GENERATED_DATABASES: ${{ github.event_name == 'workflow_dispatch' && inputs.max_generated_databases || '1' }} + INPUT_APPEND_FINDINGS: ${{ github.event_name != 'workflow_dispatch' || inputs.append_findings }} + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up PHP + uses: shivammathur/setup-php@v2 + with: + php-version: '8.3' + coverage: none + + - name: Install Composer dependencies (root) + uses: ramsey/composer-install@v3 + with: + ignore-cache: 'yes' + composer-options: '--optimize-autoloader' + + - name: Install Composer dependencies (mysql-on-sqlite) + uses: ramsey/composer-install@v3 + with: + working-directory: packages/mysql-on-sqlite + ignore-cache: 'yes' + composer-options: '--optimize-autoloader' + + - name: Choose fuzz settings + id: settings + run: | + set -euo pipefail + + ORACLES=( FUZZER TLP_WHERE PQS DQP DQE ) + DEFAULT_QUERIES=( 10000 3000 1000 300 300 ) + + ORACLE="$INPUT_ORACLE" + if [ -z "$ORACLE" ] || [ "$ORACLE" = 'auto' ]; then + INDEX=$(( GITHUB_RUN_NUMBER % ${#ORACLES[@]} )) + ORACLE="${ORACLES[$INDEX]}" + fi + + NUM_QUERIES="$INPUT_NUM_QUERIES" + if [ -z "$NUM_QUERIES" ]; then + NUM_QUERIES=1000 + for i in "${!ORACLES[@]}"; do + if [ "${ORACLES[$i]}" = "$ORACLE" ]; then + NUM_QUERIES="${DEFAULT_QUERIES[$i]}" + break + fi + done + fi + + SEED="$INPUT_SEED" + if [ -z "$SEED" ]; then + SEED=$(( 20270000 + GITHUB_RUN_NUMBER )) + fi + + MAX_GENERATED_DATABASES="$INPUT_MAX_GENERATED_DATABASES" + if [ -z "$MAX_GENERATED_DATABASES" ]; then + MAX_GENERATED_DATABASES=1 + fi + + 
RUNNER_OUTPUT_DIR="$RUNNER_TEMP/sqlancer-output-$ORACLE-$SEED" + + { + echo "oracle=$ORACLE" + echo "seed=$SEED" + echo "num_queries=$NUM_QUERIES" + echo "max_generated_databases=$MAX_GENERATED_DATABASES" + echo "runner_output_dir=$RUNNER_OUTPUT_DIR" + } >> "$GITHUB_OUTPUT" + + { + echo "SQLANCER_MYSQL_ORACLE=$ORACLE" + echo "RANDOM_SEED=$SEED" + echo "NUM_QUERIES=$NUM_QUERIES" + echo "MAX_GENERATED_DATABASES=$MAX_GENERATED_DATABASES" + echo "ARTIFACTS_DIR=$RUNNER_OUTPUT_DIR" + echo "RUNNER_OUTPUT_DIR=$RUNNER_OUTPUT_DIR" + } >> "$GITHUB_ENV" + + - name: Run SQLancer replay + id: fuzz + continue-on-error: true + run: | + set +e + mkdir -p "$RUNNER_OUTPUT_DIR" + ./bin/run-sqlancer-sqlite-fuzz.sh > "$RUNNER_OUTPUT_DIR/runner-output.txt" 2>&1 + STATUS=$? + cat "$RUNNER_OUTPUT_DIR/runner-output.txt" + echo "status=$STATUS" >> "$GITHUB_OUTPUT" + exit "$STATUS" + + - name: Detect replay failure + id: replay_failure + if: steps.fuzz.outputs.status != '0' + run: | + if grep -q '^FAIL line [0-9][0-9]*:' "$RUNNER_OUTPUT_DIR/runner-output.txt"; then + echo "found=true" >> "$GITHUB_OUTPUT" + else + echo "found=false" >> "$GITHUB_OUTPUT" + fi + + - name: Append replay failure to findings issue + if: steps.replay_failure.outputs.found == 'true' && env.INPUT_APPEND_FINDINGS == 'true' + env: + GH_TOKEN: ${{ github.token }} + RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + set -euo pipefail + + COMMENT_FILE="$RUNNER_TEMP/sqlancer-finding-comment.md" + MARKER_FILE="$RUNNER_TEMP/sqlancer-finding-marker.txt" + + php tests/fuzz/append-sqlancer-finding.php \ + --output="$RUNNER_OUTPUT_DIR/runner-output.txt" \ + --comment="$COMMENT_FILE" \ + --marker="$MARKER_FILE" \ + --oracle="$SQLANCER_MYSQL_ORACLE" \ + --seed="$RANDOM_SEED" \ + --num-queries="$NUM_QUERIES" \ + --max-generated-databases="$MAX_GENERATED_DATABASES" \ + --commit="$GITHUB_SHA" \ + --run-url="$RUN_URL" + + MARKER="$(cat "$MARKER_FILE")" + + ISSUE_NUMBER="$( + gh issue 
list \ + --state open \ + --search "$FINDINGS_ISSUE_TITLE in:title" \ + --json number,title \ + --jq ".[] | select(.title == \"$FINDINGS_ISSUE_TITLE\") | .number" \ + | head -n 1 + )" + + if [ -z "$ISSUE_NUMBER" ]; then + ISSUE_BODY="$RUNNER_TEMP/sqlancer-findings-issue.md" + cat > "$ISSUE_BODY" <<'EOF' + This issue is maintained by the SQLancer SQLite fuzz workflow. + + Each finding is stored as one issue comment with a hidden `sqlancer-finding` marker. The workflow skips duplicate markers and stops appending after 1000 finding comments. Reduce entries into `tests/e2e/specs/sqlancer-fuzz-regressions.test.js` before fixing them. + EOF + ISSUE_URL="$(gh issue create --title "$FINDINGS_ISSUE_TITLE" --body-file "$ISSUE_BODY")" + ISSUE_NUMBER="${ISSUE_URL##*/}" + fi + + EXISTING_MARKERS="$( + gh api --paginate "repos/$GITHUB_REPOSITORY/issues/$ISSUE_NUMBER/comments" \ + --jq '.[].body' \ + | grep -F '', $failure_hash ); + +$entry = sprintf( + "%s\n### %s seed %s line %s\n\n- Found: %s\n- Run: %s\n- Commit: `%s`\n- Settings: `SQLANCER_MYSQL_ORACLE=%s RANDOM_SEED=%s NUM_QUERIES=%s MAX_GENERATED_DATABASES=%s`\n\n```sql\n%s\n```\n\n```text\n%s\n```\n", + $marker, + $args['oracle'], + $args['seed'], + $failure_line, + gmdate( 'Y-m-d H:i:s \U\T\C' ), + $args['run-url'], + $args['commit'], + $args['oracle'], + $args['seed'], + $args['num-queries'], + $args['max-generated-databases'], + $failure_sql, + $exception +); + +if ( ! empty( $sqlite_lines ) ) { + $entry .= "\nTranslated SQLite replay SQL:\n\n```sql\n" . implode( "\n", $sqlite_lines ) . "\n```\n"; +} + +$comment_dir = dirname( $args['comment'] ); +if ( ! is_dir( $comment_dir ) ) { + mkdir( $comment_dir, 0777, true ); +} + +$marker_dir = dirname( $args['marker'] ); +if ( ! is_dir( $marker_dir ) ) { + mkdir( $marker_dir, 0777, true ); +} + +file_put_contents( $args['comment'], $entry ); +file_put_contents( $args['marker'], $marker . 
"\n" ); + +echo "Formatted SQLancer finding: $marker\n"; diff --git a/tests/fuzz/replay-sqlancer-log.php b/tests/fuzz/replay-sqlancer-log.php new file mode 100644 index 000000000..a2de108de --- /dev/null +++ b/tests/fuzz/replay-sqlancer-log.php @@ -0,0 +1,95 @@ + [--skip-line=N ...]\n" ); + exit( 1 ); +} + +require_once dirname( __DIR__, 2 ) . '/packages/mysql-on-sqlite/tests/bootstrap.php'; + +$pdo_class = PHP_VERSION_ID >= 80400 ? PDO\SQLite::class : PDO::class; +$pdo = new $pdo_class( 'sqlite::memory:' ); +$driver = new WP_SQLite_Driver( + new WP_SQLite_Connection( array( 'pdo' => $pdo ) ), + 'wp' +); + +$line_number = 0; +foreach ( file( $log_file, FILE_IGNORE_NEW_LINES ) as $line ) { + ++$line_number; + + $sql = sqlancer_log_line_to_sql( $line ); + if ( null === $sql ) { + continue; + } + + if ( + isset( $skip_lines[ $line_number ] ) + || preg_match( '/^(DROP DATABASE|CREATE DATABASE|USE)\b/i', $sql ) + ) { + printf( "SKIP line %d: %s\n", $line_number, $sql ); + continue; + } + + try { + $driver->query( $sql ); + printf( "OK line %d: %s\n", $line_number, $sql ); + } catch ( Throwable $e ) { + fprintf( STDERR, "FAIL line %d: %s\n", $line_number, $sql ); + fprintf( STDERR, "%s: %s\n", get_class( $e ), $e->getMessage() ); + foreach ( $driver->get_last_sqlite_queries() as $query ) { + fprintf( + STDERR, + " SQLITE: %s PARAMS=%s\n", + $query['sql'], + json_encode( $query['params'] ) + ); + } + exit( 1 ); + } +} + +echo "REPLAY_OK\n"; + +/** + * Extract SQL from a SQLancer log line. + * + * @param string $line Log line. + * @return string|null SQL statement, or null for metadata/blank lines. 
+ */
+function sqlancer_log_line_to_sql( $line ) {
+	$line = trim( $line );
+
+	// Blank lines and lines that start with "--" carry no statement to replay.
+	if ( '' === $line || 0 === strpos( $line, '--' ) ) {
+		return null;
+	}
+
+	// Collapse a trailing "; -- <digits>ms" suffix (optionally followed by ";")
+	// into a single ";" terminator. Presumably this suffix is the per-statement
+	// timing annotation SQLancer appends to its logs.
+	$sql = preg_replace( '/;\s*--\s*\d+ms;?$/', ';', $line );
+
+	// preg_replace() returns null when PCRE itself fails. The replay loop treats
+	// a null result as "not a statement" and skips the line, so fall back to the
+	// trimmed line instead of silently dropping a statement from the replay.
+	return null === $sql ? $line : $sql;
+}
diff --git a/tests/fuzz/sqlancer-findings.md b/tests/fuzz/sqlancer-findings.md
new file mode 100644
index 000000000..52d93d06b
--- /dev/null
+++ b/tests/fuzz/sqlancer-findings.md
@@ -0,0 +1,10 @@
+# SQLancer SQLite Findings
+
+The scheduled SQLancer SQLite fuzz workflow records new failures in a GitHub
+issue named `SQLancer SQLite replay findings`.
+
+Each issue comment records one MySQL-accepted SQLancer statement that failed
+when replayed through the SQLite driver. The workflow deduplicates comments by a
+hidden finding hash and stops appending after 1000 findings. Reduce each finding,
+move the reduced query into `tests/e2e/specs/sqlancer-fuzz-regressions.test.js`
+and the package-level SQLancer test slice, then fix it in this PR.