Update CI workflow for comprehensive testing and benchmarking

Enhance the GitHub Actions CI workflow by introducing a comprehensive test job that runs a new script (`run_all_tests.sh`) for building, testing, and benchmarking. Update dependencies to use the latest actions and ensure consistent environment variables. Remove the previous build-and-test and benchmark jobs, consolidating functionality for improved clarity and efficiency.
2025-09-09 15:48:43 +00:00 · 2025-05-19 23:11:44 -04:00
parent 0c40bdac84
commit f8f890c29a
2 changed files with 78 additions and 58 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,3 +1,6 @@
 # .github/workflows/ci.yml
 # This is the full GitHub Actions workflow file.
 name: CI
 on:
@@ -6,88 +9,105 @@ on:
  pull_request:
    branches: [ main ]
 env:
  CARGO_TERM_COLOR: always
  CARGO_TARGET_DIR: ${{ github.workspace }}/target # Consistent target dir
  RUST_BACKTRACE: 1
 jobs:
-  build-and-test:
+  # This job will now run your comprehensive script
-    name: Build & Test
+  comprehensive-tests:
    name: Comprehensive Tests & Benchmarks
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4 # Updated to v4
      - name: Install Rust (stable)
-        uses: actions-rs/toolchain@v1
+        # `actions/setup-rust` is not a published action, so a `uses:` pointing at
+        # it cannot be resolved and the job fails before any step runs.
+        # dtolnay/rust-toolchain installs the requested toolchain through rustup
+        # and makes it the default for the following steps.
+        uses: dtolnay/rust-toolchain@stable
        with:
          toolchain: stable
-          override: true
+          # Optional: this action has no cache input; add a separate
+          # Swatinem/rust-cache step after it if build caching is wanted.
-      - name: Build (release)
+      - name: Install system prerequisites for tests and benchmarks
-        run: cargo build --workspace --release
+        run: |
          sudo apt-get update
          sudo apt-get install -y hyperfine jq bc # For benchmarks within run_all_tests.sh
-      - name: Run tests
+      - name: Ensure run_all_tests.sh is executable
-        run: cargo test --all -- --nocapture
+        run: chmod +x ./run_all_tests.sh
      - name: Run Comprehensive Test Script
        # This script should now handle:
        # - Building and installing Marlin
        # - Running cargo test --all
        # - Generating test corpus
        # - Running bench/dirty-vs-full.sh (produces dirty-vs-full.md)
        # - (Optional) Running the cold-start benchmark (produces perf.json or similar)
        # - Running the demo flow tests
        run: ./run_all_tests.sh
      - name: Upload Dirty vs Full Benchmark Report
        uses: actions/upload-artifact@v4
        with:
          name: marlin-dirty-vs-full-benchmark-report
          path: bench/dirty-vs-full.md
          if-no-files-found: warn # Don't fail if the file isn't there, just warn
          retention-days: 7
      # If your run_all_tests.sh now also creates perf.json for cold-start
      - name: Upload Cold Start Benchmark JSON (if generated by script)
        uses: actions/upload-artifact@v4
        if: ${{ success() }} # Only if previous steps succeed
        with:
          name: marlin-cold-start-perf-json
          path: perf.json # Assuming run_all_tests.sh now creates this
          if-no-files-found: ignore # Okay if this specific file isn't generated
          retention-days: 7
  coverage:
    name: Code Coverage (Tarpaulin)
-    needs: build-and-test
+    needs: comprehensive-tests # Run after the main tests
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v3
+      - name: Checkout code
        uses: actions/checkout@v4
-      - name: Install Rust (nightly)
+      - name: Install Rust (nightly for Tarpaulin, if needed)
-        uses: actions-rs/toolchain@v1
+        # `actions/setup-rust` is not a published action, so a `uses:` pointing at
+        # it cannot be resolved. dtolnay/rust-toolchain installs the toolchain
+        # through rustup and sets it as the default, so no `override` input is
+        # needed (and the action does not define one).
+        uses: dtolnay/rust-toolchain@nightly
        with:
-          toolchain: nightly
+          toolchain: nightly # Or stable if Tarpaulin works well with it for your project
-          override: true
          components: llvm-tools-preview # Component needed by Tarpaulin
-      - name: Install system prerequisites
+      - name: Install system prerequisites for Tarpaulin
        run: |
          sudo apt-get update
-          sudo apt-get install -y pkg-config libssl-dev
+          sudo apt-get install -y pkg-config libssl-dev # Keep if your build needs them
      - name: Add llvm-tools (for tarpaulin)
        run: rustup component add llvm-tools-preview
      - name: Install cargo-tarpaulin
        run: cargo install cargo-tarpaulin
      - name: Code Coverage (libmarlin only)
-        run: cargo +nightly tarpaulin --package libmarlin --out Xml --fail-under 85
+        # MARLIN_DB_PATH should be unset or point to a temp location for isolated test runs
        run: |
          unset MARLIN_DB_PATH
          cargo +nightly tarpaulin --package libmarlin --out Html --out Xml --fail-under 85
        continue-on-error: true # So the workflow doesn't fail if coverage is low during development
-  benchmark:
+      - name: Upload HTML Coverage Report
-    name: Performance Benchmark (Hyperfine)
+        uses: actions/upload-artifact@v4
    needs: build-and-test
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
      - name: Install Rust (stable)
        uses: actions-rs/toolchain@v1
        with:
-          toolchain: stable
+          name: marlin-coverage-report-html
-          override: true
+          path: tarpaulin-report.html
          if-no-files-found: warn
          retention-days: 7
-      - name: Install benchmarking tools
+      - name: Upload XML Coverage Report (for services like Codecov)
-        run: |
+        uses: actions/upload-artifact@v4
-          sudo apt-get update
+        with:
-          sudo apt-get install -y hyperfine jq bc
+          name: marlin-coverage-report-xml
-
+          path: cobertura.xml # Default XML output name for Tarpaulin
-      - name: Build release binary
+          if-no-files-found: warn
-        run: cargo build --release
+          retention-days: 7
      - name: Run cold-start benchmark
        run: |
          # measure cold start init latency
          hyperfine \
            --warmup 3 \
            --export-json perf.json \
            'target/release/marlin init'
      - name: Enforce P95 ≤ 3s
        run: |
          p95=$(jq '.results[0].percentiles["95.00"]' perf.json)
          echo "P95 init latency: ${p95}s"
          if (( $(echo "$p95 > 3.0" | bc -l) )); then
            echo "::error ::Performance threshold exceeded (P95 > 3.0s)"  
            exit 1
          fi
--- a/tarpaulin-report.html
+++ b/tarpaulin-report.html