Feat/added eval run after deploy workflow
parent e417f83c28
commit d7db58e477
.github/scripts/eval_run.py (vendored, new file, 37 lines)
@@ -0,0 +1,37 @@
import requests
import argparse
import sys


def main():
    parser = argparse.ArgumentParser(description='Run evaluation benchmark')
    parser.add_argument('--label', required=True, help='Label for the evaluation run')
    parser.add_argument('--api-url', required=True, help='API URL')
    parser.add_argument('--api-key', required=True, help='API key')
    parser.add_argument('--experiment-id', required=True, help='Experiment ID')

    args = parser.parse_args()

    try:
        response = requests.post(
            f"{args.api_url}/run",
            json={
                "experiment_id": args.experiment_id,
                "api_key": args.api_key,
                "label": args.label
            },
            headers={
                "Content-Type": "application/json"
            }
        )

        response.raise_for_status()

        print("Evaluation run started successfully")
        print(f"Response: {response.json()}")

    except requests.exceptions.RequestException as e:
        print(f"Error running evaluation: {str(e)}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
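For a quick local check of the script's argument handling, it can be driven the same way the workflow drives it. This is a sketch outside the commit; the URL, key, and experiment ID below are placeholders, not real endpoints or credentials.

# Minimal local smoke test for .github/scripts/eval_run.py.
# All values below are placeholders, not real endpoints or credentials.
import subprocess

result = subprocess.run(
    [
        "python", ".github/scripts/eval_run.py",
        "--label", "local.test",
        "--api-url", "https://eval.example.com",  # placeholder eval service URL
        "--api-key", "dummy-key",                 # placeholder API key
        "--experiment-id", "exp-123",             # placeholder experiment ID
    ],
    capture_output=True,
    text=True,
)
print(result.returncode, result.stdout or result.stderr)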
.github/workflows/eval-prod.yml (vendored, new file, 33 lines)
@@ -0,0 +1,33 @@
name: Run Eval Benchmark Prod

env:
  EVAL_API_URL: ${{ secrets.EVAL_API_URL }}
  EVAL_API_KEY: ${{ secrets.EVAL_API_KEY }}
  EVAL_EXPERIMENT_ID: ${{ secrets.EVAL_BENCHMARK_EXPERIMENT_ID }}

on:
  workflow_run:
    workflows: ["Deploy Images to GHCR"]
    types:
      - completed
    branches:
      - main


jobs:
  run-eval-benchmark-prod:
    runs-on: ubuntu-latest
    if: ${{ github.event.workflow_run.conclusion == 'success' }}
    steps:
      - name: 'Install dependencies'
        run: |
          python -m pip install --upgrade pip
          pip install requests

      # make sure the image is deployed before running the eval benchmark
      - name: Wait for 2 minutes
        run: sleep 120

      - name: 'Run Eval Benchmark Prod'
        run: |
          python .github/scripts/eval_run.py --label prod.${{ github.sha }} --api-url ${{ env.EVAL_API_URL }} --api-key ${{ env.EVAL_API_KEY }} --experiment-id ${{ env.EVAL_EXPERIMENT_ID }}
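The fixed "Wait for 2 minutes" step gives the freshly deployed image time to come up before the benchmark fires. Purely as an illustrative alternative, not part of this commit, a readiness poll could replace the sleep; the /health path, timeout, and interval below are assumptions.

# Hypothetical readiness poll (not part of this commit): wait for the deployed
# service to answer a health check instead of sleeping a fixed 120 seconds.
# The /health path, timeout, and interval are assumptions for illustration.
import time
import requests

def wait_until_ready(base_url: str, timeout: int = 300, interval: int = 10) -> bool:
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            if requests.get(f"{base_url}/health", timeout=5).ok:
                return True
        except requests.exceptions.RequestException:
            pass  # not reachable yet; keep polling
        time.sleep(interval)
    return False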