Feat/added eval run after deploy workflow

This commit is contained in:
rafaelmmiller 2025-02-20 15:01:26 -03:00
parent e417f83c28
commit d7db58e477
2 changed files with 70 additions and 0 deletions

37
.github/scripts/eval_run.py vendored Normal file
View File

@@ -0,0 +1,37 @@
import requests
import argparse
import sys
def main():
parser = argparse.ArgumentParser(description='Run evaluation benchmark')
parser.add_argument('--label', required=True, help='Label for the evaluation run')
parser.add_argument('--api-url', required=True, help='API URL')
parser.add_argument('--api-key', required=True, help='API key')
parser.add_argument('--experiment-id', required=True, help='Experiment ID')
args = parser.parse_args()
try:
response = requests.post(
f"{args.api_url}/run",
json={
"experiment_id": args.experiment_id,
"api_key": args.api_key,
"label": args.label
},
headers={
"Content-Type": "application/json"
}
)
response.raise_for_status()
print("Evaluation run started successfully")
print(f"Response: {response.json()}")
except requests.exceptions.RequestException as e:
print(f"Error running evaluation: {str(e)}", file=sys.stderr)
sys.exit(1)
if __name__ == "__main__":
main()

33
.github/workflows/eval-prod.yml vendored Normal file
View File

@@ -0,0 +1,33 @@
name: Run Eval Benchmark Prod

env:
  EVAL_API_URL: ${{ secrets.EVAL_API_URL }}
  EVAL_API_KEY: ${{ secrets.EVAL_API_KEY }}
  EVAL_EXPERIMENT_ID: ${{ secrets.EVAL_BENCHMARK_EXPERIMENT_ID }}

# Fires after the deploy workflow completes on main; the job itself is
# gated on that run having succeeded.
on:
  workflow_run:
    workflows: ["Deploy Images to GHCR"]
    types:
      - completed
    branches:
      - main

jobs:
  run-eval-benchmark-prod:
    runs-on: ubuntu-latest
    if: ${{ github.event.workflow_run.conclusion == 'success' }}
    steps:
      # BUG FIX: a workflow_run-triggered job starts with an empty
      # workspace, so the repo must be checked out or
      # .github/scripts/eval_run.py will not exist on the runner.
      - name: Checkout repository
        uses: actions/checkout@v4

      # Pin a Python version instead of relying on whatever the runner
      # image ships with.
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install requests

      # Make sure the image is deployed before running the eval benchmark.
      - name: Wait for 2 minutes
        run: sleep 120

      # Arguments are quoted so empty or space-containing secret values
      # cannot break shell word-splitting.
      - name: Run Eval Benchmark Prod
        run: |
          python .github/scripts/eval_run.py --label "prod.${{ github.sha }}" --api-url "${{ env.EVAL_API_URL }}" --api-key "${{ env.EVAL_API_KEY }}" --experiment-id "${{ env.EVAL_EXPERIMENT_ID }}"