diff --git a/.github/scripts/eval_run.py b/.github/scripts/eval_run.py
new file mode 100644
index 00000000..f423810f
--- /dev/null
+++ b/.github/scripts/eval_run.py
@@ -0,0 +1,37 @@
+import requests
+import argparse
+import sys
+
+def main():
+    parser = argparse.ArgumentParser(description='Run evaluation benchmark')
+    parser.add_argument('--label', required=True, help='Label for the evaluation run')
+    parser.add_argument('--api-url', required=True, help='API URL')
+    parser.add_argument('--api-key', required=True, help='API key')
+    parser.add_argument('--experiment-id', required=True, help='Experiment ID')
+
+    args = parser.parse_args()
+
+    try:
+        response = requests.post(
+            f"{args.api_url}/run",
+            json={
+                "experiment_id": args.experiment_id,
+                "api_key": args.api_key,
+                "label": args.label
+            },
+            headers={
+                "Content-Type": "application/json"
+            }
+        )
+
+        response.raise_for_status()
+
+        print("Evaluation run started successfully")
+        print(f"Response: {response.json()}")
+
+    except requests.exceptions.RequestException as e:
+        print(f"Error running evaluation: {str(e)}", file=sys.stderr)
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/.github/workflows/eval-prod.yml b/.github/workflows/eval-prod.yml
new file mode 100644
index 00000000..a4ac0326
--- /dev/null
+++ b/.github/workflows/eval-prod.yml
@@ -0,0 +1,37 @@
+name: Run Eval Benchmark Prod
+
+env:
+  EVAL_API_URL: ${{ secrets.EVAL_API_URL }}
+  EVAL_API_KEY: ${{ secrets.EVAL_API_KEY }}
+  EVAL_EXPERIMENT_ID: ${{ secrets.EVAL_BENCHMARK_EXPERIMENT_ID }}
+
+on:
+  workflow_run:
+    workflows: ["Deploy Images to GHCR"]
+    types:
+      - completed
+    branches:
+      - main
+
+
+jobs:
+  run-eval-benchmark-prod:
+    runs-on: ubuntu-latest
+    if: ${{ github.event.workflow_run.conclusion == 'success' }}
+    steps:
+      # check out the repo so .github/scripts/eval_run.py exists on the runner
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: 'Install dependencies'
+        run: |
+          python -m pip install --upgrade pip
+          pip install requests
+
+      # make sure the image is deployed before running the eval benchmark
+      - name: Wait for 2 minutes
+        run: sleep 120
+
+      - name: 'Run Eval Benchmark Prod'
+        run: |
+          python .github/scripts/eval_run.py --label prod.${{ github.sha }} --api-url ${{ env.EVAL_API_URL }} --api-key ${{ env.EVAL_API_KEY }} --experiment-id ${{ env.EVAL_EXPERIMENT_ID }}