-
Notifications
You must be signed in to change notification settings - Fork 2
Integrate autotune agent into the auto agent #42
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
cd5409d
1ca5e07
6e52d68
cff421f
eeb3584
980244d
cb365a4
98defd6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,76 @@ | ||
| import asyncio | ||
| import logging | ||
| import time | ||
|
|
||
| import aiohttp | ||
|
|
||
|
|
||
async def call_eval_server_async(
    # Quoted so the annotation is not evaluated when the function is defined.
    session: "aiohttp.ClientSession",
    eval_server_url: str,
    payload: dict,
    poll_interval: int = 10,
    client_wait_timeout: int = 3600 * 3,  # Default to 3 hours
) -> dict:
    """Submit a task to the evaluation server and poll until it finishes.

    The task is submitted with a POST to ``<eval_server_url>/evaluate``; the
    returned ``task_id`` is then polled with GET requests to
    ``<eval_server_url>/status/<task_id>`` until the server reports a
    terminal status or the client-side timeout elapses.

    Args:
        session: aiohttp.ClientSession to use.
        eval_server_url: Base URL of the eval server (e.g.,
            "http://localhost:1245"). A trailing slash is tolerated.
        payload: The request payload. It is not modified; a shallow copy
            with ``client_wait_timeout`` added is what gets sent.
        poll_interval: Seconds to wait between polls.
        client_wait_timeout: Max seconds to wait for task completion. Also
            forwarded to the server inside the payload.

    Returns:
        The ``result`` field of the status response once the task status is
        ``"success"``.

    Raises:
        Exception: If the submission is not answered with HTTP 202, a status
            poll is not answered with HTTP 200, the task ends with status
            ``"failed"`` or ``"timeout"``, or ``client_wait_timeout`` seconds
            pass without a terminal status.
        KeyError: If a response body lacks ``task_id``, ``status`` or (on
            success) ``result``.
    """
    # Normalise the base URL so "http://host/" does not produce "//evaluate".
    base_url = eval_server_url.rstrip("/")

    # 1. Submit the task
    submit_url = f"{base_url}/evaluate"
    logging.info("Submitting async task to %s", submit_url)

    # Copy first so the caller's dict is left untouched.
    payload = payload.copy()
    payload["client_wait_timeout"] = client_wait_timeout

    async with session.post(submit_url, json=payload) as response:
        if response.status != 202:
            error_text = await response.text()
            raise Exception(
                f"Failed to submit task. Status: {response.status}, Error: {error_text}"
            )
        resp_data = await response.json()

    task_id = resp_data["task_id"]
    logging.info("Task submitted successfully. ID: %s", task_id)

    # 2. Poll for status
    # A monotonic clock keeps the elapsed-time check immune to wall-clock
    # adjustments during a wait that may last hours.
    start_time = time.monotonic()
    status_url = f"{base_url}/status/{task_id}"

    while True:
        if time.monotonic() - start_time > client_wait_timeout:
            raise Exception(
                f"Client timed out waiting for task {task_id} after {client_wait_timeout} seconds"
            )

        # Read the body inside the context manager, then leave it before
        # sleeping so the response is released while we wait.
        async with session.get(status_url) as response:
            if response.status != 200:
                error_text = await response.text()
                raise Exception(
                    f"Failed to get task status. Status: {response.status}, Error: {error_text}"
                )
            status_data = await response.json()

        status = status_data["status"]

        if status == "success":
            logging.info("Task %s completed successfully.", task_id)
            return status_data["result"]
        if status in ("failed", "timeout"):
            raise Exception(
                f"Task {task_id} ended with status {status}: {status_data.get('error')}"
            )

        # Any other status is treated as "still in progress".
        logging.info(
            "Task %s status: %s. Waiting %ss...", task_id, status, poll_interval
        )
        await asyncio.sleep(poll_interval)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,9 +4,7 @@ | |
| TEMPERATURE = 0.1 | ||
| TOP_P = 0.9 | ||
| TOP_K = 5 | ||
| TPU_TIMEOUT = 120 | ||
| REQUEST_TIMEOUT = 1800 | ||
| REQUEST_TIMEOUT = 3600 * 3 | ||
| TPU_SERVER_PORT = 5463 | ||
| CPU_SERVER_PORT = 5464 | ||
| EVAL_SERVER_PORT = 1245 | ||
| PERF_THRESHOLD = 1.1 | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is it no longer needed?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I didn't see it used by any code.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I believe this is for evaluation? When perf improves by more than 10%, do we consider it an improvement?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The evaluation code is independent of any agent code, and its threshold was set to 1.05. |
||
Uh oh!
There was an error while loading. Please reload this page.