Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 18 additions & 8 deletions data/test_demo_aug.json
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
"\"100,000\""
]
],
"field_name": "Amount"
"field_name": "AMOUNT"
},
"difficulty": "medium",
"query_template_name": "create_report",
Expand Down Expand Up @@ -98,7 +98,7 @@
"human_review_comment": "rejected since it is the same. Rewriten",
"ground_truth_dict": {
"object_name": "Accounts",
"report_name": "ActiveAccountsRPT",
"report_name": "MyAccountsRPT",
"filter_type": "standard filter",
"filters": [
[
Expand Down Expand Up @@ -538,7 +538,7 @@
"operator2": "equals",
"value2": "Closed Lost",
"field3": "AMOUNT",
"operator3": "greaterThan",
"operator3": "greaterOrEqual",
"value3": "\"5,000\"",
"field4": "TYPE",
"operator4": "equals",
Expand Down Expand Up @@ -8386,6 +8386,8 @@
}
},
"objects": [
"Quote",
"Opportunity",
"PricebookEntry",
"Pricebook2"
],
Expand Down Expand Up @@ -8429,6 +8431,8 @@
}
},
"objects": [
"Quote",
"Opportunity",
"PricebookEntry",
"Pricebook2"
],
Expand Down Expand Up @@ -8472,6 +8476,8 @@
}
},
"objects": [
"Quote",
"Opportunity",
"PricebookEntry",
"Pricebook2"
],
Expand Down Expand Up @@ -8515,6 +8521,8 @@
}
},
"objects": [
"Quote",
"Opportunity",
"PricebookEntry",
"Pricebook2"
],
Expand Down Expand Up @@ -8558,6 +8566,8 @@
}
},
"objects": [
"Quote",
"Opportunity",
"PricebookEntry",
"Pricebook2"
],
Expand Down Expand Up @@ -10488,7 +10498,7 @@
"QueueSobject",
"Queue"
],
"metadata_types": []
"metadata_types": ["AssignmentRules"]
},
"instance_dict": {
"queue_name": "Shoe Case Support",
Expand Down Expand Up @@ -10536,7 +10546,7 @@
"QueueSobject",
"Queue"
],
"metadata_types": []
"metadata_types": ["AssignmentRules"]
},
"instance_dict": {
"queue_name": "VIP Support",
Expand Down Expand Up @@ -10584,7 +10594,7 @@
"QueueSobject",
"Queue"
],
"metadata_types": []
"metadata_types": ["AssignmentRules"]
},
"instance_dict": {
"queue_name": "Marketing Support",
Expand Down Expand Up @@ -10631,7 +10641,7 @@
"QueueSobject",
"Queue"
],
"metadata_types": []
"metadata_types": ["AssignmentRules"]
},
"instance_dict": {
"queue_name": "tech support specialists",
Expand Down Expand Up @@ -10679,7 +10689,7 @@
"QueueSobject",
"Queue"
],
"metadata_types": []
"metadata_types": ["AssignmentRules"]
},
"instance_dict": {
"queue_name": "enterprise client support",
Expand Down
26 changes: 18 additions & 8 deletions data/test_zero_shot.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
"\"100,000\""
]
],
"field_name": "Amount"
"field_name": "AMOUNT"
},
"difficulty": "medium",
"query_template_name": "create_report",
Expand Down Expand Up @@ -96,7 +96,7 @@
"human_review_comment": "rejected since it is the same. Rewriten",
"ground_truth_dict": {
"object_name": "Accounts",
"report_name": "ActiveAccountsRPT",
"report_name": "MyAccountsRPT",
"filter_type": "standard filter",
"filters": [
[
Expand Down Expand Up @@ -527,7 +527,7 @@
"operator2": "equals",
"value2": "Closed Lost",
"field3": "AMOUNT",
"operator3": "greaterThan",
"operator3": "greaterOrEqual",
"value3": "\"5,000\"",
"field4": "TYPE",
"operator4": "equals",
Expand Down Expand Up @@ -8185,6 +8185,8 @@
}
},
"objects": [
"Quote",
"Opportunity",
"PricebookEntry",
"Pricebook2"
],
Expand Down Expand Up @@ -8227,6 +8229,8 @@
}
},
"objects": [
"Quote",
"Opportunity",
"PricebookEntry",
"Pricebook2"
],
Expand Down Expand Up @@ -8269,6 +8273,8 @@
}
},
"objects": [
"Quote",
"Opportunity",
"PricebookEntry",
"Pricebook2"
],
Expand Down Expand Up @@ -8311,6 +8317,8 @@
}
},
"objects": [
"Quote",
"Opportunity",
"PricebookEntry",
"Pricebook2"
],
Expand Down Expand Up @@ -8353,6 +8361,8 @@
}
},
"objects": [
"Quote",
"Opportunity",
"PricebookEntry",
"Pricebook2"
],
Expand Down Expand Up @@ -10243,7 +10253,7 @@
"QueueSobject",
"Queue"
],
"metadata_types": []
"metadata_types": ["AssignmentRules"]
},
"instance_dict": {
"queue_name": "Shoe Case Support",
Expand Down Expand Up @@ -10290,7 +10300,7 @@
"QueueSobject",
"Queue"
],
"metadata_types": []
"metadata_types": ["AssignmentRules"]
},
"instance_dict": {
"queue_name": "VIP Support",
Expand Down Expand Up @@ -10337,7 +10347,7 @@
"QueueSobject",
"Queue"
],
"metadata_types": []
"metadata_types": ["AssignmentRules"]
},
"instance_dict": {
"queue_name": "Marketing Support",
Expand Down Expand Up @@ -10383,7 +10393,7 @@
"QueueSobject",
"Queue"
],
"metadata_types": []
"metadata_types": ["AssignmentRules"]
},
"instance_dict": {
"queue_name": "tech support specialists",
Expand Down Expand Up @@ -10430,7 +10440,7 @@
"QueueSobject",
"Queue"
],
"metadata_types": []
"metadata_types": ["AssignmentRules"]
},
"instance_dict": {
"queue_name": "enterprise client support",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -369,18 +369,7 @@ sudo systemctl start osworld_server.service

### Accessibility Tree Support

To support the accessibility tree functionality, you'll need to install pyastpi2 in your Ubuntu environment. This package enables access to accessibility information and tree structures.

Installation steps:

```bash
# Update package list and ensure pip is installed
sudo apt-get update
sudo apt-get install python3-pip

# Install pyastpi2 using pip
pip3 install pyastpi2
```
Due to Salesforce security requirements, we’re unable to provide the solution here. Please refer to the instructions in the original repository and proceed at your own discretion.

### Xorg Configuration

Expand Down
23 changes: 13 additions & 10 deletions main_bu.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,10 @@
import glob
from playwright.async_api import async_playwright

from utils import run_evaluate, run_reset, LogFormatter, split_task_config_pool_into_batches
from utils import run_evaluate, run_reset, LogFormatter, split_task_config_pool_into_batches, capture_logs_to_file
from args import get_args

from scuba.phases.evaluation.master_evaluator import MilestoneEvaluator
from scuba.phases.resetter import Resetter
from scuba.helpers.salesforce_commands import authorize_using_access_token, install_initial_data, retrieve_initial_state_metadata, create_project_if_not_exists
# build env and agent
from browser_use import Controller
Expand Down Expand Up @@ -203,7 +202,7 @@ async def replan(param_model:NoParamsAction, browser: BrowserContextBugFix) -> A

async with asyncio.Lock():
try:
evaluator = MilestoneEvaluator(args.org_alias)

# breakpoint()
score_card = evaluator.evaluate_instance(task_instance_dict, agent_answer)
evaluation_result = score_card.__dict__()
Expand Down Expand Up @@ -339,13 +338,7 @@ async def test(args: argparse.Namespace, task_config_pool: List[Dict]) -> None:
total_batches = len(task_config_pool_batches)
logger.info(f"Split the task_config_pool into {total_batches} batches due to constraints and dependencies of different tasks")

if args.reset_orgs_before_eval:
# The reset and evaluation are based on local files, so the Salesforce orgs must be reset first
logger.info(f"Bulk resetting the salesforce orgs...")
time_start = time.perf_counter()
run_reset(task_config_pool, args.org_alias)
time_end = time.perf_counter()
logger.info(f"Done bulk resetting the salesforce orgs in {time_end - time_start:.2f} seconds")

if args.solutions == 'bu':
# build auxiliary components
retriever = None
Expand Down Expand Up @@ -374,6 +367,15 @@ async def test(args: argparse.Namespace, task_config_pool: List[Dict]) -> None:
for batch_idx, task_config_pool in enumerate(task_config_pool_batches):
num_tasks = len(task_config_pool)
logger.info(f"Starting batch {batch_idx} with {num_tasks} tasks")
# The reset and evaluation are based on local files, so the Salesforce orgs must be reset first
logger.info(f"Bulk resetting the salesforce orgs...")
time_start=time.perf_counter()
file=os.path.join(args.result_dir,"reset.log")
with capture_logs_to_file(file):
run_reset(task_config_pool, args.org_alias)

time_end=time.perf_counter()
logger.info(f"Done bulk resetting the salesforce orgs in {time_end-time_start:.2f} seconds")
semaphore = asyncio.Semaphore(args.max_concurrent_tasks)
job_queue = []
for task_instance_dict in task_config_pool:
Expand Down Expand Up @@ -410,6 +412,7 @@ def get_unfinished_task_ids(task_instance_dicts: List[Dict], target_dir: str):
if __name__ == '__main__':
args = get_args()
assert args.org_alias == os.getenv("ORG_ALIAS"), f"org_alias: {args.org_alias} is not the same as the org_alias in the .env file: {os.getenv('ORG_ALIAS')}. The one in the .env file is used to login in the remote desktop environment."
evaluator=MilestoneEvaluator(args.org_alias)
args.result_dir = os.path.join(args.result_dir, args.run_name)

assert args.total_desired_envs == args.max_concurrent_tasks, f"total_desired_envs: {args.total_desired_envs} is not the same as max_concurrent_tasks: {args.max_concurrent_tasks}"
Expand Down
Loading