mirror of
https://github.com/browseros-ai/BrowserOS.git
synced 2026-05-13 23:53:25 +00:00
- Add hover_at, type_at, drag_at coordinate tools to server - Add hoverAt, typeAt, dragAt methods to Browser class - Export server internals (browser, tool-loop, registry) for eval imports - Copy eval app from enterprise repo with agents, graders, runner, dashboard - Nest eval-targets inside apps/eval - Adapt sessionExecutionDir → workingDir for current server API - Add biome ignore for dashboard HTML to prevent lint breaking onclick handlers
11 lines
4.4 KiB
JSON
11 lines
4.4 KiB
JSON
{"query_id": "87f4c5128e36cdb9366a138a7b61bb00", "dataset": "online-mind2web", "query": "View the speakers that are bluetooth and wireless and filter the results to only show models that are on sale and cost less than $50.", "graders": ["mind2web_judge", "fara_combined"], "start_url": "https://www.bestbuy.com/", "metadata": {"original_task_id": "87f4c5128e36cdb9366a138a7b61bb00", "website": "https://www.bestbuy.com/", "category": "medium", "additional": {"level": "medium", "reference_length": 6}}}
|
|
{"query_id": "cfafe3771369d1d261e9f7ecd44c296d", "dataset": "online-mind2web", "query": "Find the highest-rated dealer for Cadillac with a rating above 4 stars within 20 miles of zip 60606.", "graders": ["mind2web_judge", "fara_combined"], "start_url": "https://www.cars.com/", "metadata": {"original_task_id": "cfafe3771369d1d261e9f7ecd44c296d", "website": "https://www.cars.com/", "category": "medium", "additional": {"level": "medium", "reference_length": 6}}}
|
|
{"query_id": "816851ff92ff0219acf4364dcc2c4692", "dataset": "online-mind2web", "query": "Search for boys' infant pajamas below $40.", "graders": ["mind2web_judge", "fara_combined"], "start_url": "https://www.macys.com/", "metadata": {"original_task_id": "816851ff92ff0219acf4364dcc2c4692", "website": "https://www.macys.com/", "category": "medium", "additional": {"level": "medium", "reference_length": 10}}}
|
|
{"query_id": "905cb53061c33aa2d77e485fe1fca516", "dataset": "online-mind2web", "query": "Browse dermatologists within 10 miles of zip code 10019 and filter by only those who accept Blue Medicare Advantage.", "graders": ["mind2web_judge", "fara_combined"], "start_url": "https://www.healthgrades.com/", "metadata": {"original_task_id": "905cb53061c33aa2d77e485fe1fca516", "website": "https://www.healthgrades.com/", "category": "hard", "additional": {"level": "hard", "reference_length": 11}}}
|
|
{"query_id": "bbbc243b4f18a7a897f0bc84e11d293f", "dataset": "online-mind2web", "query": "Find out how many assists Chris Paul has been averaging in the current season.", "graders": ["mind2web_judge", "fara_combined"], "start_url": "https://www.nba.com/", "metadata": {"original_task_id": "bbbc243b4f18a7a897f0bc84e11d293f", "website": "https://www.nba.com/", "category": "easy", "additional": {"level": "easy", "reference_length": 4}}}
|
|
{"query_id": "d71be72aa25c3eab8eea47a0e60382e2", "dataset": "online-mind2web", "query": "Find technical specs for the latest Macbook Air on Apple.", "graders": ["mind2web_judge", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "d71be72aa25c3eab8eea47a0e60382e2", "website": "https://www.apple.com/", "category": "easy", "additional": {"level": "easy", "reference_length": 4}}}
|
|
{"query_id": "3c1ffc3f494e423b3c434c79e35da8f3", "dataset": "online-mind2web", "query": "Find 12 Monkeys community and view the latest posts mentioning James Cole.", "graders": ["mind2web_judge", "fara_combined"], "start_url": "https://www.reddit.com/", "metadata": {"original_task_id": "3c1ffc3f494e423b3c434c79e35da8f3", "website": "https://www.reddit.com/", "category": "medium", "additional": {"level": "medium", "reference_length": 6}}}
|
|
{"query_id": "608c595eec271fa5dc03506923519994", "dataset": "online-mind2web", "query": "Calculate a FedEx Ground shipping rate for a 3-pound package from zip code 10019 to zip code 90028.", "graders": ["mind2web_judge", "fara_combined"], "start_url": "https://www.fedex.com/en-us/home.html", "metadata": {"original_task_id": "608c595eec271fa5dc03506923519994", "website": "https://www.fedex.com/en-us/home.html", "category": "medium", "additional": {"level": "medium", "reference_length": 9}}}
|
|
{"query_id": "a7a73c8fa75441fc76df9746c327bdd6", "dataset": "online-mind2web", "query": "Estimate the cost of a photographer in 07055 for a 4-hour project.", "graders": ["mind2web_judge", "fara_combined"], "start_url": "https://www.thumbtack.com/", "metadata": {"original_task_id": "a7a73c8fa75441fc76df9746c327bdd6", "website": "https://www.thumbtack.com/", "category": "medium", "additional": {"level": "medium", "reference_length": 8}}}
|
|
{"query_id": "56f8890a837c49f7df766b9c981646f3", "dataset": "online-mind2web", "query": "Show crazy credits for the movie \" Prometheus\" on IMDb.", "graders": ["mind2web_judge", "fara_combined"], "start_url": "https://www.imdb.com/", "metadata": {"original_task_id": "56f8890a837c49f7df766b9c981646f3", "website": "https://www.imdb.com/", "category": "medium", "additional": {"level": "medium", "reference_length": 6}}}
|