Files
shivammittal274 29056226bb feat: add eval framework and coordinate-based input tools (#453)
- Add hover_at, type_at, drag_at coordinate tools to server
- Add hoverAt, typeAt, dragAt methods to Browser class
- Export server internals (browser, tool-loop, registry) for eval imports
- Copy eval app from enterprise repo with agents, graders, runner, dashboard
- Nest eval-targets inside apps/eval
- Adapt sessionExecutionDir → workingDir for current server API
- Add biome ignore for dashboard HTML to prevent lint breaking onclick handlers
2026-03-16 23:12:23 +05:30

644 lines
327 KiB
JSON
Vendored
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{"query_id": "Allrecipes--0", "dataset": "webvoyager", "query": "Provide a recipe for vegetarian lasagna with more than 100 reviews and a rating of at least 4.5 stars suitable for 6 people.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--0", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Vegetarian Four Cheese Lasagna', 4.6-star, 181 reviews, Servings 8", "answer_type": "possible"}}}
{"query_id": "Allrecipes--1", "dataset": "webvoyager", "query": "Find a recipe for a vegetarian lasagna that has at least a four-star rating and uses zucchini.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--1", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "\"Debbie's Vegetable Lasagna\", 4.7-star, include zucchini", "answer_type": "possible"}}}
{"query_id": "Allrecipes--2", "dataset": "webvoyager", "query": "Find a recipe for a vegetarian lasagna under 600 calories per serving that has a prep time of less than 1 hour.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--2", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Easy Vegetarian Red Beans Lasagna', 496 Calories, prep time 20 mins", "answer_type": "possible"}}}
{"query_id": "Allrecipes--3", "dataset": "webvoyager", "query": "Locate a recipe for vegan chocolate chip cookies with over 60 reviews and a rating of at least 4.5 stars on Allrecipes.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--3", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Vegan Chocolate Chip, Oatmeal, and Nut Cookies', 4.9 star, 67 viewers (> 60)", "answer_type": "golden"}}}
{"query_id": "Allrecipes--4", "dataset": "webvoyager", "query": "Find a recipe for Baked Salmon that takes less than 30 minutes to prepare and has at least a 4 star rating based on user reviews.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--4", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Baked Dijon Salmon', 4.6-star, prep time 15 mins", "answer_type": "possible"}}}
{"query_id": "Allrecipes--5", "dataset": "webvoyager", "query": "Search for a popular Pasta Sauce with more than 1000 reviews and a rating above 4 stars. Create a shopping list of ingredients for this recipe.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--5", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "\"World's Best Pasta Sauce!\", 4.7-star, 818 reviews, <Ingredients>", "answer_type": "possible"}}}
{"query_id": "Allrecipes--6", "dataset": "webvoyager", "query": "Search for a vegetarian lasagna recipe that has at least a four-star rating and over 500 reviews.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--6", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Spinach Lasagna', 4.7-star, 501 reviews", "answer_type": "possible"}}}
{"query_id": "Allrecipes--7", "dataset": "webvoyager", "query": "Find a popular recipe for a chocolate chip cookie and list the ingredients and preparation steps.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--7", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Best Chocolate Chip Cookies', <Ingredients>, <Preparation Steps>", "answer_type": "possible"}}}
{"query_id": "Allrecipes--8", "dataset": "webvoyager", "query": "Search for a recipe for Beef Wellington on Allrecipes that has at least 200 reviews and an average rating of 4.5 stars or higher. List the main ingredients required for the dish.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--8", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Beef Wellington', <Ingredients>", "answer_type": "possible"}}}
{"query_id": "Allrecipes--9", "dataset": "webvoyager", "query": "Find a high-rated recipe for vegetarian lasagna, list the key ingredients required, and include the total preparation and cook time stated on the recipe.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--9", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Spicy Vegetarian Lasagna', <Ingredients>, prep time 30 mis, cook time 1 hour 10 mins", "answer_type": "possible"}}}
{"query_id": "Allrecipes--10", "dataset": "webvoyager", "query": "Find The Most Popular Recipes of the 1960s, noting the recipe name, preparation time and total time of the second recipe in this collection.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--10", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Swedish Meatballs I', prep time 25 mins, total time 1 hour 25 mins", "answer_type": "golden"}}}
{"query_id": "Allrecipes--11", "dataset": "webvoyager", "query": "Discover a suitable chocolate cupcake recipe on Allrecipes that has a preparation time of under 1 hour and at least 100 user reviews.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--11", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Chocolate Cupcake', 1261 reviews, prep time 15 mins", "answer_type": "possible"}}}
{"query_id": "Allrecipes--12", "dataset": "webvoyager", "query": "Search for a popular cookie recipe on Allrecipes with more than 1000 reviews and a rating of 4.5 stars or better. Provide the list of ingredients needed.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--12", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Best Chocolate Chip Cookies', 4.6-star, 14493 reviews, <Ingredients>", "answer_type": "possible"}}}
{"query_id": "Allrecipes--13", "dataset": "webvoyager", "query": "Find a recipe with over 100 reviews for Fried Fish on Allrecipes, list the Full Nutrition Label and tell me the amount of Iron per Serving.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--13", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Crispy Fried Fish', Iron: 15mg", "answer_type": "possible"}}}
{"query_id": "Allrecipes--14", "dataset": "webvoyager", "query": "Search for a recipe that includes \"chicken breast\" and \"quinoa\" with preparation time under 30 minutes on Allrecipes.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--14", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Slow Cooked Chicken Stew', prep time 20 mins", "answer_type": "possible"}}}
{"query_id": "Allrecipes--15", "dataset": "webvoyager", "query": "Choose a dessert recipe on Allrecipes with a prep time of less than 30 minutes, has chocolate as an ingredient, and has a user rating of 4 stars or higher. Provide the name of the recipe, ingredients list, and step-by-step instructions.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--15", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Ultimate Chocolate Dessert', 4.7-star, prep time 15 mins", "answer_type": "possible"}}}
{"query_id": "Allrecipes--16", "dataset": "webvoyager", "query": "Find a five-star rated chocolate chip cookie recipe that takes less than 1 hour to make on Allrecipes. Note how many reviews the recipe has and the main ingredients required.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--16", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Chocolate Chip Cookie Cups', 5.0-star, 3 reviews, total time 45 mins, <Ingredients>", "answer_type": "possible"}}}
{"query_id": "Allrecipes--17", "dataset": "webvoyager", "query": "Find the Easy Vegetarian Spinach Lasagna recipe on Allrecipes and tell me what the latest review says.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--17", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "Easy to make and very delicious", "answer_type": "golden"}}}
{"query_id": "Allrecipes--18", "dataset": "webvoyager", "query": "Find a recipe for a vegetarian lasagna that has over 300 reviews and an average rating of 4.5 or higher on Allrecipes.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--18", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Eggplant Lasagna', 4.7-star, 305 reviews", "answer_type": "possible"}}}
{"query_id": "Allrecipes--19", "dataset": "webvoyager", "query": "Find a vegan lasagna recipe on Allrecipes that requires 10 ingredients or less and has feedback of more than 200 reviews. Provide a brief overview of the ingredient list and the total prep and cook time.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--19", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Vegan Lasagna II', 9 Ingredients, 4.2-star, prep time 30 mins, cook time 1 hour, <Ingredients>", "answer_type": "possible"}}}
{"query_id": "Allrecipes--20", "dataset": "webvoyager", "query": "Find a recipe for a cauliflower pizza crust that has a preparation time of under 30 minutes and a rating of at least 4 stars on Allrecipes. Include the number of calories per serving.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--20", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Cauliflower Pizza Crust', 4.2 stars, Prep Time: 15 mins, 59 Calories per serving", "answer_type": "possible"}}}
{"query_id": "Allrecipes--21", "dataset": "webvoyager", "query": "Locate a high-rated recipe for gluten-free brownies on Allrecipes with at least 50 reviews. List the main ingredients and the total time required for preparation and cooking.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--21", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Gluten-Free Fudge Brownies', 4.1 stars, 69 reviews, <Ingredients>, Prep Time: 15 mins, Total Time: 1 hr", "answer_type": "possible"}}}
{"query_id": "Allrecipes--22", "dataset": "webvoyager", "query": "Find a recipe for a healthy avocado salad on Allrecipes that has a preparation time of less than 20 minutes and more than 30 user reviews. Include the nutritional information per serving.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--22", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Avocado Salad', 4.7 stars, 253 reviews, Prep Time: 15 mins, Nutrition Facts: 126 Calories, 10g Fat, 10g Carbs, 2g Protein", "answer_type": "possible"}}}
{"query_id": "Allrecipes--23", "dataset": "webvoyager", "query": "Search Allrecipes for a baked lemon chicken recipe that has a prep time under 45 minutes, with at least a 4.5-star rating based on user reviews, and over 200 reviews. List the primary ingredients required.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--23", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Baked Chicken Schnitzel', 4.5 stars, 250 reviews, Prep Time: 20 mins, <Ingredients>", "answer_type": "possible"}}}
{"query_id": "Allrecipes--24", "dataset": "webvoyager", "query": "Locate a recipe for an eggplant Parmesan on Allrecipes with a rating of at least 4.5 stars and over 50 reviews. Include the preparation time and the number of servings provided by the recipe.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--24", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Eggplant Parmesan', 4.5 stars, 2711 reviews, Prep Time: 25 mins, Servings: 10", "answer_type": "possible"}}}
{"query_id": "Allrecipes--25", "dataset": "webvoyager", "query": "Find a popular quinoa salad recipe on Allrecipes with more than 500 reviews and a rating above 4 stars. Create a shopping list of ingredients for this recipe and include the total cooking and preparation time.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--25", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Easy Quinoa Salad', 4.8 stars, 1107 reviews, Prep Time: 20 mins, Cook Time: 15 mins, <Ingredients>", "answer_type": "possible"}}}
{"query_id": "Allrecipes--26", "dataset": "webvoyager", "query": "Search for a high-protein vegetarian chili recipe on Allrecipes that has at least 50 reviews and a rating of 4 stars or higher. Provide the ingredient list, cooking time, and a brief description of the cooking steps.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--26", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'The Best Vegetarian Chili in the World', 4.7 stars, 1681 reviews, Cook Time: 1 hr, <Ingredients>, <Description: Cooking steps>", "answer_type": "possible"}}}
{"query_id": "Allrecipes--27", "dataset": "webvoyager", "query": "Locate a chicken curry recipe on Allrecipes that has been reviewed more than 30 times and has a rating of at least 4 stars. Provide a summary of the recipe including ingredients, preparation time, and cooking instructions.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--27", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Indian Chicken Curry (Murgh Kari)', 4.7 stars, 955 reviews, <Ingredients>, Prep Time: 20 mins, <cooking instructions>", "answer_type": "possible"}}}
{"query_id": "Allrecipes--28", "dataset": "webvoyager", "query": "On Allrecipes, find a vegan brownie recipe that has at least 40 reviews and a rating of 4.5 or higher. Include the list of ingredients, total prep and cook time, and a brief overview of the preparation steps.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--28", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Vegan Brownies', 4.6 stars, 828 reviews, <Ingredients>, Prep Time: 15 mins, Cook Time: 30 mins, <preparation steps>", "answer_type": "possible"}}}
{"query_id": "Allrecipes--29", "dataset": "webvoyager", "query": "Search for a Mediterranean-style grilled fish recipe on Allrecipes that includes ingredients like olives, has at least a 4-star rating, and more than 25 reviews. Detail the ingredients, cooking method, and total time required for preparation and cooking.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--29", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Branzino Mediterranean', 36 reviews, <Ingredients> include olive oil, <cooking method>, Prep Time: 15 mins, Cook Time: 25 mins, Total Time: 40 mins", "answer_type": "possible"}}}
{"query_id": "Allrecipes--30", "dataset": "webvoyager", "query": "Find a recipe for a vegan smoothie bowl on Allrecipes that includes bananas and leaves, has more than 20 reviews, and a rating of at least 4 stars. Provide a list of ingredients, preparation time, and a summary of the recipe steps.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--30", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Spinach and Banana Power Smoothie', 4.8 stars, 72 reviews, Ingredients: 1 cup plain soy milk, 3/4 cup packed fresh spinach leaves, 1 large banana, sliced; Prep Time: 10 mins; <steps>", "answer_type": "possible"}}}
{"query_id": "Allrecipes--31", "dataset": "webvoyager", "query": "Search for a seafood paella recipe on Allrecipes with a minimum of 4.5 stars rating and at least 50 reviews. The recipe should include shrimp and mussels. Provide the ingredients, total time, and an overview of the preparation steps.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--31", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Easy Paella', 4.6 stars, 470 reviews, <Ingredients>, <preparation steps>, Total Time: 1 hr", "answer_type": "possible"}}}
{"query_id": "Allrecipes--32", "dataset": "webvoyager", "query": "Find a high-rated beef stew recipe on Allrecipes that requires a slow cooker and has at least 30 reviews. Detail the cooking time and the first five ingredients listed in the recipe.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--32", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Slow Cooker Beef Stew', 3994 reviews, Cook Time: 4 hrs, <Ingredients>", "answer_type": "possible"}}}
{"query_id": "Allrecipes--33", "dataset": "webvoyager", "query": "Find a recipe for a low-carb breakfast on Allrecipes with at least 25 reviews. Show the Nutrition Facts and the total carbohydrate content per serving.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--33", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Low-Carb Bacon Spinach Egg Cups', 99 reviews, 237 Calories, 18g Fat, 4g Carbs, 17g Protein", "answer_type": "possible"}}}
{"query_id": "Allrecipes--34", "dataset": "webvoyager", "query": "Locate a baked salmon recipe on Allrecipes that has at least 50 reviews and a rating of 4.5 stars or higher. Note the primary seasoning or herb used and the estimated cooking time.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--34", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Baked Salmon', 4.7 stars, 2339 reviews, Cook Time: 35 mins, <Ingredients>", "answer_type": "possible"}}}
{"query_id": "Allrecipes--35", "dataset": "webvoyager", "query": "Search for an Italian-style meatball recipe on Allrecipes that has more than 100 reviews. Detail the type of meat used and the overall cooking time required.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--35", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Italian Turkey Meatballs', 4.7 stars, 234 reviews, Cook Time: 15 mins, meat: 1/2 pounds ground lean turkey", "answer_type": "possible"}}}
{"query_id": "Allrecipes--36", "dataset": "webvoyager", "query": "Locate a recipe for an American apple pie on Allrecipes with a rating of at least 4 stars and more than 50 reviews. Note the maximum temperature mentioned in the Directions.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--36", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'All American Apple Pie', 4.6 stars, 490 reviews, 350 degrees F (175 degrees C)", "answer_type": "possible"}}}
{"query_id": "Allrecipes--37", "dataset": "webvoyager", "query": "Search for a Greek salad recipe on Allrecipes that has a prep time of under 25 minutes and more than 15 reviews. Include the primary cheese used and the type of dressing recommended.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--37", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Greek Salad', 4.6 stars, 192 reviews, 1 cup crumbled feta cheese, ground black pepper to taste...", "answer_type": "possible"}}}
{"query_id": "Allrecipes--38", "dataset": "webvoyager", "query": "Find a French ratatouille recipe on Allrecipes with a 4-star rating or higher and at least 15 reviews. Note the variety of vegetables included and the overall cooking time.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--38", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Ratatouille', 4.6 stars, 793 reviews, vegetables: 1 eggplant, cut into 1/2 inch cubes; 2 zucchini, sliced; 2 large tomatoes, chopped", "answer_type": "possible"}}}
{"query_id": "Allrecipes--39", "dataset": "webvoyager", "query": "Locate a recipe for sushi rolls on Allrecipes with a minimum of 20 reviews. Show the Nutrition Facts and the main ingredients. Tell me how to store these rolls.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--39", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Smoked Salmon Sushi Roll', 78 reviews, Nutrition Facts (per serving): 291 Calories, 7g Fat, 45g Carbs, 11g Protein, <Ingredients>; You can refrigerate them in an airtight container for up to two days.", "answer_type": "possible"}}}
{"query_id": "Allrecipes--40", "dataset": "webvoyager", "query": "Browse the about us section of Allrecipes for a brief introduction to The Allrecipes Allstars.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--40", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "The Allrecipes Allstars: Social media influencers, registered dietitians, grillmasters, and more seasoned home cooks make up our enthusiastic squad of 100+ brand ambassadors. This diverse, food-loving crew spans the U.S. geographically and represents many different cultures, ethnicities, and family makeups. Since 2011, the Allstars have created tens of thousands of original recipes, photos, and reviews plus shared their cooking expertise via flat and video content on our website, social media, plus more marketing channels.", "answer_type": "golden"}}}
{"query_id": "Allrecipes--41", "dataset": "webvoyager", "query": "List 3 recommended dinner recipes in the Allrecipes Dinners section.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--41", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "Ground Beef-Spinach Casserole; Mexican Ground Beef Casserole; Retro Ground Beef Casserole with Biscuits", "answer_type": "possible"}}}
{"query_id": "Allrecipes--42", "dataset": "webvoyager", "query": "Find a recipe for banana bread with more than 200 reviews and a rating of at least 4.0 stars on Allrecipes.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--42", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Banana Banana Bread', 4.7 stars, 12649 reviews", "answer_type": "possible"}}}
{"query_id": "Allrecipes--43", "dataset": "webvoyager", "query": "Find a recipe for a vegan pumpkin pie on Allrecipes with a minimum four-star rating and a total cook time exceeding 1 hour.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--43", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "'Amazing Vegan Pumpkin Pie', 5.0 stars, Cook Time: 1 hr 55 mins", "answer_type": "possible"}}}
{"query_id": "Allrecipes--44", "dataset": "webvoyager", "query": "List at least 6 holiday recipes sections mentioned in the Occasions section of Allrecipes.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.allrecipes.com/", "metadata": {"original_task_id": "Allrecipes--44", "website": "Allrecipes", "category": "Allrecipes", "additional": {"ground_truth": "THANKSGIVING RECIPES; CHRISTMAS RECIPES; LUNAR NEW YEAR RECIPES; HANUKKAH RECIPES; PURIM RECIPES; MARDI GRAS RECIPES ...", "answer_type": "possible"}}}
{"query_id": "Amazon--0", "dataset": "webvoyager", "query": "Search an Xbox Wireless controller with green color and rated above 4 stars.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--0", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "Xbox Core Wireless Gaming Controller - Velocity Green; 4.7-star", "answer_type": "possible"}}}
{"query_id": "Amazon--1", "dataset": "webvoyager", "query": "Search for women's golf polos in m size, priced between 50 to 75 dollars, and save the lowest priced among results.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--1", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "PUMA Golf 2019 Men's Rotation Polo; $50.00", "answer_type": "possible"}}}
{"query_id": "Amazon--2", "dataset": "webvoyager", "query": "Find a gaming desktop with Windows 11 Home, and the disk size should be 1TB.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--2", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "HP Victus 15L Gaming Desktop with Windows 11 Home and 1TB disk size", "answer_type": "possible"}}}
{"query_id": "Amazon--3", "dataset": "webvoyager", "query": "Find climbing gears and sort the results by price high to low. Answer the first 3 results after sorting.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--3", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "First 3 results after sort", "answer_type": "possible"}}}
{"query_id": "Amazon--4", "dataset": "webvoyager", "query": "Find the used Nintendo Switch Lite on Amazon then filter by 'Used - Good', tell me the cheapest one that is 'Used - Good'.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--4", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "Nintendo Switch Lite - Blue; Used Good: $170", "answer_type": "possible"}}}
{"query_id": "Amazon--5", "dataset": "webvoyager", "query": "Find a Blue iPhone 12 Pro 128gb and add to cart.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--5", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "Apple iPhone 12 Pro, 128GB, Pacific Blue - Fully Unlocked (Renewed); Action: ADD_TO_CHART", "answer_type": "possible"}}}
{"query_id": "Amazon--6", "dataset": "webvoyager", "query": "Browse black strollers within $100 to $200 on Amazon. Then find one Among these black strollers with over 20,000 reviews and a rating greater than 4 star.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--6", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "Baby Trend Expedition Jogger, Dash Black; 22146 reviews; 4.7-star", "answer_type": "possible"}}}
{"query_id": "Amazon--7", "dataset": "webvoyager", "query": "Browse the women's hiking boots on Amazon and filter the results to show only those that are waterproof and have a rating of at least 4 stars and size 6.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--7", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "Filter: 4-star, waterproof, size 6", "answer_type": "possible"}}}
{"query_id": "Amazon--8", "dataset": "webvoyager", "query": "Find the cheapest Samsung-made Android tablet with screen between 10-10.9 inches on Amazon. Only answer the cheapest one.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--8", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "Samsung Galaxy Tab S 10.5in 16GB Android Tablet - Titanium Gold (Renewed); $139.94", "answer_type": "possible"}}}
{"query_id": "Amazon--9", "dataset": "webvoyager", "query": "Find a dog bed on Amazon that is washable and has a length of at least 30 inches.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--9", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "Gulokoka Large Dog Bed for Crate Comfortable Washable Pet Mat for Dogs, Cats, Gray", "answer_type": "possible"}}}
{"query_id": "Amazon--10", "dataset": "webvoyager", "query": "Find the cost of a 2-year protection for PS4 on Amazon.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--10", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "Sony Playstation PS4 1TB Black Console; 2-Year Protection for $30.99", "answer_type": "possible"}}}
{"query_id": "Amazon--11", "dataset": "webvoyager", "query": "Find a stainless steel kitchen sink with double bowls on Amazon. Sort the results and find the cheapest one with FREE delivery.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--11", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "Transolid STDE33226-2 Kitchen Sink, Stainless Steel; $120.89", "answer_type": "possible"}}}
{"query_id": "Amazon--12", "dataset": "webvoyager", "query": "Check reviews for a Ride On Car with 100+ reviews & 4+ stars rating on Amazon. Give me the top review about this Ride On Car.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--12", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "Worth every penny", "answer_type": "possible"}}}
{"query_id": "Amazon--13", "dataset": "webvoyager", "query": "Browse best selling black hoodies in mens size Big and Tall that is between $25 and $50 on Amazon.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--13", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "adidas Men's Essentials Fleece Hoodie; 500+ bought in past month", "answer_type": "possible"}}}
{"query_id": "Amazon--14", "dataset": "webvoyager", "query": "Find the new surge protector on Amazon with 6 to 8 outlets under 25 dollars with customer reviews above 4+ stars.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--14", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "Surge Protector Power Strip $15.99, 8 Outlets, 4.7-star", "answer_type": "possible"}}}
{"query_id": "Amazon--15", "dataset": "webvoyager", "query": "Find a pair of mens running shoes in black, size 7, 4+ stars and under $50 and add them to my cart on Amazon.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--15", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "Damyuan Men's Sport Gym Running Shoes Walking Shoes Casual Lace Up Lightweight; black, size 7, 4.0-star, $29.99", "answer_type": "possible"}}}
{"query_id": "Amazon--16", "dataset": "webvoyager", "query": "Find the Return Policy for Mens Rhinestone Skull Graphic Shirt on Amazon. Color: Black, Size: XX-Large. If Free return is avaliable, tell me how to return this item.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--16", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "FREE Returns, 1. Go to Your Orders to start the return; 2. Print the return shipping label; 3. Ship it!", "answer_type": "golden"}}}
{"query_id": "Amazon--17", "dataset": "webvoyager", "query": "Show me the list of baby products that are on sale and under 10 dollars on Amazon. Provide at least 2 on sale products", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--17", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "Johnson's Baby Care Essentials Gift Set, $7.55; SWEET DOLPHIN 12 Pack Muslin Burp Cloths Large 100% Cotton Hand Washcloths for Baby, $9.98", "answer_type": "possible"}}}
{"query_id": "Amazon--18", "dataset": "webvoyager", "query": "Open Amazon's home page and tell me what the deal is that is going on at the moment, list the names of at least 2 items that are on offer and tell me what percent off they are.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--18", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "Gevi Household V2.0 Countertop Nugget Ice Maker, 20% off; Osmo - Little Genius Starter Kit for iPad & iPhone, 7% off;", "answer_type": "possible"}}}
{"query_id": "Amazon--19", "dataset": "webvoyager", "query": "Look for an English language book on roman empire history in the Amazon Kindle store. Sort by newests arrivals and look for a title that will be released within a month.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--19", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "THE HISTORY OF THE DECLINE AND FALL OF THE ROMAN EMPIRE (All 6 Volumes), released on January 10, 2024.", "answer_type": "possible"}}}
{"query_id": "Amazon--20", "dataset": "webvoyager", "query": "Search for a wireless ergonomic keyboard with backlighting and a rating of at least 4 stars. The price should be between $40 to $60. Save the product with the 500+ customer reviews.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--20", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "Logitech Wave Keys Wireless Ergonomic Keyboard, $57.99, 4.6 stars, 26005 ratings", "answer_type": "possible"}}}
{"query_id": "Amazon--21", "dataset": "webvoyager", "query": "Find a stainless steel, 12-cup programmable coffee maker on Amazon. The price range should be between $100 to $200. Report the one with the 4+ customer rating.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--21", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "Braun BrewSense 12-Cup Drip Coffee Maker, Stainless Steel, 4.3 stars, $129.95", "answer_type": "possible"}}}
{"query_id": "Amazon--22", "dataset": "webvoyager", "query": "Search for a set of non-stick, oven-safe cookware on Amazon. The set should include at least 10 pieces and be priced under $150.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--22", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "CAROTE 11pcs Nonstick Cookware Set, Non Stick, Oven Safe, $129.99 ($11.82 / Count)", "answer_type": "possible"}}}
{"query_id": "Amazon--23", "dataset": "webvoyager", "query": "Look for a men's waterproof digital sports watch with a heart rate monitor on Amazon. It should be priced between $50 to $100.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--23", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "Smartwatch for Men Android iPhone, Waterproof, Heart Rate, $54.99", "answer_type": "possible"}}}
{"query_id": "Amazon--24", "dataset": "webvoyager", "query": "Browse for a compact air fryer on Amazon with a capacity of 2 to 3 quarts. It should have a digital display, auto shutoff and be priced under $100.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--24", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "Dash DMAF360GBAQ02 Aircrisp® Pro Digital Air Fryer, Digital Display, Auto Shut Off, 3qt, $90.10", "answer_type": "possible"}}}
{"query_id": "Amazon--25", "dataset": "webvoyager", "query": "Search for a queen-sized, hypoallergenic mattress topper on Amazon. It should have a memory foam material and be priced between $50 to $100.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--25", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "2 Inch 7-Zone Memory Foam Mattress Topper Queen with 100% Bamboo Rayon Cover, Cooling Gel-Infused Swirl Egg Crate Memory Foam, $99.99", "answer_type": "possible"}}}
{"query_id": "Amazon--26", "dataset": "webvoyager", "query": "Find a portable Bluetooth speaker on Amazon with a water-resistant design, under $50. It should have a minimum battery life of 10 hours.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--26", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "Portable Bluetooth Speaker, IPX7 Waterproof Wireless Speaker, 25W Super Bass 24H Playtime, $29.97", "answer_type": "possible"}}}
{"query_id": "Amazon--27", "dataset": "webvoyager", "query": "Look for a USB-C hub on Amazon compatible with MacBook Pro, featuring at least 4 ports, including HDMI and SD card reader. The price should be under $50. Select the one after sorting by Best Sellers.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--27", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "Hiearcool USB C Hub, USB C Multi-Port Adapter for MacBook Pro, 7IN1, include 4K HDMI USB3.0 and SD/TF Card Reader, $24.99", "answer_type": "possible"}}}
{"query_id": "Amazon--28", "dataset": "webvoyager", "query": "Search for a yoga mat on Amazon that is at least 6mm thick, non-slip, and eco-friendly. The price should be under $50.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--28", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "Retrospec Solana Yoga Mat 1\" Thick, Non Slip, $38.51", "answer_type": "possible"}}}
{"query_id": "Amazon--29", "dataset": "webvoyager", "query": "Find a set of solar-powered garden lights on Amazon with a minimum pack of 10 lights. They should be LED and priced under $50.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--29", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "KelvinLux Solar Garden Lights Outdoor, 12 Packs, 12 LEDs, $35.99 ($3.00 / Count)", "answer_type": "possible"}}}
{"query_id": "Amazon--30", "dataset": "webvoyager", "query": "Locate the highest-rated fiction book released in 2024 on Amazon, with a minimum of 50 customer reviews.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--30", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "The Women Library Binding Large Print, March 1, 2024, 4.8 stars", "answer_type": "possible"}}}
{"query_id": "Amazon--31", "dataset": "webvoyager", "query": "Find a compact digital camera on Amazon with a zoom capability of at least 10x, rated 4 stars or higher, and priced between $100 to $300.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--31", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "5K Digital Camera for Photography Autofocus, 16X Digital Zoom, 5.0 stars, $129.99", "answer_type": "possible"}}}
{"query_id": "Amazon--32", "dataset": "webvoyager", "query": "Search for an electric kettle on Amazon with a capacity of at least 1.5 liters, made of stainless steel, and with a customer rating of 4 stars or above.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--32", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "COMFEE' Stainless Steel Electric Kettle, 1.7 Liter, 4.6 stars", "answer_type": "possible"}}}
{"query_id": "Amazon--33", "dataset": "webvoyager", "query": "Search for a portable air conditioner on Amazon suitable for a room size of 300 sq ft, with energy efficiency rating, and compare the prices of the top three search results.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--33", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "price compare: 1) Shinco 10,000 BTU Portable Air Conditioner, $314.99; 2) Renogy 8,000 BTU Portable Air Conditioners, $283.09; 3) SereneLife Compact Freestanding Portable Air Conditioner, $247.54", "answer_type": "possible"}}}
{"query_id": "Amazon--34", "dataset": "webvoyager", "query": "Find a beginner's acrylic paint set on Amazon, with at least 24 colors, suitable for canvas painting, and priced under $40.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--34", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "Complete Acrylic Paint Set, 24х Rich Pigment Colors, for Painting Canvas, $16.97", "answer_type": "possible"}}}
{"query_id": "Amazon--35", "dataset": "webvoyager", "query": "Find a men's leather wallet on Amazon with RFID blocking, at least 6 card slots, and priced below $50. Check if it's available for FREE delivery.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--35", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "STAY FINE Top Grain Leather Wallet for Men, RFID Blocking, Slim Billfold with 8 Card Slots, FREE delivery Friday, March 1", "answer_type": "possible"}}}
{"query_id": "Amazon--36", "dataset": "webvoyager", "query": "Search for a children's science experiment kit on Amazon suitable for ages 8-13, with at least a 4-star rating and priced under $30.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--36", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "UNGLINGA 150 Experiments Science Kits for Kids Age 6-8-10-12-14, 4.6 stars, $29.99", "answer_type": "possible"}}}
{"query_id": "Amazon--37", "dataset": "webvoyager", "query": "Locate a queen-sized bedspread on Amazon with a floral pattern, and check if it's available in blue color.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--37", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "NEWLAKE Cotton Bedspread Quilt Sets-Reversible Patchwork Coverlet Set, Blue Classic Royal Pattern, Queen Size", "answer_type": "possible"}}}
{"query_id": "Amazon--38", "dataset": "webvoyager", "query": "Find a bird feeder on Amazon suitable for small birds, with an anti-squirrel mechanism, and check if it's available with free shipping.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--38", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "Bird Feeder for Outdoors Hanging, Squirrel Proof, FREE delivery Friday, March 1", "answer_type": "possible"}}}
{"query_id": "Amazon--39", "dataset": "webvoyager", "query": "Locate a travel guide book on Amazon for Japan, published in 2024, with at least 20 customer reviews.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--39", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "Japan Travel Guide 2024: The Ultimate Route to Authentic Ramen and Beyond Tips, Maps, and Must-Sees for Every Traveler, February 1, 2024, 38 ratings", "answer_type": "possible"}}}
{"query_id": "Amazon--40", "dataset": "webvoyager", "query": "Locate a women's yoga mat in purple, with a thickness of at least 5mm, rated 4+ stars, and priced under $30 on Amazon. Check how many colors are available in total, and what is the return and delivery policy.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.amazon.com/", "metadata": {"original_task_id": "Amazon--40", "website": "Amazon", "category": "Amazon", "additional": {"ground_truth": "ProsourceFit Extra Thick Yoga Pilates Exercise Mat, 1/2\", 4.6 stars, $21.99, 7 colors, FREE delivery Friday, March 1 on orders shipped by Amazon over $35", "answer_type": "possible"}}}
{"query_id": "Apple--0", "dataset": "webvoyager", "query": "Compare the prices of the latest models of MacBook Air available on Apple's website.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--0", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "MacBook Air 13-inch M1 chip: from $999; 13-inch M2 chip: from $1099; 15-inch M2 chip: from $1299", "answer_type": "possible"}}}
{"query_id": "Apple--1", "dataset": "webvoyager", "query": "Research the new features of the iOS 17 on Apple support and check its compatibility with the iPhone 12.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--1", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "StandBy delivers a new full-screen experience; AirDrop makes it easier to share and connect; Enhancements to the keyboard;... compatible", "answer_type": "possible"}}}
{"query_id": "Apple--2", "dataset": "webvoyager", "query": "Compare the prices and chips for the iPhone 14 Pro and iPhone 15 Pro models directly from Apple's website.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--2", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "14 Pro: Available at authorized resellers, A16 Bionic chip, 6-core CPU, 5-core GPU, 16-core Neural Engine; 15 Pro: Starting at $999, A17 Pro chip, 6-core CPU, 6-core GPU, 16-core Neural Engine", "answer_type": "possible"}}}
{"query_id": "Apple--3", "dataset": "webvoyager", "query": "Find the latest model of the iPhone and compare the price and screen size between the pro and pro max.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--3", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "iPhone 15 pro starts from $999, 6.1-inch screen; iPhone 15 pro max starts from $1199, 6.7-inch screen", "answer_type": "possible"}}}
{"query_id": "Apple--4", "dataset": "webvoyager", "query": "How much does it cost to buy a Macbook pro, 16-inch, Apple M3 Max chip with 16-core CPU, 40-core GPU, 64GB unified memory, 1TB SSD.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--4", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "$4,199.00 or $349.91/mo.per month for 12 mo.*", "answer_type": "possible"}}}
{"query_id": "Apple--5", "dataset": "webvoyager", "query": "Check the release date and price for the latest version of the iPhone.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--5", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "iPhone 15 ($799) or pro ($999) or pro Max ($1199); September 22, 2023", "answer_type": "possible"}}}
{"query_id": "Apple--6", "dataset": "webvoyager", "query": "Find AirPods on Apple and how many types are currently available.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--6", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "4", "answer_type": "possible"}}}
{"query_id": "Apple--7", "dataset": "webvoyager", "query": "When and where the Apple Vision Pro will be released.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--7", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "Available early 2024 in the U.S.", "answer_type": "possible"}}}
{"query_id": "Apple--8", "dataset": "webvoyager", "query": "Identify and list the specifications of the latest iPad model released by Apple, including its storage options, processor type, and display features.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--8", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "iPad Pro, storage options: 128GB, 256GB, 512GB, 1TB, 2TB; processor type: Apple M2 chip; display features: 11inch with Liquid Retina display, 12.9inch with Liquid Retina XDR display", "answer_type": "possible"}}}
{"query_id": "Apple--9", "dataset": "webvoyager", "query": "Check the Apple Store for the availability of the latest iPhone model and schedule an in-store pickup at the nearest Apple Store for January 10, 2024.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--9", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "iPhone 15; Schedule an in-store pickup", "answer_type": "possible"}}}
{"query_id": "Apple--10", "dataset": "webvoyager", "query": "Find information on the latest (as of today's date) MacBook model, including its key features such as processor type, memory size, and storage capacity.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--10", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "Macbook Pro; processor type: Apple M3 chip, Apple M3 Pro chip, Apple M3 Max chip; memory size: 8GB, 16GB, 18GB, 24GB, 36GB, 48GB, 64GB, 96GB, 128GB; storage capacity: 512GB, 1TB, 2TB, 4TB, 8TB", "answer_type": "possible"}}}
{"query_id": "Apple--11", "dataset": "webvoyager", "query": "Get information about the latest iPad model released by Apple, including its release date, base storage capacity, and starting price available on Apple's official website.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--11", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "sixth-generation iPad Pro 11inch, iPad Pro 12.9inch; release date: October 26, 2022; base storage capacity 128 GB, starting price $799", "answer_type": "possible"}}}
{"query_id": "Apple--12", "dataset": "webvoyager", "query": "What Apple Repair ways are mentioned on apple website, answer 2 of them.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--12", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "Any 2 of 'Send your product to Apple', 'Find an Apple Authorized Service Provider', 'Visit a Genius at an Apple Store', 'Independent Repair Providers', 'Self Service Repair'", "answer_type": "golden"}}}
{"query_id": "Apple--13", "dataset": "webvoyager", "query": "How many colors does the latest MacBook Air come in?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--13", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "4, Silver, Starlight, Space Gray, and Midnight", "answer_type": "possible"}}}
{"query_id": "Apple--14", "dataset": "webvoyager", "query": "Identify the upgrade options available for the cheapest base model of the MacBook Pro 14-inch with M3 chip, and calculate the total price difference from the base model to the maximum upgrade (no Pre-Installed Software) offered by Apple.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--14", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "Base model:$1599, difference: $1020", "answer_type": "possible"}}}
{"query_id": "Apple--15", "dataset": "webvoyager", "query": "On Apple's website, how many different types of keyboards are available when customizing your 14-inch MacBook Pro?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--15", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "16", "answer_type": "possible"}}}
{"query_id": "Apple--16", "dataset": "webvoyager", "query": "Find on Apple website how many types of AirPods (3rd generation) are available and what is the price difference.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--16", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "2 types, price difference $10", "answer_type": "possible"}}}
{"query_id": "Apple--17", "dataset": "webvoyager", "query": "Search Apple for the accessory Smart Folio for iPad and check the closest pickup availability next to zip code 90038.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--17", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "Apple Tower Theatre", "answer_type": "golden"}}}
{"query_id": "Apple--18", "dataset": "webvoyager", "query": "Check if there are trade-in offers for the latest model of iPhone.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--18", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "There are trade-in offers.", "answer_type": "golden"}}}
{"query_id": "Apple--19", "dataset": "webvoyager", "query": "On Apple's website, what is the slogan for the Mac and what is the slogan for the Macbook pro.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--19", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "If you can dream it, Mac can do it; Mind-blowing. Head-turning", "answer_type": "golden"}}}
{"query_id": "Apple--20", "dataset": "webvoyager", "query": "Check the price for an Apple iPhone 14 Plus with 256GB storage in Purple color.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--20", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "From $899 or $37.45/mo.per month for 24 mo.months", "answer_type": "possible"}}}
{"query_id": "Apple--21", "dataset": "webvoyager", "query": "Identify the available storage options for the latest iPad Pro on the Apple website.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--21", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "128GB, 256GB, 512GB, 1TB, and 2TB", "answer_type": "possible"}}}
{"query_id": "Apple--22", "dataset": "webvoyager", "query": "Find out the trade-in value for an iPhone 13 Pro Max in good condition on the Apple website.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--22", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "iPhone 13 Pro Max, Up to $500", "answer_type": "possible"}}}
{"query_id": "Apple--23", "dataset": "webvoyager", "query": "Determine the price difference between the latest series of Apple Watch and Apple Watch SE on the Apple website.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--23", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "Apple Watch SE From $249, Apple Watch Series 9 From $399", "answer_type": "possible"}}}
{"query_id": "Apple--24", "dataset": "webvoyager", "query": "Find out the starting price for the most recent model of the iMac on the Apple website.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--24", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "$1299.00", "answer_type": "possible"}}}
{"query_id": "Apple--25", "dataset": "webvoyager", "query": "On the Apple website, look up the processor for the latest model of the Apple TV.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--25", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "Apple TV 4K: A15 Bionic chip", "answer_type": "possible"}}}
{"query_id": "Apple--26", "dataset": "webvoyager", "query": "Find the maximum video recording resolution supported by the latest iPad mini on the Apple website.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--26", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "4K video recording at 24 fps, 25 fps, 30 fps, or 60 fps", "answer_type": "possible"}}}
{"query_id": "Apple--27", "dataset": "webvoyager", "query": "On Apple's website, check if the HomePod mini in store is available in multiple colors and list them.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--27", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "Available in multiple colors: Space Gray, Blue, Yellow, White, and Orange.", "answer_type": "possible"}}}
{"query_id": "Apple--28", "dataset": "webvoyager", "query": "On the Apple website, find out if the Mac Mini can be configured with a GPU larger than 16-core.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--28", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "Yes. Mac mini Apple M2 Pro chip, Configurable to: 19-core GPU", "answer_type": "golden"}}}
{"query_id": "Apple--29", "dataset": "webvoyager", "query": "On Apple's website, check the estimated battery life of the latest MacBook Air during web browsing in Tech Specs.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--29", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "Up to 15 hours wireless web", "answer_type": "possible"}}}
{"query_id": "Apple--30", "dataset": "webvoyager", "query": "Check the storage options and prices for the latest iPad Pro models on Apple's website.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--30", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "11-inch, 128GB from $799, 256GB from $899, 512GB from $1099, 1TB from $1499, and 2TB from $1899.", "answer_type": "possible"}}}
{"query_id": "Apple--31", "dataset": "webvoyager", "query": "On Apple's website, what is the slogan for the latest Apple Watch Series.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--31", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "Smarter. Brighter. Mightier.", "answer_type": "golden"}}}
{"query_id": "Apple--32", "dataset": "webvoyager", "query": "Investigate the trade-in value for an iPhone 11 Pro Max on Apple's website.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--32", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "iPhone 11 Pro Max\tUp to $270", "answer_type": "possible"}}}
{"query_id": "Apple--33", "dataset": "webvoyager", "query": "Look for the color options available for the newest iMac.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--33", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "Blue, Green, Pink, Silver, Yellow, Orange, Purple", "answer_type": "possible"}}}
{"query_id": "Apple--34", "dataset": "webvoyager", "query": "Identify the size and weight for the Apple TV 4K and list the Siri Remote features introduced.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--34", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "Height: 1.2 inches (31 mm), Width: 3.66 inches (93 mm), Depth: 3.66 inches (93 mm); Siri Remote features", "answer_type": "possible"}}}
{"query_id": "Apple--35", "dataset": "webvoyager", "query": "How many types of Apple Pencil are currently available on the Apple's website? Which one supports Wireless pairing and charging.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--35", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "3, Apple Pencil (2nd generation), Apple Pencil (USB-C), Apple Pencil (1st generation); Apple Pencil (2nd generation) supports Wireless pairing and charging.", "answer_type": "possible"}}}
{"query_id": "Apple--36", "dataset": "webvoyager", "query": "Browse Apple Music on the entertainment section of the Apple's website, and see which singers' names are included in the pictures on this page.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--36", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "Lauren Daigle, Megan Moroney, Olivia Rodrigo ...", "answer_type": "possible"}}}
{"query_id": "Apple--37", "dataset": "webvoyager", "query": "Compare the color options of iPhone 13 Pro, iPhone 14 Pro and iPhone 15 Pro.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--37", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "iPhone 13 pro: Alpine Green, Silver, Gold, Graphite, Sierra Blue; iPhone 14 pro: Deep Purple, Gold, Silver, Space Black; iPhone 15 pro: Natural Titanium, Blue Titanium, White Titanium, Black Titanium", "answer_type": "golden"}}}
{"query_id": "Apple--38", "dataset": "webvoyager", "query": "Explore accessories for Apple Vision Pro, list at least three accessories.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--38", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "Apple Vision Pro Battery; Apple Vision Pro Travel Case; ZEISS Optical Inserts ...", "answer_type": "possible"}}}
{"query_id": "Apple--39", "dataset": "webvoyager", "query": "Find solutions on Apple's website if you forgot your Apple ID password.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--39", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "The fastest and easiest way to reset your password is with your iPhone or other trusted Apple device — one that you're already signed in to with your Apple ID, so that we know that it's yours.", "answer_type": "possible"}}}
{"query_id": "Apple--40", "dataset": "webvoyager", "query": "Find information on Apple website, and tell me the device weight of Apple Vision Pro and list 5 Built-in Apps it supports.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--40", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "Device Weight, 21.2-22.9 ounces (600-650 g); Built-in Apps: App Store, Encounter Dinosaurs, Files, Freeform, Keynote...", "answer_type": "possible"}}}
{"query_id": "Apple--41", "dataset": "webvoyager", "query": "How much does it cost to buy an ipad mini with 64GB storage and Wi-Fi + Cellular connectivity? (no engraving, no apple pencil, no smart folio, no apple trade-in).", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--41", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "$649", "answer_type": "possible"}}}
{"query_id": "Apple--42", "dataset": "webvoyager", "query": "Find updates for Apple Watch Series 7,8,9 on Apple's website.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.apple.com/", "metadata": {"original_task_id": "Apple--42", "website": "Apple", "category": "Apple", "additional": {"ground_truth": "see https://www.apple.com/watch/compare/, <summary>", "answer_type": "possible"}}}
{"query_id": "ArXiv--0", "dataset": "webvoyager", "query": "Search for the latest preprints about 'quantum computing'.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--0", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "Any paper related to quantum computing (latest)", "answer_type": "possible"}}}
{"query_id": "ArXiv--1", "dataset": "webvoyager", "query": "Search for the latest research papers on quantum computing submitted to ArXiv within the last two days.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--1", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "Paper related to quantum computing (latest 2 days)", "answer_type": "possible"}}}
{"query_id": "ArXiv--2", "dataset": "webvoyager", "query": "Look up the most recent papers related to 'cs.CL', select one and show its abstract.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--2", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "cs.CL paper, <abstract>", "answer_type": "possible"}}}
{"query_id": "ArXiv--3", "dataset": "webvoyager", "query": "Locate the most recent research paper about 'Algebraic Topology' under Mathematics published on ArXiv. Provide the title of the paper, the name of the authors, and the abstract.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--3", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "math.AT paper, <title>, <authors>, <abstract>", "answer_type": "possible"}}}
{"query_id": "ArXiv--4", "dataset": "webvoyager", "query": "Find the most recent research papers in Astrophysics of Galaxies. How many papers have been announced in the last day?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--4", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "22 Dec 2023, 18 (real-time)", "answer_type": "possible"}}}
{"query_id": "ArXiv--5", "dataset": "webvoyager", "query": "Search papers about \"quantum computing\" which has been submitted to the Quantum Physics category on ArXiv. How many results in total. What if search in all archives?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--5", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "23081 results, searching in archive quant-ph; 39482 results, search in all archives", "answer_type": "possible"}}}
{"query_id": "ArXiv--6", "dataset": "webvoyager", "query": "How many figures and tables are in the paper \"On the Sentence Embeddings from Pre-trained Language Models\"?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--6", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "2 Figures, 8 Tables.", "answer_type": "golden"}}}
{"query_id": "ArXiv--7", "dataset": "webvoyager", "query": "Find the most recent paper submitted on machine learning in the Computer Science category posted on ArXiv.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--7", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "Latest cs.LG paper", "answer_type": "possible"}}}
{"query_id": "ArXiv--8", "dataset": "webvoyager", "query": "What is the latest news on ArXiv?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--8", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "'Accessibility update: arXiv now offers papers in HTML format' (December 21, 2023)", "answer_type": "possible"}}}
{"query_id": "ArXiv--9", "dataset": "webvoyager", "query": "Find the latest research paper about neural networks published on ArXiv which has been submitted within the last week.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--9", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "Latest paper related to neural networks", "answer_type": "possible"}}}
{"query_id": "ArXiv--10", "dataset": "webvoyager", "query": "Visit ArXiv Help on how to withdraw an article if the submission is not yet announced.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--10", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "If your submission has not yet become publicly available you may delete or delay it. To do either of these things go to your user page and select either the Delete or Unsubmit icon.", "answer_type": "golden"}}}
{"query_id": "ArXiv--11", "dataset": "webvoyager", "query": "For Non-English submissions, do I need to provide a multi-language abstract, if need, answer the separator between the multiple abstracts.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--11", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "-----", "answer_type": "golden"}}}
{"query_id": "ArXiv--12", "dataset": "webvoyager", "query": "Find store in arXiv Help, tell me how many styles of arXiv Logo Shirt are available?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--12", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "3", "answer_type": "golden"}}}
{"query_id": "ArXiv--13", "dataset": "webvoyager", "query": "How many articles on ArXiv with 'SimCSE' in the title?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--13", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "4", "answer_type": "possible"}}}
{"query_id": "ArXiv--14", "dataset": "webvoyager", "query": "On ArXiv, how many articles have 'SimCSE' in the article and are originally announced in October 2023?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--14", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "3", "answer_type": "golden"}}}
{"query_id": "ArXiv--15", "dataset": "webvoyager", "query": "Searching Chinese Benchmark on ArXiv, how many papers announced in December 2023 mention being accepted for AAAI 2024?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--15", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "2", "answer_type": "possible"}}}
{"query_id": "ArXiv--16", "dataset": "webvoyager", "query": "Locate the latest research about gravitational waves that were uploaded to ArXiv this week and provide a brief summary of one article's main findings.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--16", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "Latest gravitational waves paper, <summary>", "answer_type": "possible"}}}
{"query_id": "ArXiv--17", "dataset": "webvoyager", "query": "Find the paper 'GPT-4 Technical Report', when was v3 submitted?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--17", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "Mon, 27 Mar 2023 17:46:54 UTC", "answer_type": "golden"}}}
{"query_id": "ArXiv--18", "dataset": "webvoyager", "query": "Download the paper 'Dense Passage Retrieval for Open-Domain Question Answering'. How many formulas are in the article and which one is the loss function?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--18", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "2 formulas, the second one is loss function", "answer_type": "golden"}}}
{"query_id": "ArXiv--19", "dataset": "webvoyager", "query": "Which university maintains and manages ArXiv. Accessing the university's website from ArXiv, how many undergraduate students are currently at the university.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--19", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "Cornell University, 16071 UNDERGRADUATE STUDENTS", "answer_type": "possible"}}}
{"query_id": "ArXiv--20", "dataset": "webvoyager", "query": "Find the latest paper on 'machine learning' in the Statistics section of ArXiv and provide its abstract.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--20", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "stat.ML paper, <abstract>", "answer_type": "possible"}}}
{"query_id": "ArXiv--21", "dataset": "webvoyager", "query": "Search for papers on 'neural networks for image processing' in the Computer Science category on ArXiv and report how many were submitted in the last week.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--21", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "cs paper related to 'neural networks for image processing',", "answer_type": "possible"}}}
{"query_id": "ArXiv--22", "dataset": "webvoyager", "query": "Locate the ArXiv Help section and find instructions on how to subscribe to daily listing emails for new submissions in a specific category.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--22", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "To: arch-ive@arxiv.org \\n Subject: subscribe Your Full Name", "answer_type": "possible"}}}
{"query_id": "ArXiv--23", "dataset": "webvoyager", "query": "Determine how many articles with the keyword 'autonomous vehicles' were published in the 'Electrical Engineering and Systems Science' section of ArXiv yesterday.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--23", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "eess.SY paper related to autonomous vehicles", "answer_type": "possible"}}}
{"query_id": "ArXiv--24", "dataset": "webvoyager", "query": "Identify the most recent paper related to 'graph neural networks' on ArXiv and determine the affiliation of the first author.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--24", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "paper related to graph neural networks", "answer_type": "possible"}}}
{"query_id": "ArXiv--25", "dataset": "webvoyager", "query": "Browse the ArXiv store and let me know how many different types of merchandise are available.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--25", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "6, arXiv Logo Shirt, arXiv Logo Mug, arXiv is Open Science, Gift cards, arXiv Morning Mug, arXiv Forever", "answer_type": "golden"}}}
{"query_id": "ArXiv--26", "dataset": "webvoyager", "query": "Search for papers related to 'climate change modeling' on ArXiv and find out how many have been published in the Earth and Planetary Astrophysics (astro-ph.EP) category in the last week.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--26", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "astro-ph.EP paper related to climate change modeling last week", "answer_type": "possible"}}}
{"query_id": "ArXiv--27", "dataset": "webvoyager", "query": "On ArXiv, what categories does Economics include, and what are their abbreviations?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--27", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "Econometrics (econ.EM), General Economics (econ.GN), and Theoretical Economics (econ.TH)", "answer_type": "golden"}}}
{"query_id": "ArXiv--28", "dataset": "webvoyager", "query": "Search 'Poly encoder' by title on ArXiv and check whether the articles in the search results provide HTML access.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--28", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "'Persona-Coded Poly-Encoder: Persona-Guided Multi-Stream Conversational Sentence Scoring', Access include: HTML (experimental)", "answer_type": "possible"}}}
{"query_id": "ArXiv--29", "dataset": "webvoyager", "query": "On ArXiv, search for papers with 'Neural Network Optimization' in the title published in 2023, and provide the number of such papers.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--29", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "240+ (search by title)", "answer_type": "possible"}}}
{"query_id": "ArXiv--30", "dataset": "webvoyager", "query": "Look up the submission guidelines on ArXiv for submitting a paper and tell me the formats for figures.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--30", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "Accepted figure formats: PostScript (PS, EPS) — requires LaTeX processing; JPEG, GIF, PNG or PDF figures — requires PDFLaTeX processing", "answer_type": "golden"}}}
{"query_id": "ArXiv--31", "dataset": "webvoyager", "query": "Search ArXiv for papers with 'Graph Neural Networks' in the abstract that were submitted between Jan 1, 2024, and Jan 3, 2024, and determine how many of these papers have more than five authors.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--31", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "7 papers", "answer_type": "golden"}}}
{"query_id": "ArXiv--32", "dataset": "webvoyager", "query": "Locate the latest paper on ArXiv within the 'Nonlinear Sciences - Chaotic Dynamics' category, summarize the abstract and note the submission date.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--32", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "latest nlin.CD paper, <abstract>, <date>", "answer_type": "possible"}}}
{"query_id": "ArXiv--33", "dataset": "webvoyager", "query": "Query ArXiv for the latest research article in the category of Systems and Control under Computer Science. Summarize the main objective or hypothesis presented in the paper and provide the names of the authors.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--33", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "eess.SY paper", "answer_type": "possible"}}}
{"query_id": "ArXiv--34", "dataset": "webvoyager", "query": "Search for the most recent paper related to non-commutative geometry submitted by an author with the first name John. Provide the title and the abstract.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--34", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "Finite spectral triples for the fuzzy torus, Authors: John W. Barrett, James Gaunt, <abstract>", "answer_type": "possible"}}}
{"query_id": "ArXiv--35", "dataset": "webvoyager", "query": "Retrieve the latest research paper in Quantum Physics from ArXiv and provide the title, author(s), and date of submission.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--35", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "paper related to Quantum Physics", "answer_type": "possible"}}}
{"query_id": "ArXiv--36", "dataset": "webvoyager", "query": "Search 'CVPR 2023' and 'CVPR2023' through journal ref on ArXiv to see how many results there are respectively.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--36", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "'CVPR 2023': 48 results; 'CVPR2023': 9 results", "answer_type": "golden"}}}
{"query_id": "ArXiv--37", "dataset": "webvoyager", "query": "Find the names of people in ArXiv's Leadership Team.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--37", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "Ramin Zabih, Yoav Artzi, Stephanie Orphan, Steinn Sigurdsson, and Charles Frankston.", "answer_type": "golden"}}}
{"query_id": "ArXiv--38", "dataset": "webvoyager", "query": "Find the ArXiv Blog on the ArXiv website and summarize the content of its latest article.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--38", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "'Attention arXiv users: Re-implemented RSS', January 31, 2024, <summary>", "answer_type": "possible"}}}
{"query_id": "ArXiv--39", "dataset": "webvoyager", "query": "Search the title 'GPT-4 Technical Report' and access this paper through HTML format. Read the paper on this page and tell me what is 'one of the main goals of developing such models' mentioned in the Introduction.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--39", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "One of the main goals of developing such models is to improve their ability to understand and generate natural language text, particularly in more complex and nuanced scenarios.", "answer_type": "golden"}}}
{"query_id": "ArXiv--40", "dataset": "webvoyager", "query": "How many articles are there on each of the three most recent announce days in the Solar and Stellar Astrophysics section of ArXiv. Choose one at random and answer its title and when the first version was uploaded?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--40", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "astro-ph.SR paper, latest 3 days", "answer_type": "possible"}}}
{"query_id": "ArXiv--41", "dataset": "webvoyager", "query": "Find the button to share arxiv non-profit store and follow the QR code to share the shop. Then add arXiv Forever short sleeve (XL) to your cart.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--41", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "QR code image, Action: add to cart", "answer_type": "golden"}}}
{"query_id": "ArXiv--42", "dataset": "webvoyager", "query": "Find an article published between 1 January 2000 and 1 January 2005 that requires Support Vector Machines in the title and its Journal ref is ACL Workshop.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://arxiv.org/", "metadata": {"original_task_id": "ArXiv--42", "website": "ArXiv", "category": "ArXiv", "additional": {"ground_truth": "'Using a Support-Vector Machine for Japanese-to-English Translation of Tense, Aspect, and Modality'", "answer_type": "golden"}}}
{"query_id": "BBC News--0", "dataset": "webvoyager", "query": "Find a report on the BBC News website about recent developments in renewable energy technologies in the UK.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--0", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "<report> (about developments in renewable energy technologies in the UK)", "answer_type": "possible"}}}
{"query_id": "BBC News--1", "dataset": "webvoyager", "query": "Read the latest health-related news article published on BBC News and summarize the key points discussed.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--1", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "<summary> (about latest health-related article)", "answer_type": "possible"}}}
{"query_id": "BBC News--2", "dataset": "webvoyager", "query": "Read the latest article regarding the environmental impacts of deforestation published within the last two days.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--2", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "<article> (within the last 2 days)", "answer_type": "possible"}}}
{"query_id": "BBC News--3", "dataset": "webvoyager", "query": "Check the leaderboard for Golf's DP World Tour in the SPORT section, what was the name of the most recent tournament, and how many teams have a Total of -10 strokes.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--3", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "Mauritius Open; 5", "answer_type": "possible"}}}
{"query_id": "BBC News--4", "dataset": "webvoyager", "query": "Find the latest article regarding the economic implications of climate change in Europe as reported by BBC News and summarize the central points.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--4", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "<summary> (economic implications of climate change in Europe)", "answer_type": "possible"}}}
{"query_id": "BBC News--5", "dataset": "webvoyager", "query": "Find the article \"What is climate change? A really simple guide\" and use it to answer what human activities are causing climate change.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--5", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "This recent climate change has been caused by human activity, mainly the widespread use of fossil fuels - coal, oil and gas - in homes, factories and transport.", "answer_type": "golden"}}}
{"query_id": "BBC News--6", "dataset": "webvoyager", "query": "Find the top story from BBC News in the technology section for today.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--6", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "Latest news in Innovation - Technology", "answer_type": "possible"}}}
{"query_id": "BBC News--7", "dataset": "webvoyager", "query": "Find a AI-related story under Technology of Business. What is in the first picture in the story?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--7", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "Analyse the first image in story.", "answer_type": "possible"}}}
{"query_id": "BBC News--8", "dataset": "webvoyager", "query": "Get a brief overview of the economic implications of the UK's latest trade deal posted on BBC News and the date when the article was published.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--8", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "CPTPP trade deal, <summary>; 16th July 2023", "answer_type": "possible"}}}
{"query_id": "BBC News--9", "dataset": "webvoyager", "query": "Find out which musician made the headlines in Music News.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--9", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "Taylor Swift", "answer_type": "possible"}}}
{"query_id": "BBC News--10", "dataset": "webvoyager", "query": "Identify the main headlines covering the UK's plan to tackle climate change on BBC News.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--10", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "News about UK's plan to tackle climate change", "answer_type": "possible"}}}
{"query_id": "BBC News--11", "dataset": "webvoyager", "query": "Find out how many teams are in the Scottish Premiership of the Football Tournament and when did the Hibernian team's most recent match start?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--11", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "12 teams, 15:00, 2 Jan 2024", "answer_type": "possible"}}}
{"query_id": "BBC News--12", "dataset": "webvoyager", "query": "Find a picture in the travel section that contains food, tell me what the food is called and what region it comes from.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--12", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "ramen, Tokyo", "answer_type": "possible"}}}
{"query_id": "BBC News--13", "dataset": "webvoyager", "query": "Search for recent news related to Trump and summarize the main points.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--13", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "<summary> (about Trump)", "answer_type": "possible"}}}
{"query_id": "BBC News--14", "dataset": "webvoyager", "query": "Find a news article on BBC News about the impact of the recent tech industry layoffs on the global economy. Summarize the key points and the name of the author, and provide the date of publication.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--14", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "<title>, <author>, <summary> (impact of the recent tech industry layoffs on the global economy)", "answer_type": "possible"}}}
{"query_id": "BBC News--15", "dataset": "webvoyager", "query": "What does the current headline in Natural Wonders tell about.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--15", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "Name that whale! How AI aces animal spotting", "answer_type": "possible"}}}
{"query_id": "BBC News--16", "dataset": "webvoyager", "query": "Identify the most recent development or update in Brexit negotiations as reported on BBC News and report the key points and any stated impacts on European economies.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--16", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "<summary> (Brexit negotiations)", "answer_type": "possible"}}}
{"query_id": "BBC News--17", "dataset": "webvoyager", "query": "How many War related sections are currently in BBC News.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--17", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "2", "answer_type": "possible"}}}
{"query_id": "BBC News--18", "dataset": "webvoyager", "query": "Visit BBC News Audio, What are the best PodCasts for 2023? List 2 of them.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--18", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "2 of them: Believe in Magic, The Gift, Vishal, A Very British Cult, People Who Knew Me, History's Secret Heroes", "answer_type": "possible"}}}
{"query_id": "BBC News--19", "dataset": "webvoyager", "query": "Visit the Athletics calendar for the date of the next earliest game.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--19", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "17th - 18th February 2024", "answer_type": "possible"}}}
{"query_id": "BBC News--20", "dataset": "webvoyager", "query": "Find the latest article in the Green Living section on BBC News and provide a summary of its main points.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--20", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "Earth - Green Living, <article>, <summary>", "answer_type": "possible"}}}
{"query_id": "BBC News--21", "dataset": "webvoyager", "query": "Identify the top headline in the World News section on BBC News and describe the region it is related to.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--21", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "News - World, <headline>, <region>", "answer_type": "possible"}}}
{"query_id": "BBC News--22", "dataset": "webvoyager", "query": "Determine the current top business story on BBC News and give a brief overview of its economic implications.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--22", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "Business, <article>, <summary>, economic implications", "answer_type": "possible"}}}
{"query_id": "BBC News--23", "dataset": "webvoyager", "query": "Identify the latest health-related news on BBC News and summarize the main findings or recommendations.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--23", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "Innovation - Science & Health, <article>, <summary>", "answer_type": "possible"}}}
{"query_id": "BBC News--24", "dataset": "webvoyager", "query": "Search the latest article about space exploration on BBC News and summarize its key points.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--24", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "Search for space exploration, eg. SpaceX blasts private firm's lunar lander into orbit", "answer_type": "possible"}}}
{"query_id": "BBC News--25", "dataset": "webvoyager", "query": "Find the most recent sports analysis article on BBC News related to the English Premier League and summarize its key insights.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--25", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "Sport - Football - Leagues & Cups - Premier League, <article>", "answer_type": "possible"}}}
{"query_id": "BBC News--26", "dataset": "webvoyager", "query": "Locate the latest report on BBC News about the impact of recent natural disasters in Asia and summarize the key points and areas affected.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--26", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "Earth - Weather & Science, eg. Indonesia hit by some of strongest winds recorded", "answer_type": "possible"}}}
{"query_id": "BBC News--27", "dataset": "webvoyager", "query": "Find the most recent article on BBC News about archaeological discoveries and summarize the main findings and their significance.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--27", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "Archaeological discoveries: eg, Historical 10,000BC artefacts found on road project, Significant discoveries", "answer_type": "possible"}}}
{"query_id": "BBC News--28", "dataset": "webvoyager", "query": "Find the Market Data section on BBC News and tell me which company the data comes from.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--28", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "Business - Market Data, Source: Morningstar", "answer_type": "golden"}}}
{"query_id": "BBC News--29", "dataset": "webvoyager", "query": "Visit BBC News Audio and find out which podcast episode is currently featured as the \"New Releases\".", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--29", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "Audio - Podcasts - New Releases ...", "answer_type": "possible"}}}
{"query_id": "BBC News--30", "dataset": "webvoyager", "query": "In the Culture section, identify the latest film release reviewed and provide a brief summary of the review.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--30", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "Culture - Film & TV, <review>, <summary>", "answer_type": "possible"}}}
{"query_id": "BBC News--31", "dataset": "webvoyager", "query": "Check the Sports section for the result of the most recent Manchester United football match.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--31", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "Sunday 11th February, Aston Villa 1:2 Manchester United", "answer_type": "possible"}}}
{"query_id": "BBC News--32", "dataset": "webvoyager", "query": "Find the artificial intelligence section, what is the top headline at this time, and which companies are involved?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--32", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "Innovation - Artificial Intelligence, <headline>, <companies>", "answer_type": "possible"}}}
{"query_id": "BBC News--33", "dataset": "webvoyager", "query": "In the World News section, find the latest war situations of Middle East and provide a brief summary.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--33", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "News - Israel-Gaza War, <article>, <summary>", "answer_type": "possible"}}}
{"query_id": "BBC News--34", "dataset": "webvoyager", "query": "Find The SpeciaList section in Travel and browse the page to see which cities are mentioned.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--34", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "Sydney, New York, Tenerife ...", "answer_type": "possible"}}}
{"query_id": "BBC News--35", "dataset": "webvoyager", "query": "In the Asia section, browse and identify the most recent report about technological advancements and summarize its content.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--35", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "News - World - Asia, <article>, <summary>", "answer_type": "possible"}}}
{"query_id": "BBC News--36", "dataset": "webvoyager", "query": "Look up recent articles in the Africa news section in World, summarize what topics most of these news are about", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--36", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "News - World - Africa, <article>, <summary>", "answer_type": "possible"}}}
{"query_id": "BBC News--37", "dataset": "webvoyager", "query": "Identify the latest book review featured in the Culture section and provide the title and author of the book.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--37", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "Culture - Books, eg, Sloane Crosley: What to do when you lose a friend", "answer_type": "possible"}}}
{"query_id": "BBC News--38", "dataset": "webvoyager", "query": "Find news related to the storm in Weather section and indicate where and when the severe weather occurred.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--38", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "Earth - Weather & Science, article about severe weather, eg, You can't hear it, but this sound can reveal that a tornado is on its way", "answer_type": "possible"}}}
{"query_id": "BBC News--39", "dataset": "webvoyager", "query": "Check the Horse Racing results in Sport section, browse all the games that took place yesterday and see which one had the highest number of runners.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--39", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "eg, 2024-01-30: Chepstow Summer Sessions Handicap Chase, 13 runners", "answer_type": "possible"}}}
{"query_id": "BBC News--40", "dataset": "webvoyager", "query": "Read and summarise a recent story on BBC News about people being injured or killed in wars.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--40", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "News - Israel-Gaza War, <article>", "answer_type": "possible"}}}
{"query_id": "BBC News--41", "dataset": "webvoyager", "query": "Find Golf in BBC News, check the Leaderboard at this point in Women's Majors and count which country has the most players in the top 20? Which player has the best score amongst the Australian players and in what place.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.bbc.com/news/", "metadata": {"original_task_id": "BBC News--41", "website": "BBC News", "category": "BBC News", "additional": {"ground_truth": "Sport - Golf - Leaderboard - Women's Majors, most in top20: American, best in Australian: Grace Kim in 36", "answer_type": "possible"}}}
{"query_id": "Booking--0", "dataset": "webvoyager", "query": "Find a Mexico hotel with deals for December 25-26.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--0", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Be Local", "answer_type": "possible"}}}
{"query_id": "Booking--1", "dataset": "webvoyager", "query": "Find the cheapest available hotel room for a three night stay from 1st Jan in Jakarta. The room is for 2 adults, just answer the cheapest hotel room and the price.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--1", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "OYO 3755 Sweet Home, US$14", "answer_type": "possible"}}}
{"query_id": "Booking--2", "dataset": "webvoyager", "query": "Find a hotel in Ohio from December 20th to December 23rd for 3 adults and 2 rooms.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--2", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Berlin Heritage Inn, US$549 for 3 adults and 2 rooms", "answer_type": "possible"}}}
{"query_id": "Booking--3", "dataset": "webvoyager", "query": "Find a hotel with 4 star and above rating in Los Angeles for 3 days from Dec 18th.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--3", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Freehand Los Angeles", "answer_type": "possible"}}}
{"query_id": "Booking--4", "dataset": "webvoyager", "query": "Search for the cheapest Hotel near Kashi Vishwanath Temple that offer breakfast from Dec 25th - Dec 26th.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--4", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Moonlight Residency, Breakfast included, US$14", "answer_type": "possible"}}}
{"query_id": "Booking--5", "dataset": "webvoyager", "query": "Search a hotel with free WiFi and air conditioning in Bali from Jan 1 to Jan 4, 2024.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--5", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Palasari Villa, free WiFi and air conditioning", "answer_type": "possible"}}}
{"query_id": "Booking--6", "dataset": "webvoyager", "query": "Book one room which provides breakfast, and airport shuttle from Jan 22 to 25 in Los Angeles.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--6", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "La Quinta by Wyndham LAX", "answer_type": "possible"}}}
{"query_id": "Booking--7", "dataset": "webvoyager", "query": "Find a hotel room on January 3-6 that is closest to National University of Singapore and costs less than $500", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--7", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Fragrance Hotel - Ocean View", "answer_type": "possible"}}}
{"query_id": "Booking--8", "dataset": "webvoyager", "query": "Get the hotel with highest review score and free cancelation in Chennai for 20/12/2023 - 21/12/2023.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--8", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "OYO Flagship Valasaravakkam", "answer_type": "possible"}}}
{"query_id": "Booking--9", "dataset": "webvoyager", "query": "Find hotels for 2 adults in London with a price less than 250 dollars for four days starting from December 25. You must browse the page and offer at least 3 options.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--9", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "The Birds Nest Hostel; Umbrella Properties London Excel; Umbrella Properties London Woolwich", "answer_type": "possible"}}}
{"query_id": "Booking--10", "dataset": "webvoyager", "query": "Find a well-reviewed hotel in Paris with available bookings suitable for a couple (2 adults) on Valentine's Day week, February 14-21, 2024, that offers free cancellation options.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--10", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Villa Alessandra", "answer_type": "possible"}}}
{"query_id": "Booking--11", "dataset": "webvoyager", "query": "Reserve a hotel in downtown Chicago with a rating of 9 or higher for a stay from March 20-27, 2024, which offers free cancellation and includes a fitness center.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--11", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Pendry Chicago", "answer_type": "possible"}}}
{"query_id": "Booking--12", "dataset": "webvoyager", "query": "Find a hotel in Paris with a customer review score of 8 or higher, free Wi-Fi, and available for a 5-night stay starting on January 5th, 2024.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--12", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Mode Paris Aparthotel", "answer_type": "possible"}}}
{"query_id": "Booking--13", "dataset": "webvoyager", "query": "Find and book a hotel in Paris with suitable accommodations for a family of four (two adults and two children) offering free cancellation for the dates of February 14-21, 2024.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--13", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Le Bellevue", "answer_type": "possible"}}}
{"query_id": "Booking--14", "dataset": "webvoyager", "query": "Book a highly-rated hotel with a swimming pool and free WiFi near the Louvre Museum in Paris for the weekend of March 3-5, 2024.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--14", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Nolinski Paris", "answer_type": "possible"}}}
{"query_id": "Booking--15", "dataset": "webvoyager", "query": "Find the highest-rated luxury hotel in Rome available for booking from January 10, 2024, to January 20, 2024, for 2 adults. Include the cost, amenities offered, and customer rating.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--15", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Rhinoceros; rating 9.2; cost US$5771; Amenities: air conditioning, free WiFi...", "answer_type": "possible"}}}
{"query_id": "Booking--16", "dataset": "webvoyager", "query": "Look for a hotel in Paris with a user rating of 9 or higher and available for a 5-night stay starting January 15, 2024. The hotel should also offer free Wi-Fi and breakfast included in the price. Provide the name, location, and price per night.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--16", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Zoku Paris; 48 Avenue de la Porte de Clichy, 17th arr., Paris; US$210 per night", "answer_type": "possible"}}}
{"query_id": "Booking--17", "dataset": "webvoyager", "query": "Find a hotel in Paris with a fitness center and a rating of 8 or higher available for a 5-night stay starting from February 14, 2024, and sort the results by best reviewed.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--17", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Villa-des-Prés", "answer_type": "possible"}}}
{"query_id": "Booking--18", "dataset": "webvoyager", "query": "Search a hotel in London with a user rating of 8 or higher for a stay between February 14th, 2024, and February 21st, 2024, suitable for a couple. Provide the name and a short description of the hotel.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--18", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Cromwell Serviced Apartments; Cromwell Serviced Apartments is an apartment featuring rooms with free Wifi and air conditioning in the center of London", "answer_type": "possible"}}}
{"query_id": "Booking--19", "dataset": "webvoyager", "query": "Look for a hotel with customer ratings above an 8.0 in Paris, France for a weekend stay from March 18, 2024, to March 20, 2024, and list top three suggestions based on user reviews.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--19", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Hôtel des Arts Montmartre; Bulgari Hotel Paris; Four Seasons Hotel George V Paris", "answer_type": "possible"}}}
{"query_id": "Booking--20", "dataset": "webvoyager", "query": "Locate a hotel in Rome with a good rating (7 or above) that offers free cancellation and breakfast included, for a three-night stay from February 28 to March 2, 2024, for two adults.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--20", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "47 Boutique Hotel, 8.6 ratings, breakfast included, free cancellation", "answer_type": "possible"}}}
{"query_id": "Booking--21", "dataset": "webvoyager", "query": "Find a hotel in Sydney with a rating of 8 or higher, providing free Wi-Fi and parking, available for a four-night stay starting on March 10, 2024.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--21", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Lexie Suites, 9.1 ratings, free Wi-Fi and parking", "answer_type": "possible"}}}
{"query_id": "Booking--22", "dataset": "webvoyager", "query": "Search for a hotel in Amsterdam with a customer review score of 9 or higher, offering bicycle rentals, for a week-long stay from March 15 to March 22, 2024, for two adults.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--22", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "nhow Amsterdam Rai, 9.0 ratings, bicycle rentals", "answer_type": "possible"}}}
{"query_id": "Booking--23", "dataset": "webvoyager", "query": "Identify a hotel in Tokyo with a spa and wellness center, rated 9 or above, with availability for a five-night stay starting on February 20, 2024. Check if free cancellation is offered.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--23", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "The Peninsula Tokyo, 9.2 ratings, Spa and Fitness center", "answer_type": "possible"}}}
{"query_id": "Booking--24", "dataset": "webvoyager", "query": "Find a hotel in Barcelona for a stay from February 25-28, 2024. Please sort the results by distance from the beach and make sure they offer free Wi-Fi and breakfast.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--24", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Unite Hostel Barcelona, 8.2 ratings, 400m from beach, free Wi-Fi and breakfast", "answer_type": "possible"}}}
{"query_id": "Booking--25", "dataset": "webvoyager", "query": "Search for a hotel in Lisbon with airport shuttle, rated 8.5 or above, available for a six-night stay from March 1 to March 7, 2024, for two adults, breakfast included.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--25", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "The Homeboat Company Parque das Nações-Lisboa, 9.5 ratings, airport shuttle, breakfast included", "answer_type": "possible"}}}
{"query_id": "Booking--26", "dataset": "webvoyager", "query": "Check Booking.com for a 3-star hotel or higher in Paris with a guest rating above 8.0 and available parking for dates February 20-23, 2024.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--26", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "InterContinental Paris Le Grand, an IHG Hotel, US$2208, 8.6 ratings, 5-star, parking", "answer_type": "possible"}}}
{"query_id": "Booking--27", "dataset": "webvoyager", "query": "Locate a hotel in Melbourne offering free parking and free WiFi, for a stay from February 28 to March 4, 2024.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--27", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Nesuto Docklands, 8.9 ratings, free parking and free WiFi", "answer_type": "possible"}}}
{"query_id": "Booking--28", "dataset": "webvoyager", "query": "Find a hotel in Dubai with a swimming pool, for a week-long stay from February 22 to February 29, 2024.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--28", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Park Regis by Prince Dubai Islands, swimming pool", "answer_type": "possible"}}}
{"query_id": "Booking--29", "dataset": "webvoyager", "query": "Search for a hotel in Toronto with a fitness center and a rating of 8+, available for a two-night stay from March 5 to March 7, 2024.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--29", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Fairmont Royal York Hotel, 8.3 ratings, fitness center", "answer_type": "possible"}}}
{"query_id": "Booking--30", "dataset": "webvoyager", "query": "Search for hotels in London from March 20 to March 23, 2024, on Booking. How many hotels are left after applying the Breakfast included and Fitness center filters?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--30", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "After applying the Breakfast included and Fitness center: 228 hotels", "answer_type": "possible"}}}
{"query_id": "Booking--31", "dataset": "webvoyager", "query": "Search for hotels in Rio de Janeiro from March 1-7, 2024, check the Brands filter to see which brand has the most hotels and which brand has the fewest.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--31", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Brands has the most hotels: Windsor, Rede Atlântico; Brands has the fewest hotels: Ramada", "answer_type": "possible"}}}
{"query_id": "Booking--32", "dataset": "webvoyager", "query": "Look for hotels in Sydney from February 24 to February 27, 2024, on Booking. Once the Swimming Pool and Airport Shuttle filters are applied, what is the total number of hotels available?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--32", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Swimming Pool and Airport Shuttle filters are applied: 1 hotel", "answer_type": "possible"}}}
{"query_id": "Booking--33", "dataset": "webvoyager", "query": "Find the Customer Service on the Booking website, browse the questions about cancellation, and tell me 'how do I know whether my booking has been cancelled'.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--33", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "After you cancel a booking with us, you should get an email confirming the cancellation. Make sure to check your inbox and spam/junk mail folders. If you don't receive an email within 24 hours, contact the property to confirm they got your cancellation.", "answer_type": "golden"}}}
{"query_id": "Booking--34", "dataset": "webvoyager", "query": "Search for a hotel in Berlin available for a three-night stay from March 15 to March 18, 2024, for one adult. Tell me the price in USD and CNY for the three-night stay.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--34", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Hotel Adlon Kempinski Berlin, US$1185, CNY 8528", "answer_type": "possible"}}}
{"query_id": "Booking--35", "dataset": "webvoyager", "query": "Browse the booking website to get inspiration for your next trip, and summarize at least three places mentioned in one of the travel articles.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--35", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Ace Hotel, Downtown Los Angeles; The Hollywood Roosevelt; Hotel Indigo, an IHG Hotel", "answer_type": "possible"}}}
{"query_id": "Booking--36", "dataset": "webvoyager", "query": "Search for a budget hotel in Rome under $100 per night for one adult from March 20 to March 23, 2024. Sort the results by price, identify if any of top three results offer breakfast.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--36", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "ROMA GONDOLA SRLS, US$81, no breakfast", "answer_type": "possible"}}}
{"query_id": "Booking--37", "dataset": "webvoyager", "query": "Search for a resort (not hotel) in Bali, detailing the available dates between March 20, 2024, and March 25, 2024, and checking any provided tour or cultural experiences.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--37", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Kappa Senses Ubud, resort, Activity include: Tour or class about local culture", "answer_type": "possible"}}}
{"query_id": "Booking--38", "dataset": "webvoyager", "query": "Look up Vienna hotel options with availability for a 4-night stay from February 28 to March 4, 2024, with amenities that include a Parking, breakfast included, and a rating of 8+ on Booking.com.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--38", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "ARCOTEL Wimberger Wien, 8.2 ratings, Parking, breakfast included", "answer_type": "possible"}}}
{"query_id": "Booking--39", "dataset": "webvoyager", "query": "Find a pet-friendly hotel with parking available in downtown Toronto for the stay of February 24-26, 2024.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--39", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "One King West Hotel and Residence, pet-friendly hotel, parking", "answer_type": "possible"}}}
{"query_id": "Booking--40", "dataset": "webvoyager", "query": "I need to choose a hotel in Shenzhen, please select date (6 March to 8 March 2024) and click the search button. How much it costs when convert the price to Chinese Yuan on the page.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--40", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Four Seasons Hotel Shenzhen, US$522, CNY 3760", "answer_type": "possible"}}}
{"query_id": "Booking--41", "dataset": "webvoyager", "query": "Browse Booking's homepage to find out which company it belongs to.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--41", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Booking Holdings Inc.", "answer_type": "golden"}}}
{"query_id": "Booking--42", "dataset": "webvoyager", "query": "Search for a hotel in Hokkaido for the period March 1 to March 7, 2024, with a rating of 9+, check out its user reviews, which categories are greater than 9 and which are less than 9?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--42", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Heiseikan Shiosaitei Hanatsuki, 9.0 ratings, high: Staff 9.3, Facilities 9.0, Cleanliness 9.4, Comfort 9.3. low: Value for money 8.2, Location 8.7, Free WiFi 8.1", "answer_type": "possible"}}}
{"query_id": "Booking--43", "dataset": "webvoyager", "query": "Search for properties in Los Angeles, browse the results page to see what filters are available, list some of them.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.booking.com/", "metadata": {"original_task_id": "Booking--43", "website": "Booking", "category": "Booking", "additional": {"ground_truth": "Breakfast Included, Wonderful: 9+, Fitness center ...", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--0", "dataset": "webvoyager", "query": "Look up the pronunciation and definition of the word \"sustainability\" on the Cambridge Dictionary.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--0", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "UK: /səˌsteɪ.nəˈbɪl.ə.ti/, US: /səˌsteɪ.nəˈbɪl.ə.t̬i/; the quality of being able to continue over a period of time", "answer_type": "golden"}}}
{"query_id": "Cambridge Dictionary--1", "dataset": "webvoyager", "query": "Find the pronunciation, definition, and a sample sentence for the word 'serendipity'.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--1", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "UK: /ˌser.ənˈdɪp.ə.ti/, US: /ˌser.ənˈdɪp.ə.t̬i/; the fact of finding interesting or valuable things by chance; There is a real element of serendipity in archaeology.", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--2", "dataset": "webvoyager", "query": "Look up the pronunciation, definition, and example sentence for the word \"ubiquitous\" in UK and US English.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--2", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "UK: /juːˈbɪk.wɪ.təs/, US: /juːˈbɪk.wə.t̬əs/; seeming to be everywhere; Leather is very much in fashion this season, as is the ubiquitous denim.", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--3", "dataset": "webvoyager", "query": "Look up the definition, pronunciation, and examples of the word \"zeitgeist.\"", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--3", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "UK: /ˈtsaɪt.ɡaɪst/ or /ˈzaɪt.ɡaɪst/, US: /ˈtsaɪt.ɡaɪst/ or /ˈzaɪt.ɡaɪst/; the general set of ideas, beliefs, feelings, etc. that is typical of a particular period in history; Our methods of working, then, were facilitated and in some ways strongly encouraged by the technologies available to us, the products of a zeitgeist of convergence.", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--4", "dataset": "webvoyager", "query": "Look for the British English pronunciation of the word \"innovate\" and write down the International Phonetic Alphabet (IPA) notation, then find one example sentence provided in the Cambridge Dictionary that uses this word.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--4", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "UK: /ˈɪn.ə.veɪt/; Above all, this proposal aims to correct the allocative inefficiencies of the existing patent system, while preserving the dynamic incentives to innovate.", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--5", "dataset": "webvoyager", "query": "Learn the UK and US pronunciation of the word \"procrastination\", and find one example sentence that reflects its use in context.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--5", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "UK: /prəˌkræs.tɪˈneɪ.ʃən/, US: /proʊˌkræs.tɪˈneɪ.ʃən/; Vacillation and procrastination, out of fears of recession or otherwise, would run grave risks.", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--6", "dataset": "webvoyager", "query": "Search for the word \"sustainability\" on the Cambridge Dictionary, what is the translation of sustainability into Chinese and French in the dictionary.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--6", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "可持续性; durabilité , viabilité", "answer_type": "golden"}}}
{"query_id": "Cambridge Dictionary--7", "dataset": "webvoyager", "query": "Look up the meaning, pronunciation, and an example sentence of the word \"gestalt\" using the Cambridge Dictionary.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--7", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "UK: /ɡəˈʃtælt/, US: /ɡəˈʃtɑːlt/; something such as a structure or experience that, when considered as a whole, has qualities that are more than the total of all its parts; In the comic and cartoon mythoses, however, most gestalts have one default transformation.", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--8", "dataset": "webvoyager", "query": "Find three different meanings of \"dog\" in Cambridge Dictionary.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--8", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "a common animal with four legs, especially kept by people as a pet or to hunt or guard things; a man who is unpleasant or not to be trusted; to follow someone closely and continuously", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--9", "dataset": "webvoyager", "query": "Look up the British pronunciation of the word \"euphoria\" and find an example sentence using that word on the Cambridge Dictionary.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--9", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "UK: /juːˈfɔː.ri.ə/; They were in a state of euphoria for days after they won the prize.", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--10", "dataset": "webvoyager", "query": "Look up the definition and pronunciation of the word \"impeccable\" and also find an example sentence using that word.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--10", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "UK: /ɪmˈpek.ə.bəl/, US: /ɪmˈpek.ə.bəl/; perfect, with no problems or bad parts; His English is impeccable.", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--11", "dataset": "webvoyager", "query": "Look up the pronunciation and definition of the word \"ameliorate,\" and provide an example sentence using the word.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--11", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "UK: /əˈmiːl.jə.reɪt/, US: /əˈmiːl.jə.reɪt/; to make a bad or unpleasant situation better; Foreign aid is badly needed to ameliorate the effects of the drought.", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--12", "dataset": "webvoyager", "query": "Find the pronunciation, definition, and a sample sentence for the word \"resilience\" in the Cambridge Dictionary.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--12", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "UK: /rɪˈzɪl.jəns/, US: /rɪˈzɪl.jəns/; the ability to be happy, successful, etc. again after something difficult or bad has happened; Trauma researchers emphasize the resilience of the human psyche.", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--13", "dataset": "webvoyager", "query": "Find one word, one phrase and one idiom related to euphoria in Cambridge Dictionary.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--13", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "beatitude; bed of roses; for fun", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--14", "dataset": "webvoyager", "query": "Use the Cambridge Dictionary to find the pronunciation, definition, and one example sentence for the word \"concatenate\".", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--14", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "UK: /kənˈkæt.ə.neɪt/, US: /kənˈkæt̬.ə.neɪt/; to put things together as a connected series; The filename is a series of concatenated words with no spaces.", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--15", "dataset": "webvoyager", "query": "Find the pronunciation and a sample sentence for the word \"pandemic.\"", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--15", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "UK: /pænˈdem.ɪk/, US: /pænˈdem.ɪk/; In some parts of the world malaria is still pandemic.", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--16", "dataset": "webvoyager", "query": "Look up the definition of \"cryptocurrency\" on Cambridge Dictionary, provide the pronunciation, and use it in two example sentences that illustrate different contexts.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--16", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "UK: /ˈkrɪp.təʊˌkʌr.ən.si/, US: /ˈkrɪp.toʊˌkɝː.ən.si/; It is one of several prominent efforts to enable complex financial functions in a cryptocurrency; Vice versa, a cryptocurrency can be a legal tender, in which case it is not a virtual currency.", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--17", "dataset": "webvoyager", "query": "How many meanings of \"unblemished\" are given in Cambridge Dictionary? Please browse the page and give the number directly.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--17", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "2", "answer_type": "golden"}}}
{"query_id": "Cambridge Dictionary--18", "dataset": "webvoyager", "query": "Search for \"to behave well\" in Cambridge Dictionary's Thesaurus and see which synonyms the dictionary gives.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--18", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "behaves themselves; be on their best behaviour", "answer_type": "golden"}}}
{"query_id": "Cambridge Dictionary--19", "dataset": "webvoyager", "query": "Try a Cambridge Dictionary translation and tell me which company provided the translation.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--19", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "Microsoft", "answer_type": "golden"}}}
{"query_id": "Cambridge Dictionary--20", "dataset": "webvoyager", "query": "Look up the definition, pronunciation (both UK and US), and find one example sentence for the word \"altruism\" in the Cambridge Dictionary.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--20", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "UK: /ˈæl.tru.ɪ.zəm/, US: /ˈæl.tru.ɪ.zəm/; Def: willingness to do things that bring advantages to others, even if it results in disadvantage for yourself; She's not known for her altruism.", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--21", "dataset": "webvoyager", "query": "Search for the word \"ephemeral\" on Cambridge Dictionary and find its translation into Spanish.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--21", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "efímero", "answer_type": "golden"}}}
{"query_id": "Cambridge Dictionary--22", "dataset": "webvoyager", "query": "Use the Cambridge Dictionary to find the definition, UK pronunciation, and an example sentence for the word \"quintessential.\"", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--22", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "UK: /ˌkwɪn.tɪˈsen.ʃəl/, US:/ˌkwɪn.tɪˈsen.ʃəl/; Def: being the most typical example or most important part of something; Sheep's milk cheese is the quintessential Corsican cheese.", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--23", "dataset": "webvoyager", "query": "Find the US English pronunciation of the word \"meticulous\" using the Cambridge Dictionary and note the International Phonetic Alphabet (IPA) notation, then find one example sentence provided in the dictionary using this word.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--23", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "US: /məˈtɪk.jə.ləs/; Many hours of meticulous preparation have gone into writing the book.", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--24", "dataset": "webvoyager", "query": "Look up the definition and both UK and US pronunciation of the word \"reverie,\" and provide an example sentence using the word from Cambridge Dictionary.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--24", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "UK: /ˈrev.ər.i/, US:/ˈrev.ɚ.i/; He was lost in reverie until he suddenly heard someone behind him.", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--25", "dataset": "webvoyager", "query": "Find two different meanings of the word \"harmony\" in the Cambridge Dictionary.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--25", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "Meaning 1: a pleasant musical sound made by different notes being played or sung at the same time; Meaning 2: a situation in which people are peaceful and agree with each other, or when things seem right or suitable together", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--26", "dataset": "webvoyager", "query": "Search for the word \"nostalgia\" in the Cambridge Dictionary and report the translation of this word into Chinese.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--26", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "怀旧", "answer_type": "golden"}}}
{"query_id": "Cambridge Dictionary--27", "dataset": "webvoyager", "query": "Look up the meaning, pronunciation, and an example sentence of the word \"solitude\" using the Cambridge Dictionary.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--27", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "UK: /ˈsɒl.ɪ.tʃuːd/, US: /ˈsɑː.lə.tuːd/; the situation of being alone without other people; After months of solitude at sea it felt strange to be in company.", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--28", "dataset": "webvoyager", "query": "Search for \"feel giddy\" in Cambridge Dictionary's Thesaurus and list the synonyms the dictionary provides.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--28", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "Synonyms: feel dizzy; whirl; spin; reel", "answer_type": "golden"}}}
{"query_id": "Cambridge Dictionary--29", "dataset": "webvoyager", "query": "Go to the Plus section of Cambridge Dictionary, find Image quizzes and do an easy quiz about Animals and tell me your final score.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--29", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "Action: finish an easy Image quiz about Animals", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--30", "dataset": "webvoyager", "query": "Find the grammar for present perfect simple uses in English, including examples of affirmative, negative, and interrogative sentences, on the Cambridge Dictionary website.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--30", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "Present perfect simple: uses; I've been there a couple of times before; We haven't met before, have we?; Have you ever tried to write your name and address with your left hand?", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--31", "dataset": "webvoyager", "query": "Look up the use of modal verbs in grammar section for expressing possibility (e.g., 'might', 'could', 'may') and find examples of their usage in sentences on the Cambridge Dictionary.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--31", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "She might sell her house; We could have lunch early; It may be possible for him to get home tonight.", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--32", "dataset": "webvoyager", "query": "Search for the differences between \"fewer\" and \"less\" in grammar section, and provide examples illustrating their correct usage from the Cambridge Dictionary.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--32", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "Article: 'Less or fewer?'; I do less work at weekends than I used to; Better cycle routes would mean fewer cars and fewer accidents.", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--33", "dataset": "webvoyager", "query": "Find explanations and examples of the passive voice in Grammar on the Cambridge Dictionary website.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--33", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "Cambridge University Press published this book. (active); This book was published by Cambridge University Press. (passive)", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--34", "dataset": "webvoyager", "query": "Use the Cambridge Dictionary to understand the rules for forming and using comparative and superlative adjectives in English Grammar, including example sentences.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--34", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "This car is more expensive than my last one; Joe used to be the slowest runner in the class.", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--35", "dataset": "webvoyager", "query": "Find the most common prepositions that consist of groups of words on the Cambridge Dictionary.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--35", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "ahead of; except for; instead of; owing to; apart from; in addition to ...", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--36", "dataset": "webvoyager", "query": "Search for guidelines on using indirect speech in English, with examples of how to change direct speech to indirect speech, on the Cambridge Dictionary.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--36", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "Example: direct: I'm tired, I said; indirect: I told them (that) I was tired.", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--37", "dataset": "webvoyager", "query": "Use Cambridge Dictionary to understand the use of articles ('a', 'an', 'the') in English Grammar, including examples of usage with both countable and uncountable nouns.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--37", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "<understandings>, Countable nouns: I have a sister and a brother. That was an excellent meal. The lion roared. Uncountable nouns: I hope we have nice weather. The weather was awful last summer...", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--38", "dataset": "webvoyager", "query": "Go to the Plus section of Cambridge Dictionary, finish a recommended Grammar quiz without login and tell me your final score.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--38", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "Action: finish a recommended Grammar quiz", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--39", "dataset": "webvoyager", "query": "Try the Word Scramble game in the Plus section, Can you beat the clock by unscrambling the letters to spell the word? (Just try the first example.)", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--39", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "Action: finish the Word Scramble game in the Plus section", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--40", "dataset": "webvoyager", "query": "Look up the definition, pronunciation in UK English, and at least one example using the word 'mitigate'.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--40", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "UK: /ˈmɪt.ɪ.ɡeɪt/, US: /ˈmɪt̬.ə.ɡeɪt/; to make something less harmful, unpleasant, or bad; It is unclear how to mitigate the effects of tourism on the island.", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--41", "dataset": "webvoyager", "query": "Find and browse Cambridge Dictionary Shop section, listing 3 items.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--41", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "Shop: Cambridge Dictionary organic cotton Hoodie; On top of the world organic cotton T shirt - white writing variety; Multitasking Mug", "answer_type": "possible"}}}
{"query_id": "Cambridge Dictionary--42", "dataset": "webvoyager", "query": "Convert the Cambridge Dictionary homepage from English (UK) to Deutsch.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://dictionary.cambridge.org/", "metadata": {"original_task_id": "Cambridge Dictionary--42", "website": "Cambridge Dictionary", "category": "Cambridge Dictionary", "additional": {"ground_truth": "Action: Click English (UK), change language to: Deutsch", "answer_type": "golden"}}}
{"query_id": "Coursera--0", "dataset": "webvoyager", "query": "Find a beginner-level online course about '3d printing' which lasts 1-3 months, and is provided by a renowned university.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--0", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Rapid Prototyping Using 3D Printing, Specialization", "answer_type": "possible"}}}
{"query_id": "Coursera--1", "dataset": "webvoyager", "query": "Search for a beginner-level online course about Python programming, suitable for someone who has no programming experience on Coursera.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--1", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Python for Data Science, AI & Development", "answer_type": "possible"}}}
{"query_id": "Coursera--2", "dataset": "webvoyager", "query": "Find a Beginner's Spanish Specialization on Coursera and show all the courses in this Specialization.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--2", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Learn Spanish: Basic Spanish Vocabulary, Specialization; Spanish Vocabulary: Meeting People; Spanish Vocabulary: Cultural Experience; Spanish Vocabulary: Sports, Travel, and the Home; Spanish Vocabulary: Careers and Social Events; Spanish Vocabulary Project", "answer_type": "possible"}}}
{"query_id": "Coursera--3", "dataset": "webvoyager", "query": "Identify a new course or Specialization on Coursera related to Python Data Science, sort the courses by newest, what the first course is and which institution offers it.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--3", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Data Science with NumPy, Sets, and Dictionaries; Duke University", "answer_type": "possible"}}}
{"query_id": "Coursera--4", "dataset": "webvoyager", "query": "Identify a course or Specialization on Coursera that helps business process management with a rating 4.7.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--4", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Business Foundations, Specialization", "answer_type": "possible"}}}
{"query_id": "Coursera--5", "dataset": "webvoyager", "query": "Identify a Specialization on Coursera that teaches C++ programming for beginners, provide the name and what the learning outcomes are.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--5", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Coding for Everyone: C and C++, Specialization; Outcomes: Learn in-demand skills from university and industry experts; Master a subject or tool with hands-on projects; Develop a deep understanding of key concepts; Earn a career certificate from University of California, Santa Cruz", "answer_type": "possible"}}}
{"query_id": "Coursera--6", "dataset": "webvoyager", "query": "Identify a course on Coursera related to 'Artificial Intelligence for Healthcare' and note the course duration along with the number of quizzes in Assessments.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--6", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Fundamentals of Machine Learning for Healthcare; 14 hours (approximately); 19 quizzes", "answer_type": "possible"}}}
{"query_id": "Coursera--7", "dataset": "webvoyager", "query": "Find a course on Coursera that teaches Reinforcement Learning for Intermediate with a rating of at least 4.5. Provide the name of the course, the institution offering it, and the number of reviews it has received.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--7", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Reinforcement Learning, Specialization; University of Alberta; 3.3K reviews", "answer_type": "possible"}}}
{"query_id": "Coursera--8", "dataset": "webvoyager", "query": "Find a free course related to 'R for Data Science' available on Coursera. Scroll to find a course with the Free tag. What language is the course taught in?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--8", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Introducción a Data Science: Programación Estadística con R; Taught in Spanish", "answer_type": "possible"}}}
{"query_id": "Coursera--9", "dataset": "webvoyager", "query": "Identify a Coursera course on artificial intelligence ethics that has a duration of less than 20 hours to complete and has been rated 4+ stars by participants.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--9", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Artificial Intelligence: Ethics & Societal Challenges", "answer_type": "possible"}}}
{"query_id": "Coursera--10", "dataset": "webvoyager", "query": "Locate an introductory course related to artificial intelligence on Coursera, ensuring it's suitable for beginners and contains at least one module discussing Ethical Considerations.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--10", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Introduction to Artificial Intelligence (AI)", "answer_type": "possible"}}}
{"query_id": "Coursera--11", "dataset": "webvoyager", "query": "Search for a Specialization on Coursera about project management that is produced by a university, show a testimonial for this Specialization.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--11", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Project Management, Specialization; Felipe M. \"To be able to take courses at my own pace and rhythm has been an amazing experience. I can learn whenever it fits my schedule and mood.\"", "answer_type": "possible"}}}
{"query_id": "Coursera--12", "dataset": "webvoyager", "query": "Look for a Coursera course (not Specialization) that teaches Java programming basics.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--12", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Introduction to Java", "answer_type": "possible"}}}
{"query_id": "Coursera--13", "dataset": "webvoyager", "query": "Look for a Specialization on Coursera that teaches Python programming, and identify the skills you will learn by taking this Specialization.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--13", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Python 3 Programming, Specialization; Learn Python 3 basics, from the basics to more advanced concepts like lists and functions; Practice and become skilled at solving problems and fixing errors in your code; Gain the ability to write programs that fetch data from internet APIs and extract useful information.", "answer_type": "possible"}}}
{"query_id": "Coursera--14", "dataset": "webvoyager", "query": "Find a course on Coursera related to Introductory Project Management that includes modules on Agile methodology.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--14", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Agile Project Management", "answer_type": "possible"}}}
{"query_id": "Coursera--15", "dataset": "webvoyager", "query": "Find a course on Coursera named 'Introduction to Mathematical Thinking' offered by Stanford, what is the percentage (rounded) of 5 star ratings in reviews and which level has the least percentage?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--15", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "85%; 2-star", "answer_type": "possible"}}}
{"query_id": "Coursera--16", "dataset": "webvoyager", "query": "Identify a course on Coursera named 'Introduction to Finance: The Basics', who is the course instructor and what other courses does he/she teach.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--16", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Xi Yang; Introduction to Finance: The Role of Financial Markets", "answer_type": "possible"}}}
{"query_id": "Coursera--17", "dataset": "webvoyager", "query": "How many results are there for a search on Coursera for Machine Learning, then filtered by Credit Eligible and 1-4 Years duration?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--17", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "23", "answer_type": "possible"}}}
{"query_id": "Coursera--18", "dataset": "webvoyager", "query": "Identify a Coursera course that teaches JavaScript, which is beginner-friendly and includes a certificate upon completion.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--18", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Programming with JavaScript", "answer_type": "possible"}}}
{"query_id": "Coursera--19", "dataset": "webvoyager", "query": "Identify a course on Coursera that provides an introduction to Psychology, list the instructor's name, the institution offering it, and how many hours it will approximately take to complete.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--19", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Instructor: Paul Bloom; Yale University; 14 hours", "answer_type": "possible"}}}
{"query_id": "Coursera--20", "dataset": "webvoyager", "query": "Find an Intermediate-level online course on Coursera about 'Blockchain Technology' which lasts between 1 to 4 weeks, and is provided by a well-known institution. Also, note the course's main goals and the instructor's name.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--20", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Introduction to Supply Chain Finance & Blockchain Technology; New York Institute of Finance; Instructors: Oliver Belin, Jack Farmer; <summary of main goals>", "answer_type": "possible"}}}
{"query_id": "Coursera--21", "dataset": "webvoyager", "query": "Search for an online course on Coursera about 'Digital Marketing', suitable for beginner-level learners. Specify the course duration, the main learning outcomes, and the institution offering the course.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--21", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Foundations of Digital Marketing and E-commerce; Google; Instructors: Google Career Certificates; <outcomes>; duration: 1 - 4 weeks or 25 hours (approximately)", "answer_type": "possible"}}}
{"query_id": "Coursera--22", "dataset": "webvoyager", "query": "Identify a Specialization on Coursera that focuses on 'Human Resource', list the courses included in this Specialization, and the institution offering it.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--22", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Human Resource Management: HR for People Managers Specialization; University of Minnesota; Course 1: Preparing to Manage Human Resources; Course 2: Recruiting, Hiring, and Onboarding Employees; Course 3: Managing Employee Performance; Course 4: Managing Employee Compensation; Course 5: Human Resources Management Capstone: HR for People Managers", "answer_type": "possible"}}}
{"query_id": "Coursera--23", "dataset": "webvoyager", "query": "Find a course on Coursera about 'Artificial Intelligence Ethics', which has a duration of less than 5 weeks and has been rated 4.5 stars or higher. Provide the course name and the instructor's name.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--23", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Artificial Intelligence: Ethics & Societal Challenges; Lund University; 4.6 stars; Instructors: Maria Hedlund, Lena Lindström, Erik Persson", "answer_type": "possible"}}}
{"query_id": "Coursera--24", "dataset": "webvoyager", "query": "Locate an online course on Coursera related to 'Sustainability' that belongs to Physical Science and Engineering subject. The course should include a module on Measuring Sustainability. Note the course duration and the offering institution.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--24", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Introduction to Sustainability; University of Illinois at Urbana-Champaign; Instructors: Dr. Jonathan Tomkin; duration: Approx. 25 hours to complete, 3 weeks at 8 hours a week", "answer_type": "possible"}}}
{"query_id": "Coursera--25", "dataset": "webvoyager", "query": "Find a course on Coursera about 'Relativity' for beginners. List the course's main topics and the estimated time (in hours) required to complete it.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--25", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Understanding Einstein: The Special Theory of Relativity; <topic>; Approx. 80 hours to complete", "answer_type": "possible"}}}
{"query_id": "Coursera--26", "dataset": "webvoyager", "query": "Identify a Specialization on Coursera that offers an overview of 'Renewable Energy'. The Specialization should be beginner-level and include a course on Renewable Energy Futures. Note the instructor's name and the number of weeks required to complete the course if I spend 5 hours a week.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--26", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Renewable Energy Specialization; Instructors: Stephen R. Lawrence, Paul Komor; 2 months", "answer_type": "possible"}}}
{"query_id": "Coursera--27", "dataset": "webvoyager", "query": "Search for a Specialization on Coursera about 'Data Visualization' that includes a project. Provide the name of the Specialization, the institution offering it, and the skills that will be developed by completing it.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--27", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Data Visualization with Tableau Specialization; University of California, Davis; <skills>", "answer_type": "possible"}}}
{"query_id": "Coursera--28", "dataset": "webvoyager", "query": "Locate a Coursera Guided project related to 'Astrophysics' suitable for advanced learners. Mention the course duration, the institution offering it, and the main subjects covered in the course.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--28", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Explore Einstein's theories of Relativity using Wolfram; Coursera Project Network; 2 hours; <main subjects>", "answer_type": "possible"}}}
{"query_id": "Coursera--29", "dataset": "webvoyager", "query": "Browse the Coursera website and find the price required for one year of Coursera Plus. How much is the discount? Then list 3 companies that work with Coursera.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--29", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "$399/year, discount: 59 / month * 12 - 399 = 309; Google, IBM, and Imperial College London ...", "answer_type": "possible"}}}
{"query_id": "Coursera--30", "dataset": "webvoyager", "query": "Locate the course 'Modern Art & Ideas' on Coursera offered by The Museum of Modern Art. Find out the percentage (rounded) of 3-star ratings in the reviews and note which star level has the lowest percentage.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--30", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "3 stars: 2.5%; 1 star has the lowest percentage", "answer_type": "possible"}}}
{"query_id": "Coursera--31", "dataset": "webvoyager", "query": "Search for the course 'Exploring Quantum Physics' on Coursera, offered by the University of Maryland, College Park. Identify the percentage (rounded) of 5-star ratings in the reviews.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--31", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "52.6%", "answer_type": "possible"}}}
{"query_id": "Coursera--32", "dataset": "webvoyager", "query": "Search for 'Data Analysis' courses on Coursera. Apply filters to find courses that are 'Beginner Level' and have a duration ranging from 1 to 3 months. Determine the total count of courses that match these specifications.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--32", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "568 results", "answer_type": "possible"}}}
{"query_id": "Coursera--33", "dataset": "webvoyager", "query": "Find a beginner level Coursera course related to \"Internet of Things (IoT)\" with a high rating. Provide the course name, instructor's name, and a brief summary of the skills that will be taught.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--33", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Introduction and Programming with IoT Boards; Instructor: James Won-Ki HONG; <summary>", "answer_type": "possible"}}}
{"query_id": "Coursera--34", "dataset": "webvoyager", "query": "Find the course on Coursera named 'Essentials of Global Health'. Determine the instructor of this course and summarize his bio, note if there are any additional courses he offers on Coursera.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--34", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Instructor: Richard Skolnik; <summary> of bio; no other course", "answer_type": "possible"}}}
{"query_id": "Coursera--35", "dataset": "webvoyager", "query": "Find a Coursera course on Sustainable Agriculture practices, and detail the course's objectives and the background of the lead instructor.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--35", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Introduction to Sustainability; <objectives>; Instructor: Dr. Jonathan Tomkin", "answer_type": "possible"}}}
{"query_id": "Coursera--36", "dataset": "webvoyager", "query": "Browse Coursera, which universities offer Master of Advanced Study in Engineering degrees? Tell me what is the latest application deadline for this degree?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--36", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Master of Advanced Study in Engineering; UC Berkeley College of Engineering; Fall 2024; March 1, 2024: Fall 2024 Priority Application Deadline; April 1, 2024: Fall 2024 Final Application Deadline", "answer_type": "possible"}}}
{"query_id": "Coursera--37", "dataset": "webvoyager", "query": "Browse the Coursera homepage and list at least three free courses.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--37", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Business Analytics with Excel: Elementary to Advanced; Cybersecurity for Everyone; Financial Markets ...", "answer_type": "possible"}}}
{"query_id": "Coursera--38", "dataset": "webvoyager", "query": "Browse Coursera, which universities and companies from Australia are partners of Coursera? List all of them.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--38", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Macquarie University; The University of Melbourne; The University of Sydney; University of Western Australia; UNSW Sydney (The University of New South Wales)", "answer_type": "golden"}}}
{"query_id": "Coursera--39", "dataset": "webvoyager", "query": "Find the Space Safety course offered by TUM on Coursera. How many videos are there in module 2? What is the name of each video?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--39", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "6 videos; Introduction; Space Debris; Mitigation; Measurements; Protection; Atmospheric Re-entry", "answer_type": "golden"}}}
{"query_id": "Coursera--40", "dataset": "webvoyager", "query": "Browse Coursera for Business and Coursera for Teams and summarise some of their advantages.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--40", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "Coursera for Business: Strengthen critical skills with content you can trust; Develop, retain, and advance critical talent; Lower training costs without sacrificing quality; Track and measure skills to demonstrate ROI; Coursera for Teams: Upskill 5 to 125 employees; Unlimited access to 10,250+ learning opportunities; Program setup and launch tools; Analytics and benchmarking dashboard", "answer_type": "possible"}}}
{"query_id": "Coursera--41", "dataset": "webvoyager", "query": "Browse online degrees section on Coursera and list 3 Bachelor's degree programmes.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.coursera.org/", "metadata": {"original_task_id": "Coursera--41", "website": "Coursera", "category": "Coursera", "additional": {"ground_truth": "BSc Computer Science, University of London; Bachelor of Science in Cybersecurity Technology, University of Maryland Global Campus; Bachelor of Information Technology, Illinois Institute of Technology", "answer_type": "possible"}}}
{"query_id": "ESPN--0", "dataset": "webvoyager", "query": "Look up the current standings for the NBA Eastern Conference on ESPN.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--0", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "<standings> (NBA Eastern Conference)", "answer_type": "possible"}}}
{"query_id": "ESPN--1", "dataset": "webvoyager", "query": "Check the latest articles on ESPN for updates on any trades that occurred in the NBA within the past 2 days.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--1", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "<article> (trades), maybe no article", "answer_type": "possible"}}}
{"query_id": "ESPN--2", "dataset": "webvoyager", "query": "Show the scores and main highlight of the Milwaukee Bucks game that took place within the last 2 days on ESPN.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--2", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "<score> (Milwaukee Bucks vs xxx); <highlight>", "answer_type": "possible"}}}
{"query_id": "ESPN--3", "dataset": "webvoyager", "query": "Retrieve the final score from the most recent NBA game broadcast on ESPN, including the playing teams' names and the date of the match.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--3", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "<score> (most recent NBA game)", "answer_type": "possible"}}}
{"query_id": "ESPN--4", "dataset": "webvoyager", "query": "Check ESPN for the final scores of NBA games that were played yesterday.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--4", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "<score> (yesterday)", "answer_type": "possible"}}}
{"query_id": "ESPN--5", "dataset": "webvoyager", "query": "Identify the top scorer in the NBA from the latest completed game and note down the points scored, the team they play for, and their position on the team.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--5", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "<player>; <PTS>; <team>; <position> (eg, James Harden; scored 35 points; LA Clippers; Shooting Guard (SG))", "answer_type": "possible"}}}
{"query_id": "ESPN--6", "dataset": "webvoyager", "query": "Find the result of the latest basketball game between the Los Angeles Lakers and the Boston Celtics, including the final score and top scorer from the match.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--6", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "Los Angeles Lakers vs Boston Celtics, 115 - 126; Kristaps Porzingis", "answer_type": "possible"}}}
{"query_id": "ESPN--7", "dataset": "webvoyager", "query": "Retrieve the final score and a brief summary of the latest NBA game played by the Los Angeles Lakers as reported on ESPN.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--7", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "<score> (latest, Los Angeles Lakers vs xxx); <summary>", "answer_type": "possible"}}}
{"query_id": "ESPN--8", "dataset": "webvoyager", "query": "Find information on ESPN about the top three scoring leaders in the NBA as of the last day of the regular season, and note which teams they play for.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--8", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "Joel Embiid (PHI) with 34.4 PPG, Luka Doncic (DAL) with 32.9 PPG, and Giannis Antetokounmpo (MIL) with 31.4 PPG.", "answer_type": "possible"}}}
{"query_id": "ESPN--9", "dataset": "webvoyager", "query": "Search on ESPN for how many teams have Los Angeles in their name and how many of them are NBA.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--9", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "10 teams have Los Angeles in their name; 2 teams are NBA", "answer_type": "golden"}}}
{"query_id": "ESPN--10", "dataset": "webvoyager", "query": "Check ESPN for the score and a brief recap of the latest college football championship game.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--10", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "<score>; <summary> (latest college football championship game)", "answer_type": "possible"}}}
{"query_id": "ESPN--11", "dataset": "webvoyager", "query": "How many NBA teams are there and list all the teams with 'New' in their name.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--11", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "30; New York Knicks; New Orleans Pelicans", "answer_type": "golden"}}}
{"query_id": "ESPN--12", "dataset": "webvoyager", "query": "The first three Top Headlines in the current ESPN home page correspond to which sports leagues?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--12", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "<League 1>; <League 2>; <League 3>", "answer_type": "possible"}}}
{"query_id": "ESPN--13", "dataset": "webvoyager", "query": "Identify today's top headline in the Basketball section of ESPN, and summarize the main points of that article.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--13", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "<headline>; <summary>", "answer_type": "possible"}}}
{"query_id": "ESPN--14", "dataset": "webvoyager", "query": "Find the latest news about NBA trades or player movements on ESPN and report the most recent trade deal OR player acquisition.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--14", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "News about NBA trades", "answer_type": "possible"}}}
{"query_id": "ESPN--15", "dataset": "webvoyager", "query": "Check the scores of the NBA games played on December 25, 2023.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--15", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "(US Time) Bucks vs Knicks, 122 - 129; Warriors vs Nuggets, 114 - 120; Celtics vs Lakers, 126 - 115; 76ers vs Heat, 113 - 119; Mavericks vs Suns, 128 - 114", "answer_type": "golden"}}}
{"query_id": "ESPN--16", "dataset": "webvoyager", "query": "Check the schedule for the NBA game on December 25, 2023, and provide the teams that are playing and their current standings in their respective conferences.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--16", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "teams and current standings", "answer_type": "possible"}}}
{"query_id": "ESPN--17", "dataset": "webvoyager", "query": "Check out the NBA Basketball Power Index 2023-24 to see which teams are in first place and which are in last place.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--17", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "Boston Celtics; San Antonio Spurs", "answer_type": "golden"}}}
{"query_id": "ESPN--18", "dataset": "webvoyager", "query": "How many sports leagues can you choose from on the ESPN home page?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--18", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "31 (in ESPN America)", "answer_type": "golden"}}}
{"query_id": "ESPN--19", "dataset": "webvoyager", "query": "Who has the highest salary in Boston Celtics Roster 2023-24?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--19", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "Jrue Holiday", "answer_type": "golden"}}}
{"query_id": "ESPN--20", "dataset": "webvoyager", "query": "Look up the current leaders in rebounds and assists in the NBA Western Conference on ESPN.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--20", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "For Western, rebounds: Domantas Sabonis; assists: Luka Doncic", "answer_type": "possible"}}}
{"query_id": "ESPN--21", "dataset": "webvoyager", "query": "Show the scores and main highlight of the Denver Nuggets game that occurred within the last 3 days on ESPN.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--21", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "<score> within 3 days; <highlight>", "answer_type": "possible"}}}
{"query_id": "ESPN--22", "dataset": "webvoyager", "query": "Find the latest Team transactions in the NBA within the past week.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--22", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "Team transaction: eg, February 1, TRANSACTION: Dallas Mavericks, Assigned F Olivier-Maxence Prosper to the Texas Legends of the G League.", "answer_type": "possible"}}}
{"query_id": "ESPN--23", "dataset": "webvoyager", "query": "Find the result of the latest basketball game between the Miami Heat and the New York Knicks, including the final score and top rebounder from the match.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--23", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "NBA <score>, latest, Miami Heat - New York Knicks, eg, January 28, 2024, 109 - 125, Top rebounder: B. Adebayo, P. Achiuwa", "answer_type": "possible"}}}
{"query_id": "ESPN--24", "dataset": "webvoyager", "query": "Find the final score from the most recent NFL game broadcast on ESPN, including the teams' names and the date of the match.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--24", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "NFL <score>, latest, eg, January 29, 2024, Chiefs - Ravens, 17 - 10", "answer_type": "possible"}}}
{"query_id": "ESPN--25", "dataset": "webvoyager", "query": "Identify the player with the most assists in the latest NBA game and show me the assists, the team they play for, and their position.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--25", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "NBA game, latest, eg, February 2, 2024, Lakers - Celtics, 114 - 105, most assist: 14, D. Russell, position: PG, team: Los Angeles Lakers", "answer_type": "possible"}}}
{"query_id": "ESPN--26", "dataset": "webvoyager", "query": "Find information on ESPN NBA schedule. Tell me yesterday's matchups in which the loser high was higher than the winner high.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--26", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "NBA game, yesterday, eg, January 26, 2024, Philadelphia - Indiana, 134 - 122, winner high 26 - loser high 31; Denver - New York, 122 - 84, winner high 26 - loser high 31; Chicago - Los Angeles, 141 - 132, winner high 29 - loser high 32", "answer_type": "possible"}}}
{"query_id": "ESPN--27", "dataset": "webvoyager", "query": "Search on ESPN for how many teams have 'Golden' in their name and how many of them are in the NHL.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--27", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "30 teams in search results, 1 team Vegas Golden Knights (NHL)", "answer_type": "golden"}}}
{"query_id": "ESPN--28", "dataset": "webvoyager", "query": "How many MLB teams are there and list all the teams with 'City' in their name.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--28", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "30 teams in search results, Kansas City Royals", "answer_type": "golden"}}}
{"query_id": "ESPN--29", "dataset": "webvoyager", "query": "Identify today's top headline in the Soccer section of ESPN, and summarize the main points of that article.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--29", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "<headline> today", "answer_type": "possible"}}}
{"query_id": "ESPN--30", "dataset": "webvoyager", "query": "Check out the NHL Standings 2023-24 on ESPN to see which teams are at the top and which are at the bottom in Eastern and Western Conference. What about the situation in Division.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--30", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "NHL Standings 2023-24, top - bottom, Eastern Conference: New York Rangers - Columbus Blue Jackets; Western Conference: Vancouver Canucks - Chicago Blackhawks; Division: ATLANTIC, Boston Bruins - Montreal Canadiens; METROPOLITAN: New York Rangers - Columbus Blue Jackets; CENTRAL: Dallas Stars - Chicago Blackhawks; PACIFIC: Vancouver Canucks - San Jose Sharks", "answer_type": "possible"}}}
{"query_id": "ESPN--31", "dataset": "webvoyager", "query": "Who has the heaviest weight among infielders in the New York Yankees Roster 2023-24?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--31", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "Carlos Rodon, 255 lbs", "answer_type": "golden"}}}
{"query_id": "ESPN--32", "dataset": "webvoyager", "query": "Review yesterday's NHL game results on ESPN, focusing on teams' performance.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--32", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "NHL <score> yesterday", "answer_type": "possible"}}}
{"query_id": "ESPN--33", "dataset": "webvoyager", "query": "Locate the latest ESPN articles discussing potential MVP candidates in the NFL for 2023 season.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--33", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "Article, '2023 NFL MVP: Ranking five finalists, plus stats'", "answer_type": "possible"}}}
{"query_id": "ESPN--34", "dataset": "webvoyager", "query": "Visit ESPN to view the Philadelphia 76ers' latest injuries.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--34", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "Philadelphia 76ers - Injuries, latest", "answer_type": "possible"}}}
{"query_id": "ESPN--35", "dataset": "webvoyager", "query": "Browse ESPN to find out when the next game of the Los Angeles Lakers will start. Then navigate to the ticket purchasing website from ESPN, what is the cheapest ticket available.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--35", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "next game of Los Angeles Lakers, <price>", "answer_type": "possible"}}}
{"query_id": "ESPN--36", "dataset": "webvoyager", "query": "Search for Lionel Messi's last 5 games, which teams has he played for, and what are the results?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--36", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "<games>; Inter Miami CF, <results>", "answer_type": "possible"}}}
{"query_id": "ESPN--37", "dataset": "webvoyager", "query": "Check out LeBron James' Stats to see how many games he has played in his career so far.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--37", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "1471", "answer_type": "possible"}}}
{"query_id": "ESPN--38", "dataset": "webvoyager", "query": "Check Los Angeles Lakers Stats 2023-24, calculate Anthony Davis' games played (GP) percentage, tell me if there are other players with the same games played percentage as Anthony Davis.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--38", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "54/58 = 93.1%, no other players, https://www.espn.com/nba/team/stats/_/name/lal/los-angeles-lakers", "answer_type": "possible"}}}
{"query_id": "ESPN--39", "dataset": "webvoyager", "query": "Check the New York Jets Depth Chart in the NFL section of ESPN and identify the players listed as injured in the 2ND position.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--39", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "check IR on https://www.espn.com/nfl/team/depth/_/name/nyj/new-york-jets", "answer_type": "possible"}}}
{"query_id": "ESPN--40", "dataset": "webvoyager", "query": "Browse the ESPN+ page from ESPN for a brief summary of what ESPN+ Tools is used for.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--40", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "Bracket Predictor, Bracket Analyzer, Custom Dollar Value Generator", "answer_type": "possible"}}}
{"query_id": "ESPN--41", "dataset": "webvoyager", "query": "Find out which four teams the NFC North contains in the NFL on ESPN.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--41", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "Chicago Bears, Detroit Lions, Green Bay Packers, and Minnesota Vikings", "answer_type": "golden"}}}
{"query_id": "ESPN--42", "dataset": "webvoyager", "query": "Check out NCAAM standings on ESPN, what are the teams with equal wins and losses in the America East Conference currently?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--42", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "check America East Conference on https://www.espn.com/mens-college-basketball/standings", "answer_type": "possible"}}}
{"query_id": "ESPN--43", "dataset": "webvoyager", "query": "Check out NCAAW recruiting on ESPN, what colleges are the top three players from?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.espn.com/", "metadata": {"original_task_id": "ESPN--43", "website": "ESPN", "category": "ESPN", "additional": {"ground_truth": "espnW Rankings Class of 2023, Judea Watkins from USC, Mikaylah Williams from LSU, Jadyn Donovan from Duke", "answer_type": "possible"}}}
{"query_id": "GitHub--0", "dataset": "webvoyager", "query": "Search for an open-source project related to 'climate change data visualization' on GitHub and report the project with the most stars.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--0", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "resource-watch/resource-watch", "answer_type": "golden"}}}
{"query_id": "GitHub--1", "dataset": "webvoyager", "query": "Search for an open-source repository for machine learning in Python, specifically focused on decision trees, updated within the last 2 days.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--1", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "google/yggdrasil-decision-forests", "answer_type": "possible"}}}
{"query_id": "GitHub--2", "dataset": "webvoyager", "query": "Look for the trending Python repositories on GitHub with most stars.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--2", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "myshell-ai/OpenVoice", "answer_type": "possible"}}}
{"query_id": "GitHub--3", "dataset": "webvoyager", "query": "Find out how much more package storage the Enterprise version has over Team in GitHub Pricing.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--3", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "48GB", "answer_type": "golden"}}}
{"query_id": "GitHub--4", "dataset": "webvoyager", "query": "Find a popular JavaScript repository created in the last 30 days on GitHub with a Readme file.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--4", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "<repo> (use advanced search like 'javascript created:>2023-12-10 language:JavaScript')", "answer_type": "possible"}}}
{"query_id": "GitHub--5", "dataset": "webvoyager", "query": "Find a Python repository on GitHub that has been updated in the past 2 days and has at least 500 stars.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--5", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "<repo> (stars:\"> 500\" language:Python), then choose recently updated", "answer_type": "possible"}}}
{"query_id": "GitHub--6", "dataset": "webvoyager", "query": "Search for an open-source project related to 'cryptocurrency wallet' updated in the past 30 days and provide the top three contributors.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--6", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "blocknetdx/blocknet; laanwj, sipa, theuni", "answer_type": "possible"}}}
{"query_id": "GitHub--7", "dataset": "webvoyager", "query": "Find the official GitHub repository for ALBERT and show me what files the repo changed in the most recent commit.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--7", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "classifier_utils.py and squad_utils.py", "answer_type": "golden"}}}
{"query_id": "GitHub--8", "dataset": "webvoyager", "query": "Look up the latest stable release version of Vuex and find out when it was published.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--8", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "Latest v4.0.2 on Jun 17, 2021", "answer_type": "golden"}}}
{"query_id": "GitHub--9", "dataset": "webvoyager", "query": "Locate a repository on GitHub that was created in the last week and has 50 or more stars. Provide brief details about the project's purpose and its programming language.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--9", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "<repo> (stars:>=50 created:>=xxxx-xx-xx)", "answer_type": "possible"}}}
{"query_id": "GitHub--10", "dataset": "webvoyager", "query": "If I start using Copilot Individual, how much US dollars will it cost per year and what features does it have?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--10", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "$100 per year; Code completions, Chat, and more for indie developers and freelancers.", "answer_type": "golden"}}}
{"query_id": "GitHub--11", "dataset": "webvoyager", "query": "Find a newly created open-source project on GitHub related to 'climate change' that has been initiated in January 2023; check the main programming language used and the project's description.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--11", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "TheAIDojo/AI-for-Climate-Change; Jupyter Notebook; Repository of notebooks and associated code that covers the fundamental concepts of deep learning and its application to climate science.", "answer_type": "possible"}}}
{"query_id": "GitHub--12", "dataset": "webvoyager", "query": "Retrieve the latest release from the 'electron/electron' repository on GitHub and note down the release version number and date.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--12", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "v29.0.0-alpha.5, 19 hours ago (real-time release)", "answer_type": "possible"}}}
{"query_id": "GitHub--13", "dataset": "webvoyager", "query": "Identify the latest top-trending open-source project in the category of 'Machine Learning' on GitHub, and check the number of stars it has received.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--13", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "microsoft/ML-For-Beginners", "answer_type": "possible"}}}
{"query_id": "GitHub--14", "dataset": "webvoyager", "query": "Locate the repository for the open-source project \"vscode\" and identify the top three contributors.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--14", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "bpasero; jrieken; mjbvz", "answer_type": "possible"}}}
{"query_id": "GitHub--15", "dataset": "webvoyager", "query": "Locate a repository on GitHub related to 'quantum computing' that has been updated within the last week and has at least 50 stars. Provide a brief description of the project.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--15", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "desireevl/awesome-quantum-computing", "answer_type": "possible"}}}
{"query_id": "GitHub--16", "dataset": "webvoyager", "query": "Find the GitHub Skill section and how many courses are under the 'First day on GitHub' heading.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--16", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "3", "answer_type": "golden"}}}
{"query_id": "GitHub--17", "dataset": "webvoyager", "query": "Locate a C++ project on GitHub that has been recently updated in the last week and has at least 500 stars, then describe its main purpose.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--17", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "microsoft/terminal; The new Windows Terminal and the original Windows console host, all in the same place!", "answer_type": "possible"}}}
{"query_id": "GitHub--18", "dataset": "webvoyager", "query": "Identify and report the most popular (in terms of stars) open-source image processing tool on GitHub.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--18", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "OpenCV", "answer_type": "golden"}}}
{"query_id": "GitHub--19", "dataset": "webvoyager", "query": "Look up the most recently updated Python repository on GitHub that is tagged with 'web scraping' and has over 100 stars.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--19", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "scrapy/scrapy", "answer_type": "possible"}}}
{"query_id": "GitHub--20", "dataset": "webvoyager", "query": "Open GitHub Copilot's FAQs to find the official answer to when Copilot chat can be used on mobile.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--20", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "'Chat in GitHub Mobile is coming soon.' OR 'We do not have a set timeline for making Copilot Chat available on mobile. We'll continue to update this page with the latest information on new capabilities for various plans.'", "answer_type": "golden"}}}
{"query_id": "GitHub--21", "dataset": "webvoyager", "query": "Find the Security topic in GitHub Resources and answer the role of GitHub Advanced Security.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--21", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "With AI-powered application security testing tools embedded in your development workflow, GitHub Advanced Security outperforms non-native add-ons by delivering 7x faster remediation rates for identified vulnerabilities.", "answer_type": "possible"}}}
{"query_id": "GitHub--22", "dataset": "webvoyager", "query": "Find an open-source repository on GitHub focused on natural language processing in Ruby, updated within the last week.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--22", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "<repo> (natural language processing language:Ruby)", "answer_type": "possible"}}}
{"query_id": "GitHub--23", "dataset": "webvoyager", "query": "Find the wiki page of ohmyzsh on GitHub and tell me how to change the theme of zsh to agnoster.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--23", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "edit the .zshrc file and set the ZSH_THEME variable to \"agnoster\"", "answer_type": "golden"}}}
{"query_id": "GitHub--24", "dataset": "webvoyager", "query": "Locate the GitHub repository for the open-source project \"angular\" and identify the last three issues closed.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--24", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "recently closed issue in repo angular/angular: https://github.com/angular/angular/issues?q=is%3Aissue+is%3Aclosed", "answer_type": "possible"}}}
{"query_id": "GitHub--25", "dataset": "webvoyager", "query": "Search for a 'virtual reality' related repository on GitHub updated in the last 10 days with at least 200 stars and summarize its main objective.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--25", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "<repo> (virtual reality stars:>=200), <summary>", "answer_type": "possible"}}}
{"query_id": "GitHub--26", "dataset": "webvoyager", "query": "Find the Resolve merge conflicts course in GitHub Skills and what actions learners will perform in this course.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--26", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "Create a pull request. Resolve a merge conflict. Create a merge conflict. Merge your pull request.", "answer_type": "golden"}}}
{"query_id": "GitHub--27", "dataset": "webvoyager", "query": "Find a Ruby repository on GitHub that has been updated in the past 3 days and has at least 1000 stars.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--27", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "<repo> (language:Ruby stars:>1000)", "answer_type": "possible"}}}
{"query_id": "GitHub--28", "dataset": "webvoyager", "query": "Identify the most starred JavaScript repositories on GitHub that were created after 2023-12-29.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--28", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "<repo> (language:JavaScript created:>2023-12-29), sort by Most stars", "answer_type": "possible"}}}
{"query_id": "GitHub--29", "dataset": "webvoyager", "query": "Compare the maximum number of private repositories allowed in the Free and Pro plans in GitHub Pricing.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--29", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "Unlimited", "answer_type": "golden"}}}
{"query_id": "GitHub--30", "dataset": "webvoyager", "query": "Search for an open-source project related to 'blockchain technology' on GitHub updated in the past 15 days and list the top five contributors.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--30", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "eg, aptos-labs/aptos-core, contributors: davidiw, gregnazario, JoshLind, bmwill, rustielin", "answer_type": "possible"}}}
{"query_id": "GitHub--31", "dataset": "webvoyager", "query": "Find the official GitHub repository for TensorFlow and list the files changed in the last commit. Tell me the name of changed files, total additions and total deletion.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--31", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "Tensorflow latest commit", "answer_type": "possible"}}}
{"query_id": "GitHub--32", "dataset": "webvoyager", "query": "Discover the latest C# repository on GitHub related to 'game development' and having over 150 stars, and describe its main features.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--32", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "<repo> (game development language:C# stars:>150), <features>", "answer_type": "possible"}}}
{"query_id": "GitHub--33", "dataset": "webvoyager", "query": "Find Customer Stories on the GitHub page and list the 2 stories that appear on the web page.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--33", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "Philips builds and deploys digital health technology faster with innersource on GitHub. Shopify keeps pushing eCommerce forward with help from GitHub tools.", "answer_type": "possible"}}}
{"query_id": "GitHub--34", "dataset": "webvoyager", "query": "Search for an open-source project on GitHub related to 'Protein prediction' and identify the project with the highest number of forks.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--34", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "kexinhuang12345/DeepPurpose", "answer_type": "possible"}}}
{"query_id": "GitHub--35", "dataset": "webvoyager", "query": "Check the latest release version of React and the date it was published on GitHub.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--35", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "18.2.0 (June 14, 2022)", "answer_type": "golden"}}}
{"query_id": "GitHub--36", "dataset": "webvoyager", "query": "Identify a new open-source project on GitHub related to 'AI agriculture' that created in 2022, and note its main programming language and description.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--36", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "<repo> (AI agriculture created:2022)", "answer_type": "possible"}}}
{"query_id": "GitHub--37", "dataset": "webvoyager", "query": "List the 3 features mentioned in GitHub's Copilot product page.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--37", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "The AI coding assistant elevating developer workflows. Get AI-based suggestions in real time. Docs that feel tailored for you.", "answer_type": "possible"}}}
{"query_id": "GitHub--38", "dataset": "webvoyager", "query": "Identify and report the most popular (by stars) open-source repo related to cybersecurity on GitHub.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--38", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "WerWolv/ImHex", "answer_type": "golden"}}}
{"query_id": "GitHub--39", "dataset": "webvoyager", "query": "Browse the GitHub Trending and find out which developer is currently ranked first this month and the corresponding repository.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--39", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "find info on https://github.com/trending/developers", "answer_type": "possible"}}}
{"query_id": "GitHub--40", "dataset": "webvoyager", "query": "Select Sign up on the GitHub homepage to see if email 'test123@gmail.com' already exists.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://github.com/", "metadata": {"original_task_id": "GitHub--40", "website": "GitHub", "category": "GitHub", "additional": {"ground_truth": "Perform Action. email 'test123@gmail.com' already exists", "answer_type": "golden"}}}
{"query_id": "Google Flights--0", "dataset": "webvoyager", "query": "Book a journey with return option on same day from Edinburg to Manchester on December 28th and show me the lowest price option available.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--0", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Aer Lingus 11:40am - 4:45pm, $412 (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Flights--1", "dataset": "webvoyager", "query": "Show me the list of one-way flights today (February 17, 2024) from Chicago to Paris.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--1", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Air France 5:30PM 8:25AM (+1), United 6:30PM 9:55AM(+1), Delta 12:00PM 8:10AM(+1)... (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Flights--2", "dataset": "webvoyager", "query": "Find the lowest fare from all eligible one-way flights for 1 adult from JFK to Heathrow on Jan. 22.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--2", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Tap Air Portugal 10:00PM 5:30PM(+1), $355 (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Flights--3", "dataset": "webvoyager", "query": "Search for the one-way flight available from Calgary to New York on Jan. 1st with the lowest carbon dioxide emissions.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--3", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "WestJet 9:55AM 4:34PM, emission: 225 kg CO2, $704 (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Flights--4", "dataset": "webvoyager", "query": "Search for one-way flights from New York to London on Dec. 26th and filter the results to show only non-stop flights.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--4", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Norse Atlantic UK 6:10PM 6:00AM(+1), $331, Nonstop (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Flights--5", "dataset": "webvoyager", "query": "Find flights from Chicago to London on 20 December and return on 23 December.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--5", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Scandinavian Airlines 9:45PM 4:00PM(+1), $1456 (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Flights--6", "dataset": "webvoyager", "query": "Search for a flight on December 19 and return on December 26 from Tel Aviv to Venice and Select First Class.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--6", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "flydubai, Emirates, and AccesRail, 12:40 PM - 8:34 PM(+1), $8991 (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Flights--7", "dataset": "webvoyager", "query": "Find a round trip from Phoenix to Miami (Dec. 25th - Dec. 28th), show the First Class plane tickets for me that do not exceed $1320..", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--7", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "American Airlines, 5:44 AM 1:25 PM, $1,247 (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Flights--8", "dataset": "webvoyager", "query": "Search a one-way filght from Dublin To Athens Greece for 1 Adult that leaves on December 30 and analyse the price graph for the next 2 months.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--8", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Analyse the picture of Price graph (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Flights--9", "dataset": "webvoyager", "query": "Find a one way economy flight from Pune to New York in Jan. 15th and show me how long it will take for flight transfer.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--9", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Air India, LOT, 3:55PM 8:35PM(+1), transfer time: 18 hours 20 mins (real-time, Transfer time only.)", "answer_type": "possible"}}}
{"query_id": "Google Flights--10", "dataset": "webvoyager", "query": "Locate the cheapest round-trip flights from New York to Tokyo leaving on January 25, 2024, and returning on February 15, 2024.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--10", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Air Canada, 9:15AM 4:50PM(+1), $1169 (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Flights--11", "dataset": "webvoyager", "query": "Compare the prices for round-trip flights from New York to Tokyo for a departure on February 10, 2024, and a return on February 24, 2024, and select the option with the least number of stops.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--11", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "United flight, 11:15AM 3:35PM(+1), $1366, Nonstop (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Flights--12", "dataset": "webvoyager", "query": "Find the best-priced round-trip flight from New York to London leaving on December 25, 2023, and returning on January 5, 2024, with one stop or fewer.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--12", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Norse Atlantic UK, 6:10PM 6:00AM(+1), $757, Nonstop (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Flights--13", "dataset": "webvoyager", "query": "Find the cheapest round-trip flight option from New York City to Tokyo for a departure on January 10, 2024, and a return on January 24, 2024.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--13", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Turkish Airlines, 8:00PM 8:30AM(+2), $1142, 1 stop (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Flights--14", "dataset": "webvoyager", "query": "Compare flight options and find the lowest round trip fare from New York to London departing on January 10, 2024, and returning on January 17, 2024.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--14", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Norse Atlantic UK, 6:10PM 6:00AM(+1), $546 (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Flights--15", "dataset": "webvoyager", "query": "Compare the prices and total duration of non-stop flights from New York to Tokyo Narita Airport departing on February 12th, 2024, and returning on February 26th, 2024.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--15", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Only one flight, United flight, 11:15AM 3:35PM(+1), $1316 (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Flights--16", "dataset": "webvoyager", "query": "Find the cheapest one-way flight from New York to Tokyo departing on January 15, 2024, and provide the airline and total flight duration.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--16", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Norse Atlantic UK, Air China, 6:10PM 1:40PM(+2), $671, 2 stops (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Flights--17", "dataset": "webvoyager", "query": "Find the cheapest round-trip flight from New York to Paris leaving on December 27, 2023, and returning on January 10, 2024.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--17", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Scandinavian Airlines, 5:35PM 1:25PM(+1), $608, 2 stops (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Flights--18", "dataset": "webvoyager", "query": "Compare flight options from New York to Tokyo for a round trip leaving on January 25, 2024, and returning on February 15, 2024, for one adult. Prioritize the comparisons by the shortest travel time.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--18", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "United, 11:15AM 3:35PM(+1), duration 14 hr 20 min, $1316 (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Flights--19", "dataset": "webvoyager", "query": "Find the cheapest one-way flight from London to Paris, departing on January 25, 2024. Include the airline, total travel time, and layovers for the chosen flight.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--19", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "easyJet, 6:35 PM - 8:55 PM, $35, nonstop (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Flights--20", "dataset": "webvoyager", "query": "Book a round-trip flight from San Francisco to Berlin, departing on March 5, 2024, and returning on March 12, 2024, and find the option with the shortest total travel time.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--20", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Lufthansa United, 2:40PM 12:55PM(+1), 13 hr 15 min", "answer_type": "possible"}}}
{"query_id": "Google Flights--21", "dataset": "webvoyager", "query": "Locate the lowest-priced one-way flight from Tokyo to Sydney for an adult, departing on February 25, 2024, and include the flight duration and number of layovers.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--21", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Jetstar JAL, Qantas, 8:10PM 10:40AM(+1), 12 hr 30 min, 1 stop", "answer_type": "possible"}}}
{"query_id": "Google Flights--22", "dataset": "webvoyager", "query": "Find a round-trip flight from Rio de Janeiro to Los Angeles, leaving on March 15, 2024, and returning on March 22, 2024, and select the option with the least carbon dioxide emissions.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--22", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Gol, Aeromexico, 7:00AM 10:22PM, 746 kg CO2", "answer_type": "possible"}}}
{"query_id": "Google Flights--23", "dataset": "webvoyager", "query": "Search for a one-way flight from Mumbai to Vancouver on February 28, 2024, filtering the results to show only 1-stop flights.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--23", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Air Canada Lufthansa, 4:25AM 4:15PM; Air India, Air Canada, 6:35AM 4:15PM; ...(1 stop)", "answer_type": "possible"}}}
{"query_id": "Google Flights--24", "dataset": "webvoyager", "query": "Compare prices for economy class round-trip flights from Dubai to Rome, departing on March 1, 2024, and returning on March 8, 2024, and select the option with the fewest stops.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--24", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Etihad ITA, 2:25AM 5:45AM, 6 hr 20 min, Nonstop", "answer_type": "possible"}}}
{"query_id": "Google Flights--25", "dataset": "webvoyager", "query": "Find a one-way business class flight from Buenos Aires to Amsterdam on March 10, 2024, and provide the details of the flight with the shortest duration.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--25", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "KLM, 4:25PM 9:40AM(+1), 13 hr 15 min, EZEAMS, Nonstop, $3912, 3251 kg CO2", "answer_type": "possible"}}}
{"query_id": "Google Flights--26", "dataset": "webvoyager", "query": "Search for the cheapest round-trip flights from Bangkok to Madrid, leaving on February 26, 2024, and returning on February 28, 2024, and provide options under $1000.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--26", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Royal Jordanian, 2:20AM 2:05PM", "answer_type": "possible"}}}
{"query_id": "Google Flights--27", "dataset": "webvoyager", "query": "Locate a one-way flight from Johannesburg to Toronto on March 30, 2024, for one adult, and analyze the price trends for the following month.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--27", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "British Airways, American, 7:45PM 6:28PM(+1), <analyze the price graph>", "answer_type": "possible"}}}
{"query_id": "Google Flights--28", "dataset": "webvoyager", "query": "Find the best-priced round-trip flight from Seattle to Paris, departing on February 27, 2024, and returning on March 1, 2024, with a maximum of one stop.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--28", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Icelandair, 2:35PM 12:00PM(+1), 1 stop, $1602", "answer_type": "possible"}}}
{"query_id": "Google Flights--29", "dataset": "webvoyager", "query": "Compare the prices and total travel time of non-stop flights from Mexico City to Frankfurt, departing on March 5, 2024, and returning on March 15, 2024.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--29", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Only one flight, Lufthansa, 9:00PM 2:40PM(+1), 10 hr 40 min", "answer_type": "possible"}}}
{"query_id": "Google Flights--30", "dataset": "webvoyager", "query": "Find the most affordable one-way flight from Cape Town to Singapore, departing on March 20, 2024, and include the airline and total number of layovers.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--30", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Ethiopian, 2:35PM 2:50PM(+1), 1 stop, $633", "answer_type": "possible"}}}
{"query_id": "Google Flights--31", "dataset": "webvoyager", "query": "Find a one-way economy flight from Auckland to Honolulu on March 25, 2024, browse the full page and display a flight option with the most stops.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--31", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Qantas, Qatar Airways, AlaskaEmirates, Mar 25, 4:05PM 11:59PM(+1), most: 3 stops", "answer_type": "possible"}}}
{"query_id": "Google Flights--32", "dataset": "webvoyager", "query": "Search for round-trip flights from Stockholm to Toronto, departing on March 3, 2024, and returning on March 10, 2024, and sort the results to find the shortest total travel time.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--32", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Icelandair, 12:50PM 6:15PM, 11 hr 25 min", "answer_type": "possible"}}}
{"query_id": "Google Flights--33", "dataset": "webvoyager", "query": "Find a one-way flight from Shanghai to Vancouver on February 27, 2024, and compare the options based on carbon dioxide emissions.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--33", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Korean Air, 2:00PM 11:15AM, 13 hr 15 min, 816 kg CO2; EVA AirAir Canada, 8:10PM 6:35PM, 3,672 kg CO2; ...", "answer_type": "possible"}}}
{"query_id": "Google Flights--34", "dataset": "webvoyager", "query": "Compare business class flight options from Lisbon to Singapore for a one-way trip on March 15, 2024, select one of the flights and see which websites offer its booking options. Which one is the cheapest.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--34", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Emirates, 8:45PM 9:15PM(+1), booking options: Emirates, Gotogate, Martigo, Expedia, kiss&fly, eDreams ... cheapest: Gotogate", "answer_type": "possible"}}}
{"query_id": "Google Flights--35", "dataset": "webvoyager", "query": "Find the lowest-priced one-way flight from Cairo to Montreal on February 21, 2024, including the total travel time and number of stops.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--35", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "EgyptAir, Lufthansa, Air Canada, 10:05AM 6:20PM, 15 hr 15 min, 1 stop, $644", "answer_type": "possible"}}}
{"query_id": "Google Flights--36", "dataset": "webvoyager", "query": "Search for round-trip flights from Helsinki to New Delhi, departing on March 28, 2024, and returning on April 4, 2024, and filter the results to show only flights under $1000.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--36", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Finnair, 6:00PM 6:05AM(+1), $744 ...", "answer_type": "possible"}}}
{"query_id": "Google Flights--37", "dataset": "webvoyager", "query": "Locate a round-trip flight from Buenos Aires to Beijing, leaving on February 28, 2024, and returning on March 3, 2024, check out one of the options and tell me if the airline for my return flight is the same as my departure flight.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--37", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Lufthansa, 5:50PM 9:30AM(+2), return flight can be Lufthansa, 11:20AM 7:55AM(+1), the same as departure flight", "answer_type": "possible"}}}
{"query_id": "Google Flights--38", "dataset": "webvoyager", "query": "Compare the prices and flight durations for economy class flights from Oslo to Dubai, departing on March 8, 2024, and show the options with no more than two layovers.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--38", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Emirates, 2:10PM 11:55PM, Nonstop ...", "answer_type": "possible"}}}
{"query_id": "Google Flights--39", "dataset": "webvoyager", "query": "Find a one-way flight from Prague to a city in Japan on March 20, 2024, which city in Japan is cheaper to go to, Tokyo or a certain city in Hokkaido?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--39", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Prague to Tokyo, British Airways, Air China, 7:05 AM 1:40 PM(+1)", "answer_type": "possible"}}}
{"query_id": "Google Flights--40", "dataset": "webvoyager", "query": "Browse destinations on the Google Flights homepage from Seattle, look at destinations on a map, and recommend some famous places to travel that are within a reasonable distance and price.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--40", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "Seattle to Las Vegas $21, Seattle to Los Angeles $42", "answer_type": "possible"}}}
{"query_id": "Google Flights--41", "dataset": "webvoyager", "query": "Choose one way business class ticket from Hong Kong to Glacier National Park on 8 March 2024, offering a 1 stop ticket.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/travel/flights/", "metadata": {"original_task_id": "Google Flights--41", "website": "Google Flights", "category": "Google Flights", "additional": {"ground_truth": "United, Operated by Skywest DBA United Express, 10:30PM 12:45PM(+1), 1 stop", "answer_type": "possible"}}}
{"query_id": "Google Map--0", "dataset": "webvoyager", "query": "Find 5 beauty salons with ratings greater than 4.8 in Seattle, WA.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--0", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Beehive Salon, Intermezzo Salon & Spa, Cindy's Beauty Salon, The Red Chair Salon, Ella and Oz Salon", "answer_type": "possible"}}}
{"query_id": "Google Map--1", "dataset": "webvoyager", "query": "Tell me one bus stop that is nearest to the intersection of main street and Amherst street in Altavista.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--1", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "'Amherst and 7th' or 'Main Street Middle'", "answer_type": "golden"}}}
{"query_id": "Google Map--2", "dataset": "webvoyager", "query": "Find Apple Stores close to zip code 90028", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--2", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Apple The Grove, Apple Beverly Center", "answer_type": "possible"}}}
{"query_id": "Google Map--3", "dataset": "webvoyager", "query": "The least amount of walking from Central Park Zoo to the Broadway Theater in New York.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--3", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Approximately 20 min", "answer_type": "possible"}}}
{"query_id": "Google Map--4", "dataset": "webvoyager", "query": "Plan a trip from Boston Logan Airport to North Station.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--4", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Drive via MA-1A S and take about 10 mins (based on real-time traffic conditions)", "answer_type": "possible"}}}
{"query_id": "Google Map--5", "dataset": "webvoyager", "query": "Search for a parking garage near Thalia Hall in Chicago that isn't open 24 hours.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--5", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "SP+ Parking in 1750 W 13th St, Chicago, IL 60608", "answer_type": "possible"}}}
{"query_id": "Google Map--6", "dataset": "webvoyager", "query": "Find all Uniqlo locations in Chicago, IL.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--6", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "UNIQLO State Street", "answer_type": "possible"}}}
{"query_id": "Google Map--7", "dataset": "webvoyager", "query": "Find bus stops in Alanson, MI", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--7", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Alanson, MI (EZ-Mart) Bus Stop", "answer_type": "golden"}}}
{"query_id": "Google Map--8", "dataset": "webvoyager", "query": "Find a place to climb within 2 miles of zip code 90028.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--8", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Hollywood Boulders", "answer_type": "golden"}}}
{"query_id": "Google Map--9", "dataset": "webvoyager", "query": "Find the art gallery that is nearest to Los Angeles Hindu Temple.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--9", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "'Honor Fraser Gallery' or 'Walter Maciel Gallery'.", "answer_type": "golden"}}}
{"query_id": "Google Map--10", "dataset": "webvoyager", "query": "Search for a park in the state of California called Castle Mountains National Monument and find out it's Basic Information.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--10", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "located in Barstow, CA 92311; open 24 hours; phone number is (760) 252-6100", "answer_type": "possible"}}}
{"query_id": "Google Map--11", "dataset": "webvoyager", "query": "Locate a large store in Washington that has kids' and maternity products, also check if it has a parking lot.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--11", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Village Maternity with a wheelchair accessible parking lot", "answer_type": "possible"}}}
{"query_id": "Google Map--12", "dataset": "webvoyager", "query": "Find 5 places that serve burgers near 44012 zip code and sort these 5 places by highest rating.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--12", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Taki's Greek Kitchen - 4.7, Thai Chili - 4.7, Parker's Grille & Tavern - 4.5, Legacy Restaurant & Grille - 4.5, Jake's On the Lake - 4.5", "answer_type": "possible"}}}
{"query_id": "Google Map--13", "dataset": "webvoyager", "query": "Find a parking lot in Gloucester and book a ride from there to North Plymouth, view the map to understand the route better.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--13", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Drive via MA-3 N and I-93 N, about 1.5 hours (based on real-time traffic conditions).", "answer_type": "possible"}}}
{"query_id": "Google Map--14", "dataset": "webvoyager", "query": "Find motorcycle parking near Radio City Music Hall.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--14", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Rising Wolf Garage (should be motorcycle parking)", "answer_type": "possible"}}}
{"query_id": "Google Map--15", "dataset": "webvoyager", "query": "Find daytime only parking nearest to Madison Square Garden. Summarize what people are saying about it. ", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--15", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Quik Park; <reviews>", "answer_type": "possible"}}}
{"query_id": "Google Map--16", "dataset": "webvoyager", "query": "Find EV charging supported parking closest to Smithsonian museum.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--16", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "EVgo Charging Station", "answer_type": "possible"}}}
{"query_id": "Google Map--17", "dataset": "webvoyager", "query": "Search for locksmiths open now but not open 24 hours in Texas City.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--17", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Protech Key and Locksmith (UTC 12:30)", "answer_type": "possible"}}}
{"query_id": "Google Map--18", "dataset": "webvoyager", "query": "Find a route between Chicago to Los Angeles, then print the route details.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--18", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Drive via I-80 W, about 29 hours", "answer_type": "possible"}}}
{"query_id": "Google Map--19", "dataset": "webvoyager", "query": "I will arrive Pittsburgh Airport soon. Provide the name of the Hilton hotel closest to the airport. Then, tell me the the walking time to the nearest supermarket from the hotel.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--19", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Hilton Garden Inn Pittsburgh Airport, walking time around 15min - 30min", "answer_type": "possible"}}}
{"query_id": "Google Map--20", "dataset": "webvoyager", "query": "Find Tesla Destination Charger closest to the National Air and Space Museum.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--20", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Tesla Destination Charger, 1330 Maryland Ave SW, Washington, DC 20024", "answer_type": "possible"}}}
{"query_id": "Google Map--21", "dataset": "webvoyager", "query": "Identify the nearest bus stop to the corner of Elm Street and Oak Street in Massachusetts.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--21", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Elm Street & Oak Street, 18 Bay St, Amesbury, MA 01913", "answer_type": "golden"}}}
{"query_id": "Google Map--22", "dataset": "webvoyager", "query": "Find a Best Buy store near zip code 33139.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--22", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Best Buy, 1131 5th St, Miami Beach, FL 33139", "answer_type": "possible"}}}
{"query_id": "Google Map--23", "dataset": "webvoyager", "query": "Determine the shortest walking route from The Metropolitan Museum of Art to Times Square in New York.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--23", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "around 42 min (1.9 miles) via 7th Ave", "answer_type": "possible"}}}
{"query_id": "Google Map--24", "dataset": "webvoyager", "query": "Plan a journey from San Francisco International Airport to Union Square via driving.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--24", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "via US-101 N, around 19 min (current traffic condition), 14.6 miles", "answer_type": "possible"}}}
{"query_id": "Google Map--25", "dataset": "webvoyager", "query": "Search for a parking facility near the Fox Theater in Detroit that closes at night.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--25", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Park Rite Parking, Closes 11 PM", "answer_type": "possible"}}}
{"query_id": "Google Map--26", "dataset": "webvoyager", "query": "Search for Los Angeles on Google Map, try to print the map as PDF and summarize the information on the map.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--26", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "<Action>, print PDF", "answer_type": "golden"}}}
{"query_id": "Google Map--27", "dataset": "webvoyager", "query": "Locate the Target stores in Atlanta, GA. How many results are shown on the map.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--27", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "8", "answer_type": "possible"}}}
{"query_id": "Google Map--28", "dataset": "webvoyager", "query": "Find the search settings for Google Map, what options are shown on that page?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--28", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Privacy & Safety: Activity, Content, More options; Other settings", "answer_type": "golden"}}}
{"query_id": "Google Map--29", "dataset": "webvoyager", "query": "Identify bus stops in Ypsilanti, MI, list three of them.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--29", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Ypsilanti Transit Center; Ellsworth + Michigan; YTC - Stop 5", "answer_type": "possible"}}}
{"query_id": "Google Map--30", "dataset": "webvoyager", "query": "Locate a parking lot near the Brooklyn Bridge that is open 24 hours. Review the user comments about it.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--30", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "2-68 Division St Garage, <reviews>", "answer_type": "possible"}}}
{"query_id": "Google Map--31", "dataset": "webvoyager", "query": "First search New York's Central Park Zoo on Google Map, and then find the way to share the map. What is the generated sharing link?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--31", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "share link, https://maps.app.goo.gl/Bnp4Q67dTHoFZ4Lx8", "answer_type": "golden"}}}
{"query_id": "Google Map--32", "dataset": "webvoyager", "query": "Search for plumbers available now but not open 24 hours in Orlando, FL.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--32", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Drain Genie Plumbing Services", "answer_type": "possible"}}}
{"query_id": "Google Map--33", "dataset": "webvoyager", "query": "Check out Denver International Airport's information and tell me: 1) which level has the least proportion in reviews; 2) what are its Accessibility and Amenities.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--33", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "star 2 has the least proportion; Accessibility: Assistive hearing loop; Wheelchair accessible entrance; Wheelchair accessible parking lot; Wheelchair accessible restroom; Wheelchair accessible seating; Amenities: Baggage storage; Wi-Fi; Free Wi-Fi", "answer_type": "golden"}}}
{"query_id": "Google Map--34", "dataset": "webvoyager", "query": "Find a hiking trail within 2 miles of zip code 80202.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--34", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Speer Blvd Park ...", "answer_type": "possible"}}}
{"query_id": "Google Map--35", "dataset": "webvoyager", "query": "Search for a natural reserve in Texas called Big Bend National Park and gather its Basic Information.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--35", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Big Bend National Park, TX; (432) 477-2251; 6PXX+WW Big Bend National Park, Texas; Tickets: $30 ...", "answer_type": "possible"}}}
{"query_id": "Google Map--36", "dataset": "webvoyager", "query": "Identify 5 restaurants serving pizza near the 30309 zip code and rank them by their ratings.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--36", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Varasano's Pizzeria - Buckhead, 4.9; DaVinci's Pizzeria, 4.4; Mellow Mushroom Atlanta - Buckhead, 4.4; Vinny's N.Y. Pizza & Grill - Piedmont, 4.2; Gino's NY Pizza Bar, 4.0", "answer_type": "possible"}}}
{"query_id": "Google Map--37", "dataset": "webvoyager", "query": "Locate a parking area in Salem and find a route from there to Marblehead, including map directions for better understanding.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--37", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Take Lafayette St and Pleasant St to Cross St in Marblehead, 14 min (3.9 mi); Drive to Rowland St, 1 min (0.1 mi)", "answer_type": "possible"}}}
{"query_id": "Google Map--38", "dataset": "webvoyager", "query": "Search for bicycle parking near the Empire State Building.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--38", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Bike Parking, 104 W 38th St, New York, NY 10018", "answer_type": "possible"}}}
{"query_id": "Google Map--39", "dataset": "webvoyager", "query": "Find a route from Miami to New Orleans, and provide the detailed route information.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--39", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Miami, Florida to New Orleans, Louisiana; Get on I-95 N from S Miami Ave, 5 min (1.4 mi); Follow Florida's Tpke, I-75 N and I-10 W to Carondelet St in New Orleans. Take exit 12B from US-90 BUS W, 12 hr 6 min (864 mi); Turn left onto Carondelet St, 3 min (0.6 mi)", "answer_type": "possible"}}}
{"query_id": "Google Map--40", "dataset": "webvoyager", "query": "Find a restaurant in Boston that eats Boston lobster and asks for a rating of 4.6 or higher, and check out what a one-star review says.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/maps/", "metadata": {"original_task_id": "Google Map--40", "website": "Google Map", "category": "Google Map", "additional": {"ground_truth": "Boston Sail Loft, 4.6; one star review: Not sure about the rest of the seafood here since I left immediately after trying their AWFUL Chowder. I won't call it clam chowder since I didn't see a single piece of clam. This stuff was more like if you heated up half & Half then sprinkle dill and salt in it. It's too bad the tourist think this is how it's supposed to taste.", "answer_type": "possible"}}}
{"query_id": "Google Search--0", "dataset": "webvoyager", "query": "Find the initial release date for Guardians of the Galaxy Vol. 3 the movie.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--0", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "May 5, 2023", "answer_type": "golden"}}}
{"query_id": "Google Search--1", "dataset": "webvoyager", "query": "Find Kevin Durant's bio", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--1", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "Born on September 29, 1988; Professional basketball player for the Phoenix Suns now.", "answer_type": "possible"}}}
{"query_id": "Google Search--2", "dataset": "webvoyager", "query": "Search for the latest news title about the NBA team the Los Angeles Lakers.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--2", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "News Title (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Search--3", "dataset": "webvoyager", "query": "Show me a list of comedy movies, sorted by user ratings. Show me the Top 5 movies.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--3", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "Life Is Beautiful, Back to the Future, The Intouchables, City Lights, Modern Times", "answer_type": "golden"}}}
{"query_id": "Google Search--4", "dataset": "webvoyager", "query": "Show most played games in Steam. And tell me the number of players in In game at this time", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--4", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "Counter-Strike 2, 602,898 players (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Search--5", "dataset": "webvoyager", "query": "find the score of the latest nba game played by the phoenix suns.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--5", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "Suns 120-107 Trail Blazers (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Search--6", "dataset": "webvoyager", "query": "Browse the monthly trending searches in Columbus.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--6", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "New Year's Eve parties, Christmas markets, january, comedy shows... (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Search--7", "dataset": "webvoyager", "query": "Find the software requirements for iPhones that support AirDrop's ability to continue transmitting over the web when out of range.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--7", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "IOS 17.1", "answer_type": "golden"}}}
{"query_id": "Google Search--8", "dataset": "webvoyager", "query": "Find the video on YouTube: 'Oscars 2023: Must-See Moments!'. Tell me who the first comment displayed under that video belongs to, and how many thumbs up and replies it has.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--8", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "user: @melvinsmiley5295, 329 thumbs up and 2 replies (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Search--9", "dataset": "webvoyager", "query": "Show the rating of Prometheus movie on IMDb and Rotten Tomatoes.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--9", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "IMDb 7.0/10, Rotten Tomatoes 73%", "answer_type": "possible"}}}
{"query_id": "Google Search--10", "dataset": "webvoyager", "query": "Find the no. 1 weekly charts ranked artist based on Billboard and tell me 10 most played song by this artist until now.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--10", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "Taylor Swift, 10 songs (different sources have different results)", "answer_type": "possible"}}}
{"query_id": "Google Search--11", "dataset": "webvoyager", "query": "According to FlightAware, tell me the busiest airport last week and its total arrivals and departures last week.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--11", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "KATL, 13555 total arrivals and departures (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Search--12", "dataset": "webvoyager", "query": "Find the year that Tom Brady had the most touchdowns in a single season.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--12", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "2007", "answer_type": "golden"}}}
{"query_id": "Google Search--13", "dataset": "webvoyager", "query": "What are Jerry Trainor's upcoming projects?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--13", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "Strange Planet, 2023", "answer_type": "possible"}}}
{"query_id": "Google Search--14", "dataset": "webvoyager", "query": "Find the retired players the year before last named James Smith and tell me which club he has been a member of from 2020-2021.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--14", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "Yeovil Town", "answer_type": "golden"}}}
{"query_id": "Google Search--15", "dataset": "webvoyager", "query": "Please try to log in to twitter with email: webagenttest@testmail.com and password: test123456. Let me know if the login was successful.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--15", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "Not successful", "answer_type": "golden"}}}
{"query_id": "Google Search--16", "dataset": "webvoyager", "query": "How many members are there in the OpenAI community on Reddit, and what is the hottest news right now?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--16", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "880K, ChatGPT will soon have real-time news access (real-time)", "answer_type": "possible"}}}
{"query_id": "Google Search--17", "dataset": "webvoyager", "query": "Tell me the names of Trump's kids", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--17", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "Ivanka Trump, Barron Trump, Donald Trump Jr., Tiffany Trump, Eric Trump", "answer_type": "golden"}}}
{"query_id": "Google Search--18", "dataset": "webvoyager", "query": "When and where the most recent World Cup was held, and which team was the winner?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--18", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "Qatar; November 20 to December 18, 2022; Argentina", "answer_type": "golden"}}}
{"query_id": "Google Search--19", "dataset": "webvoyager", "query": "What are the first 7 bits of the SHA of the Bert's latest commit on GitHub, and what exactly was changed in that commit.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--19", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "eedf571, Smaller BERT Models", "answer_type": "golden"}}}
{"query_id": "Google Search--20", "dataset": "webvoyager", "query": "Find the release date for the latest \"Fast & Furious\" movie.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--20", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "April 4, 2025", "answer_type": "golden"}}}
{"query_id": "Google Search--21", "dataset": "webvoyager", "query": "Show a list of the top 5 highest-grossing animated movies, sorted by box office earnings.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--21", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "The Lion King (2019); Frozen II (2019); The Super Mario Bros. Movie (2023); Frozen (2013); Incredibles 2 (2018)", "answer_type": "golden"}}}
{"query_id": "Google Search--22", "dataset": "webvoyager", "query": "Browse and list the top three trending topics this month in New York City.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--22", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "trending topics: 1.valentines day events; 2.fashion week; 3.job fairs; 4.march; 5.february", "answer_type": "possible"}}}
{"query_id": "Google Search--23", "dataset": "webvoyager", "query": "Retrieve a short biography of LeBron James.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--23", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "<bio> LeBron James", "answer_type": "possible"}}}
{"query_id": "Google Search--24", "dataset": "webvoyager", "query": "What is the name of the star system closest to the Solar System, and what are the discovered planets in it?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--24", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "Alpha Centauri star system; Proxima Centauri b, Proxima Centauri c, and Proxima Centauri d", "answer_type": "golden"}}}
{"query_id": "Google Search--25", "dataset": "webvoyager", "query": "Get the latest news headline about the English Premier League football club Manchester United.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--25", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "eg, Manchester United 1-2 Fulham: Alex Iwobi scores in added time for huge away win", "answer_type": "possible"}}}
{"query_id": "Google Search--26", "dataset": "webvoyager", "query": "Identify the hardware requirements for using the latest version of Adobe Photoshop on a Mac.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--26", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "RAM 8 GB; Processor: Multicore Intel® or Apple Silicon processor (2 GHz or faster processor with SSE 4.2 or later) with 64-bit support; Operating system, macOS Big Sur (version 11.0) or later; Graphics card, GPU with Metal support, 1.5 GB of GPU memory ...", "answer_type": "possible"}}}
{"query_id": "Google Search--27", "dataset": "webvoyager", "query": "Check the current air quality index in Paris.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--27", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "Current PM2.5 AQI\t43", "answer_type": "possible"}}}
{"query_id": "Google Search--28", "dataset": "webvoyager", "query": "Check the IMDb and Metacritic scores of the movie \"Inception.\"", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--28", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "IMDb score 8.8, Metacritic score 74%.", "answer_type": "golden"}}}
{"query_id": "Google Search--29", "dataset": "webvoyager", "query": "Find out the current world record for the men's 100m sprint.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--29", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "9.58s held by Usain Bolt of Jamaica", "answer_type": "golden"}}}
{"query_id": "Google Search--30", "dataset": "webvoyager", "query": "Find the current number one artist on the Spotify Global Top 50 chart and list his/her top 10 songs as of now.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--30", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "real-time, Benson Boone; Beautiful Things, In The Stars, GHOST TOWN, To Love Someone, Before You, NIGHTS LIKE THESE, Sugar Sweet, ROOM FOR 2, Little Runaway, What Was", "answer_type": "possible"}}}
{"query_id": "Google Search--31", "dataset": "webvoyager", "query": "Discover which year Cristiano Ronaldo scored the most goals in a single season.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--31", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "2014-15 season", "answer_type": "golden"}}}
{"query_id": "Google Search--32", "dataset": "webvoyager", "query": "Find out where and when the most recent UEFA Champions League final was held, and which team won.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--32", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "Manchester City Football Club; June 10, 2023; Atatürk Olympic Stadium, Istanbul, Turkey", "answer_type": "possible"}}}
{"query_id": "Google Search--33", "dataset": "webvoyager", "query": "Find and copy the SHA of the latest commit in the TensorFlow repository on GitHub, then find a textbox to paste and tell me what the SHA is.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--33", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "<SHA> of latest Tensorflow", "answer_type": "possible"}}}
{"query_id": "Google Search--34", "dataset": "webvoyager", "query": "Determine the distance from Earth to Mars as of today's date.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--34", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "345,957,886 kilometers", "answer_type": "possible"}}}
{"query_id": "Google Search--35", "dataset": "webvoyager", "query": "Look up the latest research paper related to black holes published in the journal \"Nature Astronomy\".", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--35", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "eg, 19 February 2024, The accretion of a solar mass per day by a 17-billion solar mass black hole", "answer_type": "possible"}}}
{"query_id": "Google Search--36", "dataset": "webvoyager", "query": "Search for the most recent Nobel Prize winner in Physics and their contribution to the field.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--36", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "French-Swedish physicist Anne L'Huillier, French scientist Pierre Agostini, and Hungarian-born Frank Krausz. <summary>", "answer_type": "possible"}}}
{"query_id": "Google Search--37", "dataset": "webvoyager", "query": "Find the current top 3 super-earth planets and give a brief introduction to them.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--37", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "Gliese 667Cc, Kepler-22b, Kepler-69c", "answer_type": "possible"}}}
{"query_id": "Google Search--38", "dataset": "webvoyager", "query": "Search for the next visible solar eclipse in North America and its expected date, and what about the one after that.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--38", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "next: April 8, 2024. The one after that will take place on August 23, 2044.", "answer_type": "possible"}}}
{"query_id": "Google Search--39", "dataset": "webvoyager", "query": "Identify the top-10 trending travel destinations for 2024 through a blog, how many of them are in Asia.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--39", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "Tokyo, Japan; Seoul, South Korea; Halong Bay, Vietnam; Palawan Island, Philippines; Sapa, Vietnam; Bogota, Colombia; Pattaya, Thailand; Alajuela, Costa Rica; Phnom Penh, Cambodia; Kuala Lumpur, Malaysia. Asian: Tokyo, Japan; Seoul, South Korea; Halong Bay, Vietnam; Palawan Island, Philippines; Sapa, Vietnam; Kuala Lumpur, Malaysia; Phnom Penh, Cambodia", "answer_type": "possible"}}}
{"query_id": "Google Search--40", "dataset": "webvoyager", "query": "Look up the elevation of Mount Kilimanjaro on Google Search.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--40", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "19,341 feet (5,895 meters)", "answer_type": "golden"}}}
{"query_id": "Google Search--41", "dataset": "webvoyager", "query": "Look up the current statistics of air pollution level in Los Angeles using Google Search.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--41", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "current air pollution level in Los Angeles", "answer_type": "possible"}}}
{"query_id": "Google Search--42", "dataset": "webvoyager", "query": " Use Google Search to find an article that explains the major differences between American English and British English.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.google.com/", "metadata": {"original_task_id": "Google Search--42", "website": "Google Search", "category": "Google Search", "additional": {"ground_truth": "The main difference between British English and American English is in pronunciation. Some words are also different in each variety of English, and there are also a few differences in the way they use grammar. Here are five of the most common grammatical differences between British and American English. 1. Present perfect and past simple; 2. got and gotten; 3. Verb forms with collective nouns; 4. have and take; 5. shall", "answer_type": "possible"}}}
{"query_id": "Huggingface--0", "dataset": "webvoyager", "query": "Find a pre-trained natural language processing model on Hugging Face that can perform sentiment analysis, and make sure the model's last update is within March 2023.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--0", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "distilroberta-finetuned-financial-news-sentiment-analysis", "answer_type": "possible"}}}
{"query_id": "Huggingface--1", "dataset": "webvoyager", "query": "Use the Huggingface Inference API to generate a short story about a dragon and a wizard.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--1", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "<story> (generated by Inference API)", "answer_type": "possible"}}}
{"query_id": "Huggingface--2", "dataset": "webvoyager", "query": "Discover three new and popular open-source NLP models for language translation released in the past month on Huggingface.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--2", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "<model 1>; <model 2>; <model 3>; (last month, recently created)", "answer_type": "possible"}}}
{"query_id": "Huggingface--3", "dataset": "webvoyager", "query": "Look up a model with a license of cc-by-sa-4.0 with the most likes on Hugging face.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--3", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "replit/replit-code-v1-3b", "answer_type": "possible"}}}
{"query_id": "Huggingface--4", "dataset": "webvoyager", "query": "Locate an open-source conversational AI model on Hugging Face, trained in English and list its main features and applications.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--4", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "TinyLlama/TinyLlama-1.1B-Chat-v1.0; TinyLlama can be plugged and played in many open-source projects built upon Llama. Besides, TinyLlama is compact with only 1.1B parameters; Applications: cater to a multitude of applications demanding a restricted computation and memory footprint.", "answer_type": "possible"}}}
{"query_id": "Huggingface--5", "dataset": "webvoyager", "query": "Find a model released on Hugging Face for recipe generation. Retrieve the information of the model, including its name, model size and tensor type.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--5", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "flax-community/t5-recipe-generation; 223M params; F32", "answer_type": "possible"}}}
{"query_id": "Huggingface--6", "dataset": "webvoyager", "query": "Find the model sentence-transformers/all-MiniLM-L6-v2 and use the Inference API on the webpage to get the similarity of the following two sentences: 'Tomorrow is Sunday', 'Eat a burger on Sunday'.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--6", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "0.550", "answer_type": "golden"}}}
{"query_id": "Huggingface--7", "dataset": "webvoyager", "query": "Which is the most downloaded audio related dataset on Hugging face currently.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--7", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "autumnjohnson/ceti_audio", "answer_type": "golden"}}}
{"query_id": "Huggingface--8", "dataset": "webvoyager", "query": "Retrieve an example of a pre-trained language model in natural language processing and identify the tasks it is specifically designed for, like translation or text summarization.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--8", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "microsoft/phi-2; Text generation", "answer_type": "possible"}}}
{"query_id": "Huggingface--9", "dataset": "webvoyager", "query": "Find the most download machine translation model on Huggingface which focuses on English and Japanese (en-ja) and report the evaluation metrics stated for it.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--9", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "Helsinki-NLP/opus-mt-ja-en; BLEU 41.7\t; chr-F 0.589", "answer_type": "golden"}}}
{"query_id": "Huggingface--10", "dataset": "webvoyager", "query": "Open space: argilla/notux-chat-ui and interact with it by asking it 'which team trained you'. What is its answer.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--10", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "Mistral AI team", "answer_type": "golden"}}}
{"query_id": "Huggingface--11", "dataset": "webvoyager", "query": "Identify the latest updated image to video model available on Huggingface and summarize its main features.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--11", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "motexture/VSeq2VSeq; Text to video diffusion model with variable length frame conditioning for infinite length video generation.", "answer_type": "possible"}}}
{"query_id": "Huggingface--12", "dataset": "webvoyager", "query": "Find the most recently updated machine learning model on Huggingface which focuses on Error Correction.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--12", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "Jaagup/errors_corrections_min3", "answer_type": "possible"}}}
{"query_id": "Huggingface--13", "dataset": "webvoyager", "query": "Search for LLaMA in the huggingface doc, what type is the spaces_between_special_tokens parameter in LlamaTokenizer and what is its default value.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--13", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "bool, defaults to False", "answer_type": "golden"}}}
{"query_id": "Huggingface--14", "dataset": "webvoyager", "query": "How much is the Pro account of Hugging face for a month and what are the features?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--14", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "$9/month; Pro Account: Get a PRO badge on your profile, Early access to new features, Unlock Inference for PROs, Higher tier for AutoTrain", "answer_type": "golden"}}}
{"query_id": "Huggingface--15", "dataset": "webvoyager", "query": "Identify the most downloaded models on Hugging face that use the PaddlePaddle library.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--15", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "junnyu/roformer_chinese_base", "answer_type": "possible"}}}
{"query_id": "Huggingface--16", "dataset": "webvoyager", "query": "Find information on the latest (as of today's date) pre-trained language model on Huggingface suitable for text classification and briefly describe its intended use case and architecture.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--16", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "<model> (today, text classification)", "answer_type": "possible"}}}
{"query_id": "Huggingface--17", "dataset": "webvoyager", "query": "Find the most recently updated open-source project related to natural language processing on the Huggingface platform. Provide the project's name, creator, and a brief description of its functionality.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--17", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "<model>; <creator>; <description> (recent, NLP)", "answer_type": "possible"}}}
{"query_id": "Huggingface--18", "dataset": "webvoyager", "query": "Look up TRL's forward modelling in the hugging face documentation on how to add a margin to a loss.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--18", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "As in the Llama 2 paper, you can add a margin to the loss by adding a margin column to the dataset. The reward collator will automatically pass it through and the loss will be computed accordingly.", "answer_type": "golden"}}}
{"query_id": "Huggingface--19", "dataset": "webvoyager", "query": "Explore and summarize the features of the most recent open-source NLP model released by Hugging Face for English text summarization.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--19", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "<model> (Most recent, English text summarization)", "answer_type": "possible"}}}
{"query_id": "Huggingface--20", "dataset": "webvoyager", "query": "Locate a pre-trained natural language processing model on Hugging Face that specializes in named entity recognition (NER), confirm that the model was last updated in 2022 and has 1M+ downloads.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--20", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "ckiplab/bert-base-chinese-ner", "answer_type": "golden"}}}
{"query_id": "Huggingface--21", "dataset": "webvoyager", "query": "Look up the tour about how to use the 'pipeline' feature in the Hugging Face Transformers library for sentiment analysis, and identify the default model it uses.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--21", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "from transformers import pipeline \\n classifier = pipeline(\"sentiment-analysis\") \\n classifier(\"We are very happy to show you the 🤗 Transformers library.\") ... distilbert/distilbert-base-uncased-finetuned-sst-2-english", "answer_type": "golden"}}}
{"query_id": "Huggingface--22", "dataset": "webvoyager", "query": "Identify the steps to convert a PyTorch model to TensorFlow using the Hugging Face Transformers library as described in their documentation.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--22", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "<summary> of https://huggingface.co/docs/transformers/main/en/add_tensorflow_model#4-model-implementation", "answer_type": "possible"}}}
{"query_id": "Huggingface--23", "dataset": "webvoyager", "query": "Identify three innovative and widely recognized open-source NLP models for automatic speech recognition released in the past month on Huggingface.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--23", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "eg, openai/whisper-large-v3", "answer_type": "possible"}}}
{"query_id": "Huggingface--24", "dataset": "webvoyager", "query": "Search for a model on Hugging Face with an Apache-2.0 license that has received the highest number of likes.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--24", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "mistralai/Mixtral-8x7B-Instruct-v0.1", "answer_type": "golden"}}}
{"query_id": "Huggingface--25", "dataset": "webvoyager", "query": "In the Hugging Face documentation, find the tutorial on loading adapters with PEFT, tell me how to load in 8bit or 4bit.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--25", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "Add the load_in_8bit or load_in_4bit parameters to from_pretrained() and set device_map=\"auto\" to effectively distribute the model to your hardware. (Or use code)", "answer_type": "golden"}}}
{"query_id": "Huggingface--26", "dataset": "webvoyager", "query": "Identify a model on Hugging Face designed for generating travel chats. Obtain information about the model, including its name, size and training framwork.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--26", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "PhilipTheGreat/DiabloGPT-small-Traveller, GPT2LMHeadModel, 510 MB", "answer_type": "possible"}}}
{"query_id": "Huggingface--27", "dataset": "webvoyager", "query": "Determine the most downloaded dataset related to Text Retrieval in NLP on Hugging Face.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--27", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "nlphuji/mscoco_2014_5k_test_image_text_retrieval", "answer_type": "golden"}}}
{"query_id": "Huggingface--28", "dataset": "webvoyager", "query": "Retrieve an example of a pre-trained model on Hugging Face that is optimized for question answering tasks and detail the languages it supports.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--28", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "eg, /roberta-base-squad2, language: English", "answer_type": "possible"}}}
{"query_id": "Huggingface--29", "dataset": "webvoyager", "query": "Summarize the description of the recent open-source NLP model released on Hugging Face for medical summarization.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--29", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "<summary> of Falconsai/medical_summarization (T5 Large for Medical Text Summarization)", "answer_type": "possible"}}}
{"query_id": "Huggingface--30", "dataset": "webvoyager", "query": "Identify the most downloaded English-Chinese (en-zh) machine translation model on Huggingface and report its latest performance metrics and usage guidelines.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--30", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "Helsinki-NLP/opus-mt-en-zh; testset, BLEU, chr-F: Tatoeba-test.eng.zho, 31.4, 0.268", "answer_type": "golden"}}}
{"query_id": "Huggingface--31", "dataset": "webvoyager", "query": "Identify the latest machine learning model on Huggingface that specializes in detecting fake news, including the date of its last update.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--31", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "eg, Hawat/make-believe-fakenews-detection, Updated Jan 16 2024", "answer_type": "possible"}}}
{"query_id": "Huggingface--32", "dataset": "webvoyager", "query": "On the Hugging Face website, search for the model 'GPT-J-6B' and find the 'temperature' parameter in its settings. What is the default value of this parameter?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--32", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "\"temperature\": 1.0", "answer_type": "golden"}}}
{"query_id": "Huggingface--33", "dataset": "webvoyager", "query": "List three hugging face docs. How many GitHub stars have they earned so far?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--33", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "eg, Transformers - 119,672 stars, Diffusers - 20,775 stars, Datasets - 17,960 stars.", "answer_type": "possible"}}}
{"query_id": "Huggingface--34", "dataset": "webvoyager", "query": "List the benefits of hugging face classroom mentioned on Hugging face website.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--34", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "Empower your students with state-of-the-art resources; Give your students unlimited access to modern machine learning tools; Easily manage your classroom ...", "answer_type": "possible"}}}
{"query_id": "Huggingface--35", "dataset": "webvoyager", "query": "Find the latest Diffusion-related blog on Hugging Face, and read its intro or overview section to roughly summarize the content of the blog.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--35", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "eg, Accelerating SD Turbo and SDXL Turbo Inference with ONNX Runtime and Olive, Published January 15, 2024, <summary>", "answer_type": "possible"}}}
{"query_id": "Huggingface--36", "dataset": "webvoyager", "query": "Summarize all the payment plans and their advantages in huggingface pricing.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--36", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "summary of https://huggingface.co/pricing", "answer_type": "possible"}}}
{"query_id": "Huggingface--37", "dataset": "webvoyager", "query": "Browse the daily paper on Hugging Face. What is the title of the first article, how many upvotes has it received, and is there any related model or data release?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--37", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "huggingface posts, https://huggingface.co/posts", "answer_type": "possible"}}}
{"query_id": "Huggingface--38", "dataset": "webvoyager", "query": "Investigate the 'transformers' library in the Hugging Face documentation, focusing on how to add new tokens to a tokenizer.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--38", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "use add_tokens method", "answer_type": "golden"}}}
{"query_id": "Huggingface--39", "dataset": "webvoyager", "query": "Investigate in the Hugging Face documentation how to utilize the 'Trainer' API for training a model on a custom dataset, and note the configurable parameters of the Trainer class.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--39", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "Trainer example, https://huggingface.co/docs/evaluate/main/en/transformers_integrations#trainer", "answer_type": "possible"}}}
{"query_id": "Huggingface--40", "dataset": "webvoyager", "query": "Check out Text Embeddings Inference in Hugging face's Doc to summarise the strengths of the toolkit.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--40", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "Streamlined Deployment; Efficient Resource Utilization; Dynamic Batching ...", "answer_type": "possible"}}}
{"query_id": "Huggingface--41", "dataset": "webvoyager", "query": "What is the current Text-to-3D model with the highest number of downloads and tell me are there Spaces that use the model.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--41", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "openai/shap-e; there are Spaces like hysts/Shap-E ...", "answer_type": "golden"}}}
{"query_id": "Huggingface--42", "dataset": "webvoyager", "query": "Check the Dataset Viewer for ai2lumos/lumos_complex_qa_plan_onetime on Hugging face. what is the content corresponding to user in the first message?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://huggingface.co/", "metadata": {"original_task_id": "Huggingface--42", "website": "Huggingface", "category": "Huggingface", "additional": {"ground_truth": "content: Please provide a reasonable subgoal-based plan to solve the given task.\\nTask: What was the opening date of the museum dedicated to the war that, after it occurred, Boston became one of the wealthiest international ports?; Initial Environment Description: None.", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--0", "dataset": "webvoyager", "query": "derivative of x^2 when x=5.6", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--0", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "11.2", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--1", "dataset": "webvoyager", "query": "Give a constraint on the set of inequalities for the inner region of the pentagram.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--1", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "2 a + 3 sqrt(5) x + 5 x>=sqrt(2 (5 + sqrt(5))) y AND 2 a + sqrt(50 + 22 sqrt(5)) y>=(5 + sqrt(5)) x AND sqrt(5) a + 2 sqrt(5) x + 2 sqrt(5 + 2 sqrt(5)) y <= a ... (Search inner region of the pentagram on Wolfram)", "answer_type": "possible"}}}
{"query_id": "Wolfram Alpha--2", "dataset": "webvoyager", "query": "Calculate 3^71 and retain 5 significant figures in scientific notation.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--2", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "7.5095 * 10^33", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--3", "dataset": "webvoyager", "query": "Let g(x) be the integral of x^2 cos(2x). Write the expression of g(x).", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--3", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "1/4 (2 x cos(2 x) + (-1 + 2 x^2) sin(2 x)) + Constant", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--4", "dataset": "webvoyager", "query": "Pack 24 circles in a circle radius r. Compare Densest known packing and Square packing. Then tell me the radius of the inner circles.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--4", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "Densest known packing: 0.176939r; Square packing: 0.163961r", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--5", "dataset": "webvoyager", "query": "Show the solution of y\"(z) + sin(y(z)) = 0 from wolframalpha.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--5", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "y(z) = ± 2 am(1/2 sqrt((c_1 + 2) (z + c_2)^2), 4/(c_1 + 2)), am(x, m) is the Jacobi amplitude function", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--6", "dataset": "webvoyager", "query": "Simplify x^5-20x^4+163x^3-676x^2+1424x-1209 so that it has fewer items.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--6", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "7 + 3 (-4 + x)^3 + (-4 + x)^5", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--7", "dataset": "webvoyager", "query": "Give the final angle and final length after 6s of a Spring pendulum with spring equilibrium length=0.12m, initial length=0.24m, initial angle=80deg, mass=1kg, spring constant=120 N/m .", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--7", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "-73.26° from vertical; 0.252 m", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--8", "dataset": "webvoyager", "query": "Give 12 lbs of 4-cyanoindole, converted to molar and indicate the percentage of C, H, N.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--8", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "approximately: 38.3 mol; 76.0% C; 4.3% H; 19.7% N", "answer_type": "possible"}}}
{"query_id": "Wolfram Alpha--9", "dataset": "webvoyager", "query": "Annual energy production of Diablo Canyon 2 in 2010.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--9", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "9752 GW h/yr (gigawatt hours per year)", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--10", "dataset": "webvoyager", "query": "Give the geomagnetic field on June 20, 2023 in Oslo.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--10", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "geomagnetic field, total 51.5 uT;", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--11", "dataset": "webvoyager", "query": "Show the electrical resistivity of UNS A92024 and UNS G10800 at 20 degrees Celsius.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--11", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "UNS A92024: 4.9×10^-6 Ω cm (ohm centimeters) (at 20 °C); UNS G10800: 1.8×10^-5 Ω cm (ohm centimeters)", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--12", "dataset": "webvoyager", "query": "Which character in unicode 8900 to 8920 looks like a snowflake", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--12", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "8902 (U+22C6)", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--13", "dataset": "webvoyager", "query": "What is 10,000 US dollars worth now in 1980 and in 1970?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--13", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "approximately: 36430; 77325", "answer_type": "possible"}}}
{"query_id": "Wolfram Alpha--14", "dataset": "webvoyager", "query": "Compare the total Calories: whopper vs baconator vs big mac. Assume that each serving of food is 300g.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--14", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "approximately: Whopper, 657 Cal; Baconator, 902 Cal; Big Mac, 730 Cal", "answer_type": "possible"}}}
{"query_id": "Wolfram Alpha--15", "dataset": "webvoyager", "query": "Show the blood relationship fraction between you and your father's mother's sister's son.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--15", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "3.125%", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--16", "dataset": "webvoyager", "query": "Weight lose for a male with current weight 90 kg, 40 year old, 175 cm. If he intakes 1500 calories every day, how long will it take to lose 17 kg.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--16", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "intake 1500 Cal/d for 3 months 12 days to lose 17 kg with a sedentary activity level", "answer_type": "possible"}}}
{"query_id": "Wolfram Alpha--17", "dataset": "webvoyager", "query": "Show the average price of movie ticket in Providence, Nashville, Boise in 2023.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--17", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "Providence $13.81; Nashville $12.65; Boise $12.65", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--18", "dataset": "webvoyager", "query": "Plot Albert Einstein curve with Parametric equations.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--18", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "show a Albert Einstein curve with parametric equations", "answer_type": "possible"}}}
{"query_id": "Wolfram Alpha--19", "dataset": "webvoyager", "query": "Standing in the sun from 11:00 am with SPF 5 in Australia. Approximate time to sunburn for each skin type.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--19", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "<sunburn time> (real-time date)", "answer_type": "possible"}}}
{"query_id": "Wolfram Alpha--20", "dataset": "webvoyager", "query": "Compute the integral of 3e^(2x) from x=0 to x=5.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--20", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "approximately 33038", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--21", "dataset": "webvoyager", "query": "Calculate (1+0.1*i)^8 + (1-0.2*i)^8 where i is a complex number.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--21", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "approximately 0.717183 - 0.425258 i", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--22", "dataset": "webvoyager", "query": "Determine the area of a regular hexagon with a side length of 7 cm.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--22", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "127.306 cm^2 or 147 \\sqrt(3) / 2", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--23", "dataset": "webvoyager", "query": "Calculate the population growth rate of Canada from 2020 to 2023 using Wolfram Alpha.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--23", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "mean population growth rate of Canada from 2020 to 2023 is 0.9998% per year", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--24", "dataset": "webvoyager", "query": "Solve the differential equation y''(t) - 2y'(t) + 10y(t) = 0 and display its general solution.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--24", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "y(t) = c1 e^t sin(3t) + c2 e^t cos(3t)", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--25", "dataset": "webvoyager", "query": "Calculate the final position and velocity of a projectile launched at 45 degrees with an initial speed of 30 m/s after 3 seconds.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--25", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "if g=9.81; x = 63.64m, y = 19.49m; Vx = 21.21 m/s, Vy = -8.22 m/s", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--26", "dataset": "webvoyager", "query": "Convert 15 kilograms of sulfuric acid to moles and display the percentage composition of H, S, and O by weight.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--26", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "if no H2O, 153 moles, 2.06% for hydrogen (H), 32.69% for sulfur (S), and 65.25% for oxygen (O).", "answer_type": "possible"}}}
{"query_id": "Wolfram Alpha--27", "dataset": "webvoyager", "query": "Display the thermal conductivity of Copper (Cu) and Aluminum (Al) at 25 degrees Celsius.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--27", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "401.2 W/(m K); 236.9 W/(m K)", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--28", "dataset": "webvoyager", "query": "Identify the character in Unicode range 9632 to 9650 that represents a hollow parallelogram.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--28", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "9649 or U+25B1", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--29", "dataset": "webvoyager", "query": "Create a plot of cat curve using wolfram alpha.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--29", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "any cat curve", "answer_type": "possible"}}}
{"query_id": "Wolfram Alpha--30", "dataset": "webvoyager", "query": "Calculate the estimated time to sunburn for different skin types when exposed to the sun at 1:00 pm with SPF 1 in Brazil.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--30", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "real-time, search query: sunburn 1:00 pm with SPF 1 in Brazil", "answer_type": "possible"}}}
{"query_id": "Wolfram Alpha--31", "dataset": "webvoyager", "query": "Using Wolfram Alpha, determine the current temperature and wind speed in Chicago, IL.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--31", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "real-time, search query: current temperature and wind speed in Chicago, IL.", "answer_type": "possible"}}}
{"query_id": "Wolfram Alpha--32", "dataset": "webvoyager", "query": "Print all prime numbers between 1000 and 1200 using Wolfram alpha.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--32", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049, 1051, 1061, 1063, 1069, 1087, 1091, 1093, 1097, 1103, 1109, 1117, 1123, 1129, 1151, 1153, 1163, 1171, 1181, 1187, 1193.", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--33", "dataset": "webvoyager", "query": "Identify the electrical energy output of a hydroelectric power plant named Itaipu Dam in 2023 using Wolfram Alpha.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--33", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "89.5 TWh (terawatt hours)", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--34", "dataset": "webvoyager", "query": "Calculate the mass of Jupiter compared to Earth using Wolfram Alpha. Also, find the length of one day on Jupiter.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--34", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "317.8 times that of Earth, and the length of one day on Jupiter is approximately 9.925 hours", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--35", "dataset": "webvoyager", "query": "Calculate the determinant of a 6x6 Hilbert matrix.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--35", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "1/186313420339200000", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--36", "dataset": "webvoyager", "query": "Determine the convergence or divergence of the series Σ (n=1 to ∞) of 1/(n^3 + 1).", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--36", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "converges", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--37", "dataset": "webvoyager", "query": "How many days are there between February 12, 2024 and August 9, 2050?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--37", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "9675", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--38", "dataset": "webvoyager", "query": "Compute the length of a curve defined by y = 2x^3 - 3x^2 + 4x - 5 from x = 0 to x = 3.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--38", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "around 39.2", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--39", "dataset": "webvoyager", "query": "Use Wolfram alpha to write the expression of the ellipse x^2 + 3 y^2 = 4 rotated 33 degrees counterclockwise.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--39", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "x^2(\\sin(\\frac{2π}{15}) - 2) + 2xy \\cos(\\frac{2π}{15}) + 4 = y^2(2 + \\sin(\\frac{2π}{15}))", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--40", "dataset": "webvoyager", "query": "Approximate amount of fat burned by a 28yo, 172cm tall, 70kg woman running for 30min at a pace of 6min/mile.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--40", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "around 0.078 kg", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--41", "dataset": "webvoyager", "query": "What is the approximate Heart Rate Reserve of a 50 year old man who has a heart rate of 60bpm at rest.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--41", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "110 bpm", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--42", "dataset": "webvoyager", "query": "What is the raw memory of a 100.2\" * 123.5\" true colour picture at 72 ppi?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--42", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "192 MB", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--43", "dataset": "webvoyager", "query": "A polyominoes of order 6 means you have 6 identical squares to combine different shapes (2-sided). How many combinations are there? Looking at all the shapes in the result, how many of them have only 2 rows in total?", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--43", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "35; 12", "answer_type": "golden"}}}
{"query_id": "Wolfram Alpha--44", "dataset": "webvoyager", "query": "Solve the ODE, g' + cos(g) = 0, if there is a constant in the result, determine the value of the constant by the condition that g(0) = 1.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--44", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "g(x) = 2 cos^(-1)((sinh(x) (cos(1/2) - sin(1/2)) + cosh(x) (cos(1/2) - sin(1/2)) + sin(1/2) + cos(1/2))/(sqrt(2) sqrt(-(sin(1) - 1) sinh(2 x) - (sin(1) - 1) cosh(2 x) + 1 + sin(1)))) OR ...", "answer_type": "possible"}}}
{"query_id": "Wolfram Alpha--45", "dataset": "webvoyager", "query": "A 175cm tall, 85kg, 40yo man climbs 2500 steps at about 18cm per step and 40 steps per minute. summarise the Metabolic properties.", "graders": ["webvoyager_grader", "fara_combined"], "start_url": "https://www.wolframalpha.com/", "metadata": {"original_task_id": "Wolfram Alpha--45", "website": "Wolfram Alpha", "category": "Wolfram Alpha", "additional": {"ground_truth": "energy expenditure | 2720 kJ (kilojoules); average energy expenditure per step | 1.1 kJ/step (kilojoules per step); fat burned | 0.0842 kg (kilograms); oxygen consumption | 129.9 L (liters); metabolic equivalent | 7 metabolic equivalents", "answer_type": "golden"}}}