#!/usr/bin/env python3 """ Dataset Converter for Web Agent Eval System Converts benchmark datasets (WebVoyager, Mind2Web) to unified JSONL format. Usage: python converter.py webvoyager [--reference ] python converter.py mind2web python converter.py online-mind2web Output format (one JSON per line): { "query_id": "unique-id", "dataset": "webvoyager|mind2web|online-mind2web", "query": "task instruction", "graders": ["webvoyager_grader"], "start_url": "https://...", "metadata": { "original_task_id": "...", "website": "...", "category": "...", "additional": {...} } } """ import json import argparse import sys from pathlib import Path from typing import Optional def convert_webvoyager( input_path: str, output_path: str, reference_path: Optional[str] = None ) -> int: """ Convert WebVoyager dataset to unified format. WebVoyager format (JSONL): {"web_name": "Allrecipes", "id": "Allrecipes--0", "ques": "...", "web": "https://..."} Reference answers format (JSON): { "Allrecipes": { "answers": [{"id": 0, "type": "golden", "ans": "..."}] } } """ # Load reference answers if provided reference_answers = {} if reference_path: with open(reference_path, 'r', encoding='utf-8') as f: ref_data = json.load(f) for website, data in ref_data.items(): if 'answers' in data: for ans in data['answers']: key = f"{website}--{ans['id']}" reference_answers[key] = { 'answer': ans.get('ans'), 'type': ans.get('type', 'unknown') } count = 0 with open(input_path, 'r', encoding='utf-8') as infile, \ open(output_path, 'w', encoding='utf-8') as outfile: for line_num, line in enumerate(infile, 1): line = line.strip() if not line: continue try: data = json.loads(line) except json.JSONDecodeError as e: print(f"Warning: Skipping line {line_num}, invalid JSON: {e}", file=sys.stderr) continue task_id = data.get('id', f'webvoyager-{line_num}') # Build unified format unified = { 'query_id': task_id, 'dataset': 'webvoyager', 'query': data.get('ques', ''), 'graders': ['webvoyager_grader'], 'start_url': data.get('web'), 'metadata': { 'original_task_id': task_id, 'website': data.get('web_name'), 'category': data.get('web_name'), # WebVoyager uses website as category } } # Add reference answer if available if task_id in reference_answers: unified['metadata']['additional'] = { 'ground_truth': reference_answers[task_id]['answer'], 'answer_type': reference_answers[task_id]['type'] } outfile.write(json.dumps(unified, ensure_ascii=False) + '\n') count += 1 return count def convert_mind2web(input_path: str, output_path: str) -> int: """ Convert Mind2Web dataset to unified format. Mind2Web format (JSON array or JSONL): { "annotation_id": "unique-id", "website": "website-name", "domain": "domain", "subdomain": "subdomain", "confirmed_task": "task description", "action_reprs": ["action1", "action2", ...] } """ # Try loading as JSON array first, fall back to JSONL tasks = [] with open(input_path, 'r', encoding='utf-8') as f: content = f.read().strip() if content.startswith('['): # JSON array tasks = json.loads(content) else: # JSONL for line in content.split('\n'): if line.strip(): tasks.append(json.loads(line)) count = 0 with open(output_path, 'w', encoding='utf-8') as outfile: for task in tasks: task_id = task.get('annotation_id', f'mind2web-{count}') website = task.get('website', '') # Extract start URL from website field or construct from domain start_url = None if website.startswith('http'): start_url = website elif task.get('domain'): start_url = f"https://{task.get('domain')}" unified = { 'query_id': task_id, 'dataset': 'mind2web', 'query': task.get('confirmed_task', ''), 'graders': ['mind2web_judge'], 'start_url': start_url, 'metadata': { 'original_task_id': task_id, 'website': website, 'category': task.get('domain'), 'additional': { 'subdomain': task.get('subdomain'), 'action_reprs': task.get('action_reprs', []), 'reference_length': len(task.get('actions', [])) } } } outfile.write(json.dumps(unified, ensure_ascii=False) + '\n') count += 1 return count def convert_online_mind2web(input_path: str, output_path: str) -> int: """ Convert Online-Mind2Web dataset to unified format. Online-Mind2Web format: { "task_id": "unique-id", "website": "https://...", "confirmed_task": "task description", "level": "easy|medium|hard", "reference_length": 5 } """ # Try loading as JSON array first, fall back to JSONL tasks = [] with open(input_path, 'r', encoding='utf-8') as f: content = f.read().strip() if content.startswith('['): tasks = json.loads(content) else: for line in content.split('\n'): if line.strip(): tasks.append(json.loads(line)) count = 0 with open(output_path, 'w', encoding='utf-8') as outfile: for task in tasks: task_id = task.get('task_id', f'online-mind2web-{count}') unified = { 'query_id': task_id, 'dataset': 'online-mind2web', 'query': task.get('confirmed_task', ''), 'graders': ['mind2web_judge'], 'start_url': task.get('website'), 'metadata': { 'original_task_id': task_id, 'website': task.get('website'), 'category': task.get('level', 'unknown'), 'additional': { 'level': task.get('level'), 'reference_length': task.get('reference_length') } } } outfile.write(json.dumps(unified, ensure_ascii=False) + '\n') count += 1 return count def main(): parser = argparse.ArgumentParser( description='Convert benchmark datasets to unified JSONL format', formatter_class=argparse.RawDescriptionHelpFormatter, epilog=__doc__ ) subparsers = parser.add_subparsers(dest='dataset', required=True) # WebVoyager subcommand wv_parser = subparsers.add_parser('webvoyager', help='Convert WebVoyager dataset') wv_parser.add_argument('input', help='Input JSONL file (WebVoyager_data.jsonl)') wv_parser.add_argument('output', help='Output JSONL file') wv_parser.add_argument('--reference', '-r', help='Reference answers JSON file') # Mind2Web subcommand m2w_parser = subparsers.add_parser('mind2web', help='Convert Mind2Web dataset') m2w_parser.add_argument('input', help='Input JSON/JSONL file') m2w_parser.add_argument('output', help='Output JSONL file') # Online-Mind2Web subcommand om2w_parser = subparsers.add_parser('online-mind2web', help='Convert Online-Mind2Web dataset') om2w_parser.add_argument('input', help='Input JSON/JSONL file') om2w_parser.add_argument('output', help='Output JSONL file') args = parser.parse_args() # Validate input file exists if not Path(args.input).exists(): print(f"Error: Input file not found: {args.input}", file=sys.stderr) sys.exit(1) # Create output directory if needed output_dir = Path(args.output).parent if output_dir and not output_dir.exists(): output_dir.mkdir(parents=True, exist_ok=True) # Convert based on dataset type if args.dataset == 'webvoyager': reference = getattr(args, 'reference', None) count = convert_webvoyager(args.input, args.output, reference) elif args.dataset == 'mind2web': count = convert_mind2web(args.input, args.output) elif args.dataset == 'online-mind2web': count = convert_online_mind2web(args.input, args.output) else: print(f"Unknown dataset: {args.dataset}", file=sys.stderr) sys.exit(1) print(f"Converted {count} tasks to {args.output}") if __name__ == '__main__': main()