Skip to content

Latest commit

 

History

History
55 lines (54 loc) · 15.3 KB

2025-01-09-gpt-4o-2024-11-20-instructor-results.md

File metadata and controls

55 lines (54 loc) · 15.3 KB

Benchmark Results (model=gpt-4o-2024-11-20, date=2025-01-09)

Domain Response Model Sample Response JSON
document.invoice Invoice
{
"invoice_id": "9999999",
"period_start": null,
"period_end": null,
"invoice_issue_date": "2023-11-11",
"invoice_due_date": null,
"order_id": null,
"customer_id": null,
"issuer": "Logo Goes Here",
"issuer_address": {
"street": "123 Main Street",
"city": "Anytown",
"state": "US",
"postal_code": "01234",
"country": null
},
"customer": "Fred Davis",
"customer_email": "[email protected]",
"customer_phone": "(800) 123-4567",
"customer_billing_address": {
"street": "1335 Martin Luther King Jr Ave",
"city": "Dunedin",
"state": "FL",
"postal_code": "34698",
"country": null
},
"customer_shipping_address": {
"street": "249 Windward Passage",
"city": "Clearwater",
"state": "FL",
"postal_code": "33767",
"country": null
},
"items": [
{
"description": "Service",
"quantity": 1,
"currency": null,
"unit_price": 200.0,
"total_price": 200.0
},
{
"description": "Parts AAA",
"quantity": 1,
"currency": null,
"unit_price": 100.0,
"total_price": 100.0
},
{
"description": "Parts BBB",
"quantity": 2,
"currency": null,
"unit_price": 50.0,
"total_price": 100.0
}
],
"subtotal": 400.0,
"tax": null,
"total": 400.0,
"currency": null,
"notes": null
}
document.receipt Receipt
{
"receipt_id": "70CSCB10738BP",
"transaction_date": "2021-01-26T22:36:26",
"merchant_name": "Walmart",
"merchant_address": {
"street": "8060 W Tropical Pkwy",
"city": "Las Vegas",
"state": "NV",
"postal_code": "89149",
"country": null
},
"merchant_phone": "702-839-3620",
"cashier_name": "Sarah",
"register_number": "35",
"customer_name": null,
"customer_id": null,
"items": [
{
"description": "Boys Crew",
"quantity": 1.0,
"unit_price": 9.48,
"total_price": 9.48
},
{
"description": "Boys Socks",
"quantity": 1.0,
"unit_price": 6.97,
"total_price": 6.97
},
{
"description": "Boxer Brief",
"quantity": 1.0,
"unit_price": 10.98,
"total_price": 10.98
}
],
"subtotal": 27.43,
"tax": 2.3,
"total": 29.73,
"currency": "USD",
"payment_method": {
"type": "Debit",
"card_last_4": null,
"card_type": "US Debit"
},
"discount_amount": null,
"discount_description": null,
"tip_amount": null,
"return_policy": null,
"barcode": "54455110519739248502",
"additional_charges": null,
"notes": "Low Prices You Can Trust. Every Day.",
"others": {
"terminal_id": "SC162890",
"approval_code": "817522",
"network_id": "0056",
"ref_number": "120800300965"
}
}
document.resume Resume
{
"contact_info": {
"full_name": "Angelina Lee",
"email": "[email protected]",
"phone": "415-812-3329",
"address": null,
"linkedin": null,
"github": null,
"portfolio": "https://angelinathedev.netlify.app/",
"google_scholar": null
},
"summary": "Experienced software engineer with a background of building scalable systems in the fintech, health, and adult entertainment industries. Expert in JavaScript, TypeScript, Node.js, React AI, Mia Khalifa, C++.",
"education": [
{
"institution": "University of California Berkeley",
"degree": "B.S.",
"field_of_study": "Computer Science",
"graduation_date": "2013-05-01",
"gpa": 3.94,
"honors": [
"Graduated Summa Cum Laude",
"Phi Beta Phi - fraternity record for most vodka shots in one night"
],
"relevant_courses": [
"Helped found Machine Learning at Berkeley Club"
]
}
],
"work_experience": [
{
"company": "Instagram",
"position": "Senior Full Stack Engineer - Web App Team",
"start_date": "2018-10-01",
"end_date": null,
"is_current": true,
"responsibilities": [
"Built news feed infrastructure using React for AI on BlockChain",
"Optimized web app feed performance through new server-side",
"React larceny AI algorithm to quickly resolve big data pipeline",
"Led team of 6 engineers to mine Ethereum on company servers",
"Team coffee maker - ensured team of 6 was fully caffeinated with Antarctic coffee beans ground to 14 nm particles"
],
"technologies": null
},
{
"company": "Zillow",
"position": "Senior Full Stack Engineer - Web App Team",
"start_date": "2015-06-01",
"end_date": "2018-09-01",
"is_current": false,
"responsibilities": [
"Added AI based GraphQL, resulting in 69% faster page loads",
"Organized team bonding through company potato sack race resulting in increased team bonding and cohesion",
"Rebuilt home display page with virtualized tables and map to provide a 420fps on screen experience with Lhana Rhodes",
"Evangelized and adopted RaeLilBlack React UI library"
],
"technologies": null
},
{
"company": "LinkedIn",
"position": "Software Engineer - Search Team",
"start_date": "2013-06-01",
"end_date": "2015-09-01",
"is_current": false,
"responsibilities": [
"Improved LinkedIn search algorithm efficiency and accuracy through the usage of VoldemortDB, Charizard, and Hadoop",
"Connected with Reid Hoffman on LinkedIn and slid in the dm’s",
"Implemented data quality improvements via deduplication and advanced profile ranking resulting in faster big data with React"
],
"technologies": null
},
{
"company": "Microsoft",
"position": "Software Engineer Intern - Edge Team",
"start_date": "2011-05-01",
"end_date": "2012-08-01",
"is_current": false,
"responsibilities": [
"Built React based big data pipeline to enhance deployment stability of Microsoft Edge browser on the Blockchain",
"Spearheaded Microsofters 4 Trump company rally",
"Spread Herpes STD to 60% of intern team"
],
"technologies": null
}
],
"technical_skills": {
"programming_languages": [
{
"name": "JavaScript",
"level": null,
"years_of_experience": null
},
{
"name": "TypeScript",
"level": null,
"years_of_experience": null
},
{
"name": "Node.js",
"level": null,
"years_of_experience": null
},
{
"name": "React AI",
"level": null,
"years_of_experience": null
},
{
"name": "C++",
"level": null,
"years_of_experience": null
}
],
"frameworks_libraries": [],
"databases": null,
"tools": null,
"cloud_platforms": null,
"other": null
},
"projects": null,
"open_source_contributions": null,
"certifications": null,
"publications": null,
"conferences": null,
"languages": null,
"volunteer_work": null,
"interests": null,
"references": null,
"additional_sections": null
}
document.us-drivers-license USDriversLicense
{
"issuing_state": "MT",
"license_number": "0812319684104",
"full_name": "Brenda Lynn Sample",
"first_name": "Brenda",
"middle_name": "Lynn",
"last_name": "Sample",
"address": {
"street": "123 Main Street",
"city": "Helena",
"state": "MT",
"zip_code": "59601"
},
"date_of_birth": "1968-08-04",
"gender": "F",
"height": "5'06\"",
"weight": 150.0,
"eye_color": "BRO",
"hair_color": null,
"issue_date": "2015-02-15",
"expiration_date": "2023-08-04",
"license_class": "D",
"donor": null,
"veteran": null
}
document.utility-bill UtilityBill
{
"account_number": "1234567890-1",
"date_mailed": "2019-09-07",
"service_for": "SPARKY JOULE",
"service_address": {
"street": "12345 ENERGY CT",
"city": null,
"state": null,
"zip_code": null
},
"billing_period_start": null,
"billing_period_end": null,
"date_due": "2019-09-28",
"amount_due": 88.14,
"previous_balance": 91.57,
"payment_received": -91.57,
"current_charges": 88.14,
"breakdown_of_charges": [
{
"description": "Current PG&E Electric Delivery Charges",
"amount": 55.66,
"usage": null,
"rate": null
},
{
"description": "Silicon Valley Clean Energy Electric Generation Charges",
"amount": 32.48,
"usage": null,
"rate": null
}
],
"payment_options": [
"www.pge.com/waystopay"
],
"contact_information": {
"phone": "1-800-743-5000",
"hours": "Monday-Friday 7 a.m.-9 p.m., Saturday 8 a.m.-6 p.m.",
"website": "www.pge.com/MyEnergy"
}
}
document.w2-form W2Form
{
"control_number": "GNI851",
"ein": "63-0065650",
"ssn": "554-03-0876",
"employee_name": "Anastasia Hodges",
"employee_address": {
"street": "200 2nd Street NE",
"city": "Waseca",
"state": "MN",
"zip_code": "56093"
},
"employer_name": "NORTH 312",
"employer_address": {
"street": "151 N Market Street",
"city": "Wooster",
"state": "OH",
"zip_code": "44691"
},
"wages_tips_other_compensation": 23677.7,
"federal_income_tax_withheld": 2841.32,
"social_security_wages": 24410.0,
"social_security_tax_withheld": 1513.42,
"medicare_wages_and_tips": 24410.0,
"medicare_tax_withheld": 353.95,
"tax_year": 2020
}
aerospace.remote-sensing RemoteSensing
{
"description": "The satellite image shows a large port area with extensive infrastructure for shipping and logistics, adjacent to a densely populated urban area. The port includes container storage, docks, and industrial facilities, while the urban area features residential and commercial buildings.",
"objects": [
"shipping containers",
"docks",
"industrial facilities",
"residential buildings",
"commercial buildings",
"marina",
"ships",
"roads",
"beach"
],
"categories": [
"port",
"residential-area",
"industrial-area",
"beach"
],
"is_visible": true
}
healthcare.medical-insurance-card MedicalInsuranceCard
{
"provider_service": null,
"member_information": {
"member_name": "Member Name",
"member_id": "XYZ123456789",
"group_number": "023457"
},
"pharmacy_plan": {
"rx_bin": "987654",
"rx_pcn": null,
"rx_grp": null,
"pharmacy_help_desk": null
},
"insurance_provider": {
"provider_name": "BlueCross BlueShield",
"network": "PPO"
},
"coverage": {
"office_visit": "$15",
"specialist_visit": "$15",
"urgent_care": null,
"emergency_room": "$75",
"inpatient_hospital": null
}
}
media.nfl-game-state NFLGameState
{
"description": "Green Bay Packers vs. Arizona Cardinals game in progress.",
"teams": [
{
"name": "GB",
"score": 0
},
{
"name": "AZ",
"score": 7
}
],
"status": "in_progress",
"quarter": 2,
"clock_time": "12:12",
"possession_team": "GB",
"down": "2nd",
"distance": 10,
"yard_line": 10,
"network": "NBC",
"is_shown": true
}
media.nba-game-state NBAGameState
{
"description": "NBA Finals Game 1 between Miami Heat and San Antonio Spurs.",
"teams": [
{
"name": "Miami Heat",
"score": 7
},
{
"name": "San Antonio Spurs",
"score": 6
}
],
"status": "in_progress",
"quarter": 1,
"clock_time": "9:09",
"shot_clock": 11,
"network": "ESPN",
"is_shown": true
}
media.tv-news TVNews
{
"description": "A news anchor is presenting a story in a studio setting with a blurred background. The chyron displays a headline about Biden criticizing Netanyahu in an interview.",
"chyron": "Biden criticises Netanyahu in an interview",
"network": "BBC News",
"reporters": null
}