Skip to content

Latest commit

 

History

History
55 lines (54 loc) · 15.2 KB

2025-01-09-gpt-4o-mini-2024-07-18-instructor-results.md

File metadata and controls

55 lines (54 loc) · 15.2 KB

# Benchmark Results (model=gpt-4o-mini-2024-07-18, date=2025-01-09)

Domain | Response Model | Sample Response JSON
document.invoice Invoice
{
"invoice_id": "9999999",
"period_start": null,
"period_end": null,
"invoice_issue_date": "2023-11-11",
"invoice_due_date": null,
"order_id": null,
"customer_id": null,
"issuer": "Anytown Invoice",
"issuer_address": {
"street": "123 Main Street",
"city": "Anytown",
"state": null,
"postal_code": "01234",
"country": null
},
"customer": "Fred Davis",
"customer_email": "[email protected]",
"customer_phone": "(800) 123-4567",
"customer_billing_address": {
"street": "1335 Martin Luther King Jr Ave",
"city": "Dunedin",
"state": "FL",
"postal_code": "34698",
"country": null
},
"customer_shipping_address": {
"street": "249 Windward Passage",
"city": "Clearwater",
"state": "FL",
"postal_code": "33767",
"country": null
},
"items": [
{
"description": "Service",
"quantity": 1,
"currency": null,
"unit_price": 200.0,
"total_price": 200.0
},
{
"description": "Parts AAA",
"quantity": 1,
"currency": null,
"unit_price": 100.0,
"total_price": 100.0
},
{
"description": "Parts BBB",
"quantity": 2,
"currency": null,
"unit_price": 50.0,
"total_price": 100.0
}
],
"subtotal": 400.0,
"tax": null,
"total": 400.0,
"currency": null,
"notes": null
}
document.receipt Receipt
{
"receipt_id": null,
"transaction_date": "2021-01-26T22:36:22",
"merchant_name": "Walmart",
"merchant_address": {
"street": "8060 W TROPICAL PKWY",
"city": "LAS VEGAS",
"state": "NV",
"postal_code": "89149",
"country": null
},
"merchant_phone": null,
"cashier_name": "SARAH",
"register_number": "35",
"customer_name": null,
"customer_id": null,
"items": [
{
"description": "BOYS CREW",
"quantity": 1.0,
"unit_price": 9.48,
"total_price": 9.48
},
{
"description": "BOYS SOCKS",
"quantity": 1.0,
"unit_price": 6.97,
"total_price": 6.97
},
{
"description": "BOXER BRIEF",
"quantity": 1.0,
"unit_price": 10.98,
"total_price": 10.98
}
],
"subtotal": 27.43,
"tax": 2.3,
"total": 29.73,
"currency": "USD",
"payment_method": {
"type": "Debit",
"card_last_4": "****",
"card_type": null
},
"discount_amount": null,
"discount_description": null,
"tip_amount": null,
"return_policy": null,
"barcode": null,
"additional_charges": null,
"notes": null,
"others": null
}
document.resume Resume
{
"contact_info": {
"full_name": "Angelina Lee",
"email": "[email protected]",
"phone": "415-812-3329",
"address": null,
"linkedin": null,
"github": null,
"portfolio": "https://angelinathedev.netlify.app/",
"google_scholar": null
},
"summary": "Experienced software engineer with a background of building scalable systems in the fintech, health, and adult entertainment industries. Expert in JavaScript, TypeScript, Node.js, React AI, Mia Khalifa, C++.",
"education": [
{
"institution": "University of California Berkeley",
"degree": "B.S.",
"field_of_study": "Computer Science",
"graduation_date": "2013-05-01",
"gpa": 3.94,
"honors": [
"Graduated Summa Cum Laude"
],
"relevant_courses": [
"Machine Learning at Berkeley Club"
]
}
],
"work_experience": [
{
"company": "Instagram",
"position": "Senior Full Stack Engineer - Web App Team",
"start_date": "2018-10-01",
"end_date": null,
"is_current": true,
"responsibilities": [
"Built news feed infrastructure using React for AI on BlockChain",
"Optimized web app feed performance through new server-side",
"React larceny AI algorithm to quickly resolve big data pipeline",
"Led team of 6 engineers to mine Ethereum on company servers",
"Team coffee maker - ensured team of 6 was fully caffeinated with Antarctic coffee beans ground to 14 m particles"
],
"technologies": null
},
{
"company": "Zillow",
"position": "Senior Full Stack Engineer - Web App Team",
"start_date": "2015-06-01",
"end_date": "2018-09-01",
"is_current": false,
"responsibilities": [
"Added AI based GraphQL, resulting in 69% faster page loads",
"Organized team bonding through company potato sack race resulting in increased team bonding and cohesivity",
"Rebuilt home display page with virtualized tables and map to provide a 420fps on screen experience with Lhana Rhodes",
"Evangelized and adopted RaeLiBlack UI library"
],
"technologies": null
},
{
"company": "LinkedIn",
"position": "Software Engineer - Search Team",
"start_date": "2013-06-01",
"end_date": "2015-09-01",
"is_current": false,
"responsibilities": [
"Improved LinkedIn search algorithm efficiency and accuracy through the usage of VoldemortDB, Chiarizard, and Hadoop",
"Connected with Reid Hoffman on LinkedIn and slid in the dm's",
"Implemented data quality improvements via deduplication and advanced profile ranking resulting in faster big data with React"
],
"technologies": null
},
{
"company": "Microsoft",
"position": "Software Engineer Intern - Edge Team",
"start_date": "2011-05-01",
"end_date": "2012-08-01",
"is_current": false,
"responsibilities": [
"Built React based big data pipeline to enhance deployment stability of Microsoft Edge browser on the Blockchain",
"Spearheaded Microsofts 4 Trump company rally",
"Spread Herpes STD to 60% of intern team"
],
"technologies": null
}
],
"technical_skills": {
"programming_languages": [
{
"name": "JavaScript",
"level": "Expert",
"years_of_experience": null
},
{
"name": "TypeScript",
"level": "Expert",
"years_of_experience": null
},
{
"name": "C++",
"level": "Expert",
"years_of_experience": null
}
],
"frameworks_libraries": [
{
"name": "Node.js",
"level": "Expert",
"years_of_experience": null
},
{
"name": "React AI",
"level": "Expert",
"years_of_experience": null
}
],
"databases": null,
"tools": null,
"cloud_platforms": null,
"other": null
},
"projects": null,
"open_source_contributions": null,
"certifications": null,
"publications": null,
"conferences": null,
"languages": null,
"volunteer_work": null,
"interests": null,
"references": null,
"additional_sections": null
}
document.us-drivers-license USDriversLicense
{
"issuing_state": "MT",
"license_number": "0812319684104",
"full_name": "Brenda Lynn Sample",
"first_name": "Brenda",
"middle_name": "Lynn",
"last_name": "Sample",
"address": {
"street": "123 MAIN STREET",
"city": "HELENA",
"state": "MT",
"zip_code": "59601"
},
"date_of_birth": "1968-08-04",
"gender": "F",
"height": "5'06\"",
"weight": 150.0,
"eye_color": "BRO",
"hair_color": null,
"issue_date": "2015-02-15",
"expiration_date": "2023-08-04",
"license_class": "D",
"donor": null,
"veteran": null
}
document.utility-bill UtilityBill
{
"account_number": "1234567890-1",
"date_mailed": "2019-09-07",
"service_for": "SPARKY JOULE",
"service_address": {
"street": "12345 ENERGY CT",
"city": null,
"state": null,
"zip_code": null
},
"billing_period_start": null,
"billing_period_end": null,
"date_due": "2019-09-28",
"amount_due": 88.14,
"previous_balance": 0.0,
"payment_received": 91.57,
"current_charges": 88.14,
"breakdown_of_charges": [
{
"description": "Amount Due on Previous Statement",
"amount": 91.57,
"usage": null,
"rate": null
},
{
"description": "Payment(s) Received Since Last Statement",
"amount": -91.57,
"usage": null,
"rate": null
},
{
"description": "Current PG&E Electric Delivery Charges",
"amount": 55.66,
"usage": null,
"rate": null
},
{
"description": "Silicon Valley Clean Energy Electric Generation Charges",
"amount": 32.48,
"usage": null,
"rate": null
}
],
"payment_options": [
"www.pge.com/waystopay"
],
"contact_information": {
"phone": "1-800-743-5000",
"website": "www.pge.com/MyEnergy"
}
}
document.w2-form W2Form
{
"control_number": "GNI851",
"ein": "63-0065650",
"ssn": "554-03-0876",
"employee_name": "Anastasia Hodges",
"employee_address": {
"street": "200 2nd Street NE",
"city": "Waseca",
"state": "MN",
"zip_code": "56093"
},
"employer_name": "NORTH 312",
"employer_address": {
"street": "151 N Market Street",
"city": "Wooster",
"state": "OH",
"zip_code": "44691"
},
"wages_tips_other_compensation": 23677.7,
"federal_income_tax_withheld": 2841.32,
"social_security_wages": 24410.0,
"social_security_tax_withheld": 1513.42,
"medicare_wages_and_tips": 24410.0,
"medicare_tax_withheld": 353.95,
"tax_year": 2020
}
aerospace.remote-sensing RemoteSensing
{
"description": "The satellite image captures a coastal urban area adjacent to a busy port. The scene features a mix of residential buildings, a marina, and extensive shipping facilities, including container storage and docks. The visibility is clear, allowing for detailed observation of the infrastructure and land use.",
"objects": [
"residential buildings",
"marina",
"shipping containers",
"docks",
"roads",
"beach",
"port facilities"
],
"categories": [
"commercial-area",
"port",
"residential-area",
"water-treatment",
"beach"
],
"is_visible": true
}
healthcare.medical-insurance-card MedicalInsuranceCard
{
"provider_service": {
"provider_service_number": null,
"precertification_number": null
},
"member_information": {
"member_name": "Member Name",
"member_id": "XY2 123456789",
"group_number": "023457"
},
"pharmacy_plan": {
"rx_bin": "987654",
"rx_pcn": null,
"rx_grp": "HIOPT",
"pharmacy_help_desk": null
},
"insurance_provider": {
"provider_name": "BlueCross BlueShield",
"network": "PPO"
},
"coverage": {
"office_visit": "$15",
"specialist_visit": null,
"urgent_care": null,
"emergency_room": "$75",
"inpatient_hospital": null
}
}
media.nfl-game-state NFLGameState
{
"description": null,
"teams": [
{
"name": "GB",
"score": 0
},
{
"name": "AZ",
"score": 7
}
],
"status": "in_progress",
"quarter": 2,
"clock_time": "12:12",
"possession_team": "GB",
"down": "2nd",
"distance": 10,
"yard_line": -10,
"network": "NBC",
"is_shown": true
}
media.nba-game-state NBAGameState
{
"description": "Game is in the first quarter with Miami leading San Antonio.",
"teams": [
{
"name": "MIA",
"score": 7
},
{
"name": "SA",
"score": 6
}
],
"status": "in_progress",
"quarter": 1,
"clock_time": "9:09",
"shot_clock": 11,
"network": "ESPN",
"is_shown": true
}
media.tv-news TVNews
{
"description": "A news anchor presenting a segment about Biden criticizing Netanyahu in an interview.",
"chyron": "Biden criticises Netanyahu in an interview",
"network": "BBC News",
"reporters": null
}