Skip to content

Latest commit

 

History

History
59 lines (58 loc) · 10.6 KB

2025-01-11-phi4-instructor-results.md

File metadata and controls

59 lines (58 loc) · 10.6 KB

# Benchmark Results (model=phi4, date=2025-01-11)

Domain | Response Model | Sample Response JSON
document.invoice Invoice
{
"invoice_id": null,
"period_start": null,
"period_end": null,
"invoice_issue_date": null,
"invoice_due_date": null,
"order_id": null,
"customer_id": null,
"issuer": null,
"issuer_address": null,
"customer": null,
"customer_email": null,
"customer_phone": null,
"customer_billing_address": null,
"customer_shipping_address": null,
"items": null,
"subtotal": null,
"tax": null,
"total": null,
"currency": null,
"notes": null
}
document.receipt Receipt
{
"receipt_id": null,
"transaction_date": null,
"merchant_name": null,
"merchant_address": null,
"merchant_phone": null,
"cashier_name": null,
"register_number": null,
"customer_name": "John Doe",
"customer_id": null,
"items": [
{
"description": "Espresso Coffee",
"quantity": 2.0,
"unit_price": 3.5,
"total_price": 7.0
},
{
"description": "Blueberry Muffin",
"quantity": 1.0,
"unit_price": 2.75,
"total_price": 2.75
}
],
"subtotal": 9.75,
"tax": 0.78,
"total": 11.53,
"currency": "USD",
"payment_method": {
"type": "Credit Card",
"card_last_4": null,
"card_type": null
},
"discount_amount": null,
"discount_description": null,
"tip_amount": 1.0,
"return_policy": null,
"barcode": null,
"additional_charges": null,
"notes": "Thank you for visiting!",
"others": {}
}
document.resume Resume (no sample response JSON shown for this row)
document.us-drivers-license USDriversLicense
{
"issuing_state": "NY",
"license_number": "123456789",
"full_name": "John Doe",
"first_name": "John",
"middle_name": null,
"last_name": "Doe",
"address": {
"street": "123 Main St",
"city": "New York",
"state": "NY",
"zip_code": "10001"
},
"date_of_birth": "1980-05-15",
"gender": "M",
"height": "5'11\"",
"weight": 180.0,
"eye_color": "Blue",
"hair_color": "Brown",
"issue_date": "2022-01-10",
"expiration_date": "2032-01-10",
"license_class": "B",
"donor": true,
"veteran": false
}
document.utility-bill UtilityBill
{
"account_number": "123456789",
"date_mailed": "2023-09-15",
"service_for": "John Doe",
"service_address": {
"street": "123 Elm Street",
"city": "Springfield",
"state": "IL",
"zip_code": "62704"
},
"billing_period_start": "2023-08-01",
"billing_period_end": "2023-08-31",
"date_due": "2023-10-15",
"amount_due": 150.75,
"previous_balance": 25.0,
"payment_received": 50.0,
"current_charges": 175.75,
"breakdown_of_charges": [
{
"description": "Electricity Usage",
"amount": 100.75,
"usage": "500 kWh",
"rate": 0.2
},
{
"description": "Service Fee",
"amount": 10.0,
"usage": null,
"rate": null
},
{
"description": "Taxes",
"amount": 65.0,
"usage": null,
"rate": null
}
],
"payment_options": [
"Online Payment",
"Check by Mail",
"In-Person"
],
"contact_information": {
"customer_support": "1-800-555-1234",
"billing_inquiries": "1-800-555-5678",
"emergency_services": "911"
}
}
document.w2-form W2Form
{
"control_number": "123456789",
"ein": "12-3456789",
"ssn": "987-65-4321",
"employee_name": "John Doe",
"employee_address": {
"street": "123 Main St",
"city": "Anytown",
"state": "NY",
"zip_code": "12345"
},
"employer_name": "Acme Corporation",
"employer_address": {
"street": "456 Elm St",
"city": "Othertown",
"state": "CA",
"zip_code": "67890"
},
"wages_tips_other_compensation": 50000.0,
"federal_income_tax_withheld": 4000.0,
"social_security_wages": 49000.0,
"social_security_tax_withheld": 3036.8,
"medicare_wages_and_tips": 50000.0,
"medicare_tax_withheld": 735.0,
"tax_year": 2022
}
aerospace.remote-sensing RemoteSensing
{
"description": "The satellite image depicts a diverse landscape featuring urban development alongside natural features. The central area shows a well-developed residential zone with visible roads and housing structures, indicating a bustling community. Surrounding this urban core are patches of greenery, likely parks or small forested areas, providing a contrast to the built environment. To the east, there is an expansive body of water, possibly a lake or reservoir, bordered by what appears to be a commercial area with several large buildings and parking lots. The western edge of the image reveals agricultural lands, characterized by neatly arranged fields that suggest farming activities. A river meanders through the landscape, connecting various land features and contributing to the region's ecological diversity.",
"objects": [
"residential-area",
"commercial-area",
"park",
"lake",
"farmlands",
"river"
],
"categories": [
"residential-area",
"commercial-area",
"park",
"lake",
"farmlands",
"river"
],
"is_visible": true
}
healthcare.medical-insurance-card MedicalInsuranceCard
{
"provider_service": {
"provider_service_number": "123456789",
"precertification_number": null
},
"member_information": {
"member_name": "John Doe",
"member_id": "987654321",
"group_number": "ABC123"
},
"pharmacy_plan": {
"rx_bin": "111222333",
"rx_pcn": "444555666",
"rx_grp": null,
"pharmacy_help_desk": "1-800-PHARMACY"
},
"insurance_provider": {
"provider_name": "HealthFirst Insurance Co.",
"network": "PPO Network"
},
"coverage": {
"office_visit": "In-Network: $20 copay",
"specialist_visit": "In-Network: $40 copay",
"urgent_care": "In-Network: $50 visit fee",
"emergency_room": "In-Network: Deductible applies, then 20% coinsurance",
"inpatient_hospital": "In-Network: $1,000 deductible per admission"
}
}
retail.ecommerce-product-caption RetailEcommerceProductCaption
{
"description": "The product is a sleek, modern e-reader with a high-resolution display and an ergonomic design for comfortable reading. It features adjustable lighting to reduce eye strain in various environments.",
"rating": 85,
"name": "E-Reader Pro",
"brand": "TechRead",
"category": "Electronics / E-readers",
"price": "$199.99",
"color": "Matte Black"
}
media.tv-news TVNews
{
"description": null,
"chyron": null,
"network": null,
"reporters": null
}
media.nfl-game-state NFLGameState
{
"description": "The game is currently in progress with Team A leading by a narrow margin.",
"teams": [
{
"name": "Team A",
"score": 21
},
{
"name": "Team B",
"score": 19
}
],
"status": "in_progress",
"quarter": 3,
"clock_time": "07:32",
"possession_team": "Team A",
"down": "2nd",
"distance": 5,
"yard_line": 45,
"network": "ESPN",
"is_shown": true
}
media.nba-game-state NBAGameState
{
"description": "The game is currently in progress with Team A leading by a narrow margin.",
"teams": [
{
"name": "Team A",
"score": 102
},
{
"name": "Team B",
"score": 98
}
],
"status": "in_progress",
"quarter": 3,
"clock_time": "4:32",
"shot_clock": 14,
"network": "ESPN",
"is_shown": true
}