-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
93 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -42,7 +42,7 @@ | |
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Apply `nest_asyncio` and bring your own LlamaCloud API key:" | ||
"Bring your own LlamaCloud API key:" | ||
] | ||
}, | ||
{ | ||
|
@@ -51,11 +51,6 @@ | |
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# llama-extract is async-first, running the sync code in a notebook requires the use of nest_asyncio\n", | ||
"import nest_asyncio\n", | ||
"\n", | ||
"nest_asyncio.apply()\n", | ||
"\n", | ||
"import os\n", | ||
"\n", | ||
"os.environ[\"LLAMA_CLOUD_API_KEY\"] = \"llx-...\"" | ||
|
@@ -77,9 +72,16 @@ | |
"source": [ | ||
"from llama_extract import LlamaExtract\n", | ||
"\n", | ||
"extractor = LlamaExtract()\n", | ||
"\n", | ||
"extraction_schema = extractor.infer_schema(\n", | ||
"extractor = LlamaExtract()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"extraction_schema = await extractor.ainfer_schema(\n", | ||
" \"Test Schema\", [\"./data/noisebridge_receipt.pdf\", \"./data/parallels_invoice.pdf\"]\n", | ||
")" | ||
] | ||
|
@@ -97,15 +99,62 @@ | |
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"{'type': 'object', 'properties': {'Invoice': {'type': 'object', 'properties': {'total': {'type': 'string'}, 'products': {'type': 'string'}, 'salesTax': {'type': 'string'}, 'subtotal': {'type': 'string'}, 'invoiceDate': {'type': 'string'}, 'invoiceNumber': {'type': 'string'}, 'billingAddress': {'type': 'object', 'properties': {'city': {'type': 'string'}, 'name': {'type': 'string'}, 'country': {'type': 'string'}, 'postalCode': {'type': 'string'}}}, 'paymentDetails': {'type': 'object', 'properties': {'taxId': {'type': 'string'}, 'merchant': {'type': 'string'}, 'merchantAddress': {'type': 'object', 'properties': {'city': {'type': 'string'}, 'suite': {'type': 'string'}, 'street': {'type': 'string'}, 'country': {'type': 'string'}, 'postalCode': {'type': 'string'}}}, 'creditCardLastFour': {'type': 'string'}}}, 'referenceNumber': {'type': 'string'}}}}}\n" | ||
] | ||
"data": { | ||
"text/plain": [ | ||
"{'type': 'object',\n", | ||
" 'properties': {'Invoice': {'type': 'object',\n", | ||
" 'properties': {'total': {'type': 'string'},\n", | ||
" 'products': {'type': 'string'},\n", | ||
" 'salesTax': {'type': 'string'},\n", | ||
" 'subtotal': {'type': 'string'},\n", | ||
" 'invoiceDate': {'type': 'string'},\n", | ||
" 'invoiceNumber': {'type': 'string'},\n", | ||
" 'billingAddress': {'type': 'object',\n", | ||
" 'properties': {'city': {'type': 'string'},\n", | ||
" 'name': {'type': 'string'},\n", | ||
" 'country': {'type': 'string'},\n", | ||
" 'postalCode': {'type': 'string'}}},\n", | ||
" 'paymentDetails': {'type': 'object',\n", | ||
" 'properties': {'taxId': {'type': 'string'},\n", | ||
" 'merchant': {'type': 'string'},\n", | ||
" 'merchantAddress': {'type': 'object',\n", | ||
" 'properties': {'city': {'type': 'string'},\n", | ||
" 'suite': {'type': 'string'},\n", | ||
" 'street': {'type': 'string'},\n", | ||
" 'country': {'type': 'string'},\n", | ||
" 'postalCode': {'type': 'string'}}},\n", | ||
" 'creditCardLastFour': {'type': 'string'}}},\n", | ||
" 'referenceNumber': {'type': 'string'}}},\n", | ||
" 'Receipt': {'type': 'object',\n", | ||
" 'properties': {'items': {'type': 'array',\n", | ||
" 'items': {'type': 'object',\n", | ||
" 'properties': {'amount': {'type': 'string'},\n", | ||
" 'quantity': {'type': 'integer'},\n", | ||
" 'unitPrice': {'type': 'string'},\n", | ||
" 'description': {'type': 'string'}}}},\n", | ||
" 'total': {'type': 'string'},\n", | ||
" 'datePaid': {'type': 'string'},\n", | ||
" 'subtotal': {'type': 'string'},\n", | ||
" 'amountPaid': {'type': 'string'},\n", | ||
" 'paymentMethod': {'type': 'string'},\n", | ||
" 'receiptNumber': {'type': 'string'},\n", | ||
" 'billingAddress': {'type': 'object',\n", | ||
" 'properties': {'city': {'type': 'string'},\n", | ||
" 'name': {'type': 'string'},\n", | ||
" 'email': {'type': 'string'},\n", | ||
" 'street': {'type': 'string'},\n", | ||
" 'country': {'type': 'string'},\n", | ||
" 'postalCode': {'type': 'string'},\n", | ||
" 'phoneNumber': {'type': 'string'}}}}}}}" | ||
] | ||
}, | ||
"execution_count": null, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"print(extraction_schema.data_schema)" | ||
"extraction_schema.data_schema" | ||
] | ||
}, | ||
{ | ||
|
@@ -125,12 +174,12 @@ | |
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"Extracting files: 100%|██████████| 2/2 [00:14<00:00, 7.11s/it]\n" | ||
"Extracting files: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:06<00:00, 3.10s/it]\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"extractions = extractor.extract(\n", | ||
"extractions = await extractor.aextract(\n", | ||
" extraction_schema.id,\n", | ||
" [\"./data/noisebridge_receipt.pdf\", \"./data/parallels_invoice.pdf\"],\n", | ||
")" | ||
|
@@ -149,21 +198,40 @@ | |
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"{'Invoice': {'total': '$119.99', 'products': 'Parallels Desktop for Mac Pro Edition (1 Year)', 'salesTax': '$0.00', 'subtotal': '$119.99', 'invoiceDate': 'Jul 23, 2024', 'invoiceNumber': 'BKD-73649835575', 'billingAddress': {'city': 'California', 'name': 'Laurie Voss', 'country': 'United States', 'postalCode': '94110'}, 'paymentDetails': {'taxId': '20-4503251', 'merchant': 'Cleverbridge, Inc.', 'merchantAddress': {'city': 'Chicago', 'suite': 'Suite 700', 'street': '350 N Clark', 'country': 'United States', 'postalCode': '60654'}, 'creditCardLastFour': '4469'}, 'referenceNumber': '474534804'}}\n" | ||
] | ||
"data": { | ||
"text/plain": [ | ||
"{'Receipt': {'items': [{'amount': '$10.00',\n", | ||
" 'quantity': 1,\n", | ||
" 'unitPrice': '$10.00',\n", | ||
" 'description': '$10 / month'}],\n", | ||
" 'total': '$10.00',\n", | ||
" 'datePaid': 'July 19, 2024',\n", | ||
" 'subtotal': '$10.00',\n", | ||
" 'amountPaid': '$10.00',\n", | ||
" 'paymentMethod': 'Visa - 7267',\n", | ||
" 'receiptNumber': '2721 5058',\n", | ||
" 'billingAddress': {'city': 'San Francisco',\n", | ||
" 'name': 'Noisebridge',\n", | ||
" 'email': '[email protected]',\n", | ||
" 'street': '272 Capp St',\n", | ||
" 'country': 'United States',\n", | ||
" 'postalCode': '94110',\n", | ||
" 'phoneNumber': '1 650 701 7829'}}}" | ||
] | ||
}, | ||
"execution_count": null, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"print(extractions[0].data)" | ||
"extractions[0].data" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "llama-extract-tm5usU00-py3.11", | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
|
@@ -180,5 +248,5 @@ | |
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
"nbformat_minor": 4 | ||
} |