From 041b65d842e5a517e9a9160e8d37421136999576 Mon Sep 17 00:00:00 2001 From: reibs Date: Thu, 22 Feb 2024 15:07:44 -0800 Subject: [PATCH] update implementaiton notebook --- Example.ipynb | 327 +++++++++++++++++++++++++++++++++++++++++++ implementation.ipynb | 192 ------------------------- jaiqu/__init__.py | 10 -- 3 files changed, 327 insertions(+), 202 deletions(-) create mode 100644 Example.ipynb delete mode 100644 implementation.ipynb diff --git a/Example.ipynb b/Example.ipynb new file mode 100644 index 0000000..723dd09 --- /dev/null +++ b/Example.ipynb @@ -0,0 +1,327 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import jq\n", + "from jaiqu import validate_schema, translate_schema" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Desired data format \n", + "\n", + "Create a `jsonschema` dictionary for the format of data you want. Data from your input will be extracted into this format." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "schema = {\n", + " \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"id\": {\n", + " \"type\": [\"string\", \"null\"],\n", + " \"description\": \"A unique identifier for the record.\"\n", + " },\n", + " \"date\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"A string describing the date.\"\n", + " },\n", + " \"model\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"A text field representing the model used.\"\n", + " }\n", + " },\n", + " \"required\": [\n", + " \"id\",\n", + " \"date\"\n", + " ]\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Sample input data\n", + "Provide an input JSON dictionary containing the data you want to extract." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "input_json = {\n", + " \"call.id\": \"123\",\n", + " \"datetime\": \"2022-01-01\",\n", + " \"timestamp\": 1640995200,\n", + " \"Address\": \"123 Main St\",\n", + " \"user\": {\n", + " \"name\": \"John Doe\",\n", + " \"age\": 30,\n", + " \"contact\": \"john@email.com\"\n", + " }\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### (Optional) Create hints\n", + "The jaiqu agent may not know certain concepts. For example, you might want to have some keys interpreted a certain way (i.e. interpret \"contact\" as \"email\"). For tricky interpretations, create hints." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "key_hints = \"We are processing outputs of an containing an id and a date of a user.\"" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "16260aec353145dbb4055821841a64aa", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validating schema: 0%| | 0/3 [00:00=1.12.0 (from -r requirements.txt (line 2))\n", - " Using cached openai-1.12.0-py3-none-any.whl.metadata (18 kB)\n", - "Collecting jsonschema==4.21.1 (from -r requirements.txt (line 3))\n", - " Using cached jsonschema-4.21.1-py3-none-any.whl.metadata (7.8 kB)\n", - "Requirement already satisfied: attrs>=22.2.0 in ./env/lib/python3.11/site-packages (from jsonschema==4.21.1->-r requirements.txt (line 3)) (23.2.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in ./env/lib/python3.11/site-packages (from jsonschema==4.21.1->-r requirements.txt (line 3)) (2023.12.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in ./env/lib/python3.11/site-packages (from jsonschema==4.21.1->-r requirements.txt (line 3)) (0.33.0)\n", - 
"Requirement already satisfied: rpds-py>=0.7.1 in ./env/lib/python3.11/site-packages (from jsonschema==4.21.1->-r requirements.txt (line 3)) (0.18.0)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in ./env/lib/python3.11/site-packages (from openai<2.0.0,>=1.12.0->-r requirements.txt (line 2)) (4.3.0)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in ./env/lib/python3.11/site-packages (from openai<2.0.0,>=1.12.0->-r requirements.txt (line 2)) (1.9.0)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in ./env/lib/python3.11/site-packages (from openai<2.0.0,>=1.12.0->-r requirements.txt (line 2)) (0.27.0)\n", - "Requirement already satisfied: pydantic<3,>=1.9.0 in ./env/lib/python3.11/site-packages (from openai<2.0.0,>=1.12.0->-r requirements.txt (line 2)) (2.6.1)\n", - "Requirement already satisfied: sniffio in ./env/lib/python3.11/site-packages (from openai<2.0.0,>=1.12.0->-r requirements.txt (line 2)) (1.3.0)\n", - "Requirement already satisfied: tqdm>4 in ./env/lib/python3.11/site-packages (from openai<2.0.0,>=1.12.0->-r requirements.txt (line 2)) (4.66.2)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in ./env/lib/python3.11/site-packages (from openai<2.0.0,>=1.12.0->-r requirements.txt (line 2)) (4.9.0)\n", - "Requirement already satisfied: idna>=2.8 in ./env/lib/python3.11/site-packages (from anyio<5,>=3.5.0->openai<2.0.0,>=1.12.0->-r requirements.txt (line 2)) (3.6)\n", - "Requirement already satisfied: certifi in ./env/lib/python3.11/site-packages (from httpx<1,>=0.23.0->openai<2.0.0,>=1.12.0->-r requirements.txt (line 2)) (2024.2.2)\n", - "Requirement already satisfied: httpcore==1.* in ./env/lib/python3.11/site-packages (from httpx<1,>=0.23.0->openai<2.0.0,>=1.12.0->-r requirements.txt (line 2)) (1.0.4)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in ./env/lib/python3.11/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai<2.0.0,>=1.12.0->-r requirements.txt (line 2)) (0.14.0)\n", - "Requirement already 
satisfied: annotated-types>=0.4.0 in ./env/lib/python3.11/site-packages (from pydantic<3,>=1.9.0->openai<2.0.0,>=1.12.0->-r requirements.txt (line 2)) (0.6.0)\n", - "Requirement already satisfied: pydantic-core==2.16.2 in ./env/lib/python3.11/site-packages (from pydantic<3,>=1.9.0->openai<2.0.0,>=1.12.0->-r requirements.txt (line 2)) (2.16.2)\n", - "Using cached jsonschema-4.21.1-py3-none-any.whl (85 kB)\n", - "Using cached openai-1.12.0-py3-none-any.whl (226 kB)\n", - "Installing collected packages: openai, jsonschema\n", - "Successfully installed jsonschema-4.21.1 openai-1.12.0\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Requirement already satisfied: jaiqu in ./env/lib/python3.11/site-packages (0.0.1)\n", - "Requirement already satisfied: jq==1.6.0 in ./env/lib/python3.11/site-packages (from jaiqu) (1.6.0)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/howardgil/Desktop/agentops/Jaiqu/env/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], - "source": [ - "import os\n", - "!pip install -r requirements.txt\n", - "!pip install jaiqu\n", - "\n", - "#TODO update so that validate_schema is available on init\n", - "#TODO from jaiqu import validate_schema, translate_schema\n", - "from jaiqu.jaiqu import validate_schema, translate_schema\n", - "from jaiqu import JaiQu\n", - "JaiQu.init(openai_api_key=\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# !pip uninstall openai\n", - "# !pip uninstall jsonschema" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# TODO: Get from files\n", - "# Desired data format \n", - "schema = {\n", - " \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n", - " \"type\": \"object\",\n", - " \"properties\": {\n", - " \"id\": {\n", - " \"type\": [\"string\", \"null\"],\n", - " \"description\": \"A unique identifier for the record.\"\n", - " },\n", - " \"date\": {\n", - " \"type\": \"string\",\n", - " \"description\": \"A string describing the date.\"\n", - " },\n", - " \"model\": {\n", - " \"type\": \"string\",\n", - " \"description\": \"A text field representing the model used.\"\n", - " }\n", - " },\n", - " \"required\": [\n", - " \"id\",\n", - " \"date\"\n", - " ]\n", - "}\n", - "\n", - "# Provided data\n", - "input_json = {\n", - " \"call.id\": \"123\",\n", - " \"datetime\": \"2022-01-01\",\n", - " \"timestamp\": 1640995200,\n", - " \"Address\": \"123 Main St\",\n", - " \"user\": {\n", - " \"name\": \"John Doe\",\n", - " \"age\": 30,\n", - " \"contact\": \"john@email.com\"\n", - " }\n", - "}\n", - "\n", - "# (Optional) Create hints so the agent knows what to look for in the input\n", - "key_hints=\"We are processing outputs of an containing an id, a date, and a model. 
All the required fields should be present in this input, but the names might be different.\"\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Validating schema: 0%| | 0/3 [00:00