diff --git a/docs/notebooks/starter_tutorial.ipynb b/docs/notebooks/starter_tutorial.ipynb index 35edbec3..83f2a0b9 100644 --- a/docs/notebooks/starter_tutorial.ipynb +++ b/docs/notebooks/starter_tutorial.ipynb @@ -46,35 +46,21 @@ { "data": { "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - "
answer.example_question
Good
" + "
┏━━━━━━━━━━━━━━━━━━━┓\n",
+       "┃ answer            ┃\n",
+       "┃ .example_question ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━┩\n",
+       "│ Good              │\n",
+       "└───────────────────┘\n",
+       "
\n" ], "text/plain": [ - "" + "┏━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1;35m \u001b[0m\u001b[1;35manswer \u001b[0m\u001b[1;35m \u001b[0m┃\n", + "┃\u001b[1;35m \u001b[0m\u001b[1;35m.example_question\u001b[0m\u001b[1;35m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[2m \u001b[0m\u001b[2mGood \u001b[0m\u001b[2m \u001b[0m│\n", + "└───────────────────┘\n" ] }, "metadata": {}, @@ -96,7 +82,7 @@ "results = q.run()\n", "\n", "# Inspect the results\n", - "results.select(\"example_question\").print()" + "results.select(\"example_question\").print(format=\"rich\")" ] }, { @@ -110,7 +96,7 @@ "tags": [] }, "source": [ - "*Note:* The default language model is currently GPT 4 preview; you will need an API key for OpenAI to use this model and run this example locally.\n", + "*Note:* The default language model at the time this notebook was last updated was gpt-4o; you will need an API key for OpenAI to use this model and run this example locally.\n", "See instructions on storing your [API Keys](https://docs.expectedparrot.com/en/latest/api_keys.html). \n", "Alternatively, you can activate [Remote Inference](https://docs.expectedparrot.com/en/latest/remote_inference.html) at your [Coop](https://docs.expectedparrot.com/en/latest/coop.html) account to run the example on the Expected Parrot server.\n", "\n", @@ -121,6 +107,7 @@ "\n", "We also show how to filter, sort, select and print components of the dataset of results.\n", "\n", + "#### Question types\n", "To see examples of all EDSL question types, run:" ] }, @@ -175,6 +162,7 @@ "tags": [] }, "source": [ + "#### Language models\n", "Newly released language models are automatically added to EDSL when they become available. \n", "To see a current list of available models, run:" ] @@ -190,168 +178,42 @@ }, "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "[['01-ai/Yi-34B-Chat', 'deep_infra', 0],\n", - " ['Austism/chronos-hermes-13b-v2', 'deep_infra', 1],\n", - " ['Gryphe/MythoMax-L2-13b', 'deep_infra', 2],\n", - " ['Gryphe/MythoMax-L2-13b-turbo', 'deep_infra', 3],\n", - " ['HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1', 'deep_infra', 4],\n", - " ['Phind/Phind-CodeLlama-34B-v2', 'deep_infra', 5],\n", - " ['Qwen/Qwen2-72B-Instruct', 'deep_infra', 6],\n", - " ['Qwen/Qwen2-7B-Instruct', 'deep_infra', 7],\n", - " ['Sao10K/L3-70B-Euryale-v2.1', 'deep_infra', 8],\n", - " ['amazon.titan-text-express-v1', 'bedrock', 9],\n", - " ['amazon.titan-text-lite-v1', 'bedrock', 10],\n", - " ['amazon.titan-tg1-large', 'bedrock', 11],\n", - " ['anthropic.claude-3-5-sonnet-20240620-v1:0', 'bedrock', 12],\n", - " ['anthropic.claude-3-haiku-20240307-v1:0', 'bedrock', 13],\n", - " ['anthropic.claude-3-opus-20240229-v1:0', 'bedrock', 14],\n", - " ['anthropic.claude-3-sonnet-20240229-v1:0', 'bedrock', 15],\n", - " ['anthropic.claude-instant-v1', 'bedrock', 16],\n", - " ['anthropic.claude-v2', 'bedrock', 17],\n", - " ['anthropic.claude-v2:1', 'bedrock', 18],\n", - " ['bigcode/starcoder2-15b', 'deep_infra', 19],\n", - " ['bigcode/starcoder2-15b-instruct-v0.1', 'deep_infra', 20],\n", - " ['chatgpt-4o-latest', 'openai', 21],\n", - " ['claude-3-5-sonnet-20240620', 'anthropic', 22],\n", - " ['claude-3-haiku-20240307', 'anthropic', 23],\n", - " ['claude-3-opus-20240229', 'anthropic', 24],\n", - " ['claude-3-sonnet-20240229', 'anthropic', 25],\n", - " ['codellama/CodeLlama-34b-Instruct-hf', 'deep_infra', 26],\n", - " ['codellama/CodeLlama-70b-Instruct-hf', 'deep_infra', 27],\n", - " ['codestral-2405', 'mistral', 28],\n", - " ['codestral-latest', 'mistral', 29],\n", - " ['codestral-mamba-2407', 'mistral', 30],\n", - " ['codestral-mamba-latest', 'mistral', 31],\n", - " ['cognitivecomputations/dolphin-2.6-mixtral-8x7b', 'deep_infra', 32],\n", - " ['cognitivecomputations/dolphin-2.9.1-llama-3-70b', 'deep_infra', 33],\n", - " ['cohere.command-light-text-v14', 'bedrock', 34],\n", - " ['cohere.command-r-plus-v1:0', 'bedrock', 35],\n", - " ['cohere.command-r-v1:0', 'bedrock', 36],\n", - " ['cohere.command-text-v14', 'bedrock', 37],\n", - " ['curie:ft-emeritus-2022-11-30-12-58-24', 'openai', 38],\n", - " ['curie:ft-emeritus-2022-12-01-01-04-36', 'openai', 39],\n", - " ['curie:ft-emeritus-2022-12-01-01-51-20', 'openai', 40],\n", - " ['curie:ft-emeritus-2022-12-01-14-16-46', 'openai', 41],\n", - " ['curie:ft-emeritus-2022-12-01-14-28-00', 'openai', 42],\n", - " ['curie:ft-emeritus-2022-12-01-14-49-45', 'openai', 43],\n", - " ['curie:ft-emeritus-2022-12-01-15-29-32', 'openai', 44],\n", - " ['curie:ft-emeritus-2022-12-01-15-42-25', 'openai', 45],\n", - " ['curie:ft-emeritus-2022-12-01-15-52-24', 'openai', 46],\n", - " ['curie:ft-emeritus-2022-12-01-16-40-12', 'openai', 47],\n", - " ['databricks/dbrx-instruct', 'deep_infra', 48],\n", - " ['davinci:ft-emeritus-2022-11-30-14-57-33', 'openai', 49],\n", - " ['deepinfra/airoboros-70b', 'deep_infra', 50],\n", - " ['gemini-1.0-pro', 'google', 51],\n", - " ['gemini-1.5-flash', 'google', 52],\n", - " ['gemini-1.5-pro', 'google', 53],\n", - " ['gemini-pro', 'google', 54],\n", - " ['gemma-7b-it', 'groq', 55],\n", - " ['gemma2-9b-it', 'groq', 56],\n", - " ['google/codegemma-7b-it', 'deep_infra', 57],\n", - " ['google/gemma-1.1-7b-it', 'deep_infra', 58],\n", - " ['google/gemma-2-27b-it', 'deep_infra', 59],\n", - " ['google/gemma-2-9b-it', 'deep_infra', 60],\n", - " ['gpt-3.5-turbo', 'openai', 61],\n", - " ['gpt-3.5-turbo-0125', 'openai', 62],\n", - " ['gpt-3.5-turbo-1106', 'openai', 63],\n", - " ['gpt-3.5-turbo-16k', 'openai', 64],\n", - " ['gpt-4', 'openai', 65],\n", - " ['gpt-4-0125-preview', 'openai', 66],\n", - " ['gpt-4-0613', 'openai', 67],\n", - " ['gpt-4-1106-preview', 'openai', 68],\n", - " ['gpt-4-turbo', 'openai', 69],\n", - " ['gpt-4-turbo-2024-04-09', 'openai', 70],\n", - " ['gpt-4-turbo-preview', 'openai', 71],\n", - " ['gpt-4o', 'openai', 72],\n", - " ['gpt-4o-2024-05-13', 'openai', 73],\n", - " ['gpt-4o-2024-08-06', 'openai', 74],\n", - " ['gpt-4o-mini', 'openai', 75],\n", - " ['gpt-4o-mini-2024-07-18', 'openai', 76],\n", - " ['lizpreciatior/lzlv_70b_fp16_hf', 'deep_infra', 77],\n", - " ['llama-3.1-70b-versatile', 'groq', 78],\n", - " ['llama-3.1-8b-instant', 'groq', 79],\n", - " ['llama-guard-3-8b', 'groq', 80],\n", - " ['llama3-70b-8192', 'groq', 81],\n", - " ['llama3-8b-8192', 'groq', 82],\n", - " ['llama3-groq-70b-8192-tool-use-preview', 'groq', 83],\n", - " ['llama3-groq-8b-8192-tool-use-preview', 'groq', 84],\n", - " ['llava-v1.5-7b-4096-preview', 'groq', 85],\n", - " ['mattshumer/Reflection-Llama-3.1-70B', 'deep_infra', 86],\n", - " ['meta-llama/Llama-2-13b-chat-hf', 'deep_infra', 87],\n", - " ['meta-llama/Llama-2-70b-chat-hf', 'deep_infra', 88],\n", - " ['meta-llama/Llama-2-7b-chat-hf', 'deep_infra', 89],\n", - " ['meta-llama/Meta-Llama-3-70B-Instruct', 'deep_infra', 90],\n", - " ['meta-llama/Meta-Llama-3-8B-Instruct', 'deep_infra', 91],\n", - " ['meta-llama/Meta-Llama-3.1-405B-Instruct', 'deep_infra', 92],\n", - " ['meta-llama/Meta-Llama-3.1-70B-Instruct', 'deep_infra', 93],\n", - " ['meta-llama/Meta-Llama-3.1-8B-Instruct', 'deep_infra', 94],\n", - " ['meta.llama3-1-405b-instruct-v1:0', 'bedrock', 95],\n", - " ['meta.llama3-1-70b-instruct-v1:0', 'bedrock', 96],\n", - " ['meta.llama3-1-8b-instruct-v1:0', 'bedrock', 97],\n", - " ['meta.llama3-70b-instruct-v1:0', 'bedrock', 98],\n", - " ['meta.llama3-8b-instruct-v1:0', 'bedrock', 99],\n", - " ['microsoft/Phi-3-medium-4k-instruct', 'deep_infra', 100],\n", - " ['microsoft/WizardLM-2-7B', 'deep_infra', 101],\n", - " ['microsoft/WizardLM-2-8x22B', 'deep_infra', 102],\n", - " ['mistral-embed', 'mistral', 103],\n", - " ['mistral-large-2402', 'mistral', 104],\n", - " ['mistral-large-2407', 'mistral', 105],\n", - " ['mistral-large-latest', 'mistral', 106],\n", - " ['mistral-medium', 'mistral', 107],\n", - " ['mistral-medium-2312', 'mistral', 108],\n", - " ['mistral-medium-latest', 'mistral', 109],\n", - " ['mistral-small', 'mistral', 110],\n", - " ['mistral-small-2312', 'mistral', 111],\n", - " ['mistral-small-2402', 'mistral', 112],\n", - " ['mistral-small-latest', 'mistral', 113],\n", - " ['mistral-tiny', 'mistral', 114],\n", - " ['mistral-tiny-2312', 'mistral', 115],\n", - " ['mistral-tiny-2407', 'mistral', 116],\n", - " ['mistral-tiny-latest', 'mistral', 117],\n", - " ['mistral.mistral-7b-instruct-v0:2', 'bedrock', 118],\n", - " ['mistral.mistral-large-2402-v1:0', 'bedrock', 119],\n", - " ['mistral.mistral-large-2407-v1:0', 'bedrock', 120],\n", - " ['mistral.mixtral-8x7b-instruct-v0:1', 'bedrock', 121],\n", - " ['mistralai/Mistral-7B-Instruct-v0.1', 'deep_infra', 122],\n", - " ['mistralai/Mistral-7B-Instruct-v0.2', 'deep_infra', 123],\n", - " ['mistralai/Mistral-7B-Instruct-v0.3', 'deep_infra', 124],\n", - " ['mistralai/Mistral-Nemo-Instruct-2407', 'deep_infra', 125],\n", - " ['mistralai/Mixtral-8x22B-Instruct-v0.1', 'deep_infra', 126],\n", - " ['mistralai/Mixtral-8x22B-v0.1', 'deep_infra', 127],\n", - " ['mistralai/Mixtral-8x7B-Instruct-v0.1', 'deep_infra', 128],\n", - " ['mixtral-8x7b-32768', 'groq', 129],\n", - " ['nvidia/Nemotron-4-340B-Instruct', 'deep_infra', 130],\n", - " ['open-codestral-mamba', 'mistral', 131],\n", - " ['open-mistral-7b', 'mistral', 132],\n", - " ['open-mistral-nemo', 'mistral', 133],\n", - " ['open-mistral-nemo-2407', 'mistral', 134],\n", - " ['open-mixtral-8x22b', 'mistral', 135],\n", - " ['open-mixtral-8x22b-2404', 'mistral', 136],\n", - " ['open-mixtral-8x7b', 'mistral', 137],\n", - " ['openbmb/MiniCPM-Llama3-V-2_5', 'deep_infra', 138],\n", - " ['openchat/openchat-3.6-8b', 'deep_infra', 139],\n", - " ['openchat/openchat_3.5', 'deep_infra', 140],\n", - " ['test', 'test', 141]]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from edsl import Model\n", "\n", - "Model.available()" + "# Model.available() # uncomment this line and run it" + ] + }, + { + "cell_type": "markdown", + "id": "391a62e9-ce89-40f3-b43a-bea3d7b8782c", + "metadata": {}, + "source": [ + "To confirm the current default model:" ] }, { "cell_type": "code", "execution_count": 4, + "id": "847fd577-078a-4502-8112-97ee3699cd11", + "metadata": {}, + "outputs": [], + "source": [ + "# Model() # uncomment this line and run it" + ] + }, + { + "cell_type": "markdown", + "id": "4eecad61-9e6d-4b7e-9a70-0bf5546e2f49", + "metadata": {}, + "source": [ + "#### Example survey" + ] + }, + { + "cell_type": "code", + "execution_count": 5, "id": "17cc2398-55be-4865-88f0-e66104c115a2", "metadata": { "editable": true, @@ -482,17 +344,20 @@ "results = survey.by(scenarios).by(agents).by(models).run()\n", "\n", "# Filter, sort, select and print components of the results to inspect\n", - "(results\n", - ".filter(\"activity == 'reading' and persona == 'chef'\")\n", - ".sort_by(\"model\")\n", - ".select(\"model\", \"activity\", \"persona\", \"answer.*\")\n", - ".print(format=\"rich\",\n", - " pretty_labels = ({\"model.model\":\"Model\",\n", - " \"scenario.activity\":\"Activity\",\n", - " \"agent.persona\":\"Agent persona\",\n", - " \"answer.enjoy\":\"Enjoy\",\n", - " \"answer.recent\":\"Recent\"})\n", - " )\n", + "(\n", + " results\n", + " .filter(\"activity == 'reading' and persona == 'chef'\")\n", + " .sort_by(\"model\")\n", + " .select(\"model\", \"activity\", \"persona\", \"answer.*\")\n", + " .print(format=\"rich\",\n", + " pretty_labels = ({\n", + " \"model.model\":\"Model\",\n", + " \"scenario.activity\":\"Activity\",\n", + " \"agent.persona\":\"Agent persona\",\n", + " \"answer.enjoy\":\"Enjoy\",\n", + " \"answer.recent\":\"Recent\"\n", + " })\n", + " )\n", ")" ] }, @@ -514,7 +379,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "1ab2cc32-015c-49bc-8e53-cc1c70f6d783", "metadata": { "editable": true, @@ -743,17 +608,18 @@ "17 Sure! The most recent time I was reading was j... 4 " ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Convert the Results object to a pandas dataframe\n", - "(results\n", - " .sort_by(\"model\", \"activity\", \"persona\")\n", - " .select(\"model\", \"activity\", \"persona\", \"recent\", \"enjoy\")\n", - " .to_pandas(remove_prefix=True)\n", + "(\n", + " results\n", + " .sort_by(\"model\", \"activity\", \"persona\")\n", + " .select(\"model\", \"activity\", \"persona\", \"recent\", \"enjoy\")\n", + " .to_pandas(remove_prefix=True)\n", ")" ] }, @@ -773,7 +639,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "7c3f63d0-bc79-4caf-991e-69b92ff29b69", "metadata": { "editable": true, @@ -823,7 +689,7 @@ " 'scenario.activity']" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -848,7 +714,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "8bdca6c4-0ef6-4daa-ae4f-8b9bdd4a9043", "metadata": { "editable": true, @@ -1077,7 +943,7 @@ "17 Sure! The most recent time I was reading was j... 4 " ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -1110,7 +976,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "id": "a6f9233b-5ddc-4850-8ec9-6dd2d6647ecc", "metadata": { "editable": true, @@ -1127,13 +993,13 @@ "text/plain": [ "{'description': None,\n", " 'object_type': 'results',\n", - " 'url': 'https://www.expectedparrot.com/content/05dd1e85-3633-4bba-a964-a2e3fe79cf49',\n", - " 'uuid': '05dd1e85-3633-4bba-a964-a2e3fe79cf49',\n", + " 'url': 'https://www.expectedparrot.com/content/f674ba78-17d5-4628-9b57-ec7c5a96718c',\n", + " 'uuid': 'f674ba78-17d5-4628-9b57-ec7c5a96718c',\n", " 'version': '0.1.33.dev1',\n", " 'visibility': 'public'}" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -1152,8 +1018,8 @@ }, { "cell_type": "code", - "execution_count": 9, - "id": "e650fd0b-a0e1-4ddb-8eef-e012737af02a", + "execution_count": 10, + "id": "257c7a6e-a7e8-4b15-9936-afa18c623b21", "metadata": { "editable": true, "slideshow": { @@ -1169,25 +1035,23 @@ "text/plain": [ "{'description': 'Starter Tutorial',\n", " 'object_type': 'notebook',\n", - " 'url': 'https://www.expectedparrot.com/content/41918601-7865-49bf-9cfe-3f48e1f4b1f4',\n", - " 'uuid': '41918601-7865-49bf-9cfe-3f48e1f4b1f4',\n", + " 'url': 'https://www.expectedparrot.com/content/d11a525e-d454-4eb1-bd96-0ab9d771249e',\n", + " 'uuid': 'd11a525e-d454-4eb1-bd96-0ab9d771249e',\n", " 'version': '0.1.33.dev1',\n", " 'visibility': 'public'}" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from edsl import Coop, Notebook\n", - "\n", - "coop = Coop()\n", + "from edsl import Notebook\n", "\n", "notebook = Notebook(path=\"starter_tutorial.ipynb\")\n", "\n", - "coop.create(notebook, description=\"Starter Tutorial\", visibility=\"public\")" + "notebook.push(description=\"Starter Tutorial\", visibility=\"public\")" ] } ],