Commit

Update DOCS
chucheria committed Apr 11, 2024
1 parent 6ac9a61 commit 2e86a71
Showing 32 changed files with 2,525 additions and 3,138 deletions.
29 changes: 29 additions & 0 deletions _sources/definition.rst.txt
@@ -41,6 +41,35 @@ Promptmeteo integrates different LLMs through LangChain. This includes models th

Establishing a concrete format for writing prompts in Promptmeteo (``.prompt``) not only facilitates programmatic use but also enables versioning of prompts. This makes changes easier to follow and allows code tests to be defined for the prompts themselves, covering aspects such as validating language use and ensuring that the prompt size is appropriate for the model.

.. code-block:: yaml

    TEMPLATE:
        "I need you to help me with a text classification task.
        {__PROMPT_DOMAIN__}
        {__PROMPT_LABELS__}
        {__CHAIN_THOUGHT__}
        {__ANSWER_FORMAT__}"
    PROMPT_DOMAIN:
        "The texts you will be processing are from the {__DOMAIN__} domain."
    PROMPT_LABELS:
        "I want you to classify the texts into one of the following categories:
        {__LABELS__}."
    PROMPT_DETAIL:
        ""
    CHAIN_THOUGHT:
        "Please provide a step-by-step argument for your answer, explain why you
        believe your final choice is justified."
    ANSWER_FORMAT:
        "In your response, include only the name of the class as a single word, in
        lowercase, without punctuation, and without adding any other statements or
        words."
📋 Current capabilities
----------------------------
2 changes: 2 additions & 0 deletions _sources/index.rst.txt
@@ -8,6 +8,8 @@ Promptmeteo 🔥🧔

Promptmeteo is a Python library for prompt engineering built on top of LangChain. It simplifies the use of large language models (LLMs) for various tasks through a low-code interface. To achieve this, Promptmeteo can employ different LLM models and dynamically generate prompts for specific tasks based on just a few configuration parameters.
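A minimal sketch of this low-code interface, adapted from the usage notebooks, looks like the following (the model, provider, placeholder token, and the toy reviews and labels are illustrative assumptions):

.. code-block:: python

    from promptmeteo import DocumentClassifier

    # A few configuration parameters are enough to build the task prompt.
    model = DocumentClassifier(
        language="en",
        model_name="gpt-3.5-turbo-instruct",
        model_provider_name="openai",
        model_provider_token="sk-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",  # placeholder
    )

    # A handful of labelled examples is used to build the few-shot prompt.
    model = model.train(
        examples=["Great product, works perfectly", "Arrived broken, very disappointed"],
        annotations=["positive", "negative"],
    )

    predictions = model.predict(["Fast delivery and the quality is good"])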

Learn more about Promptmeteo in `our blog <https://www.paradigmadigital.com/dev/industrializando-ia-generativa-prompt-engineering-promptmeteo/>`_ (in Spanish).

.. toctree::
:maxdepth: 2
:caption: Contents:
157 changes: 85 additions & 72 deletions _sources/notebooks/01_promptmeteo_usage.ipynb.txt

Large diffs are not rendered by default.

138 changes: 69 additions & 69 deletions _sources/notebooks/03_sentiment_analysis.ipynb.txt
@@ -89,7 +89,7 @@
"source": [
"import polars as pl\n",
"\n",
"data = pl.read_parquet('../data/amazon_reviews_en/amazon_reviews_multi-test.parquet')\n",
"data = pl.read_parquet(\"../data/amazon_reviews_en/amazon_reviews_multi-test.parquet\")\n",
"data.head()"
]
},
@@ -141,9 +141,10 @@
],
"source": [
"sql = pl.SQLContext()\n",
"sql.register('data', data)\n",
"sql.register(\"data\", data)\n",
"\n",
"sentiment_data = sql.execute(\"\"\"\n",
"sentiment_data = (\n",
" sql.execute(\"\"\"\n",
" SELECT\n",
" review_body as REVIEW,\n",
" CASE\n",
@@ -154,7 +155,10 @@
" END AS TARGET,\n",
" FROM data\n",
" WHERE stars!=2 AND stars!=4;\n",
" \"\"\").collect().sample(fraction=1.0, shuffle=True, seed=0)\n",
" \"\"\")\n",
" .collect()\n",
" .sample(fraction=1.0, shuffle=True, seed=0)\n",
")\n",
"\n",
"sentiment_data.head()"
]
@@ -182,14 +186,16 @@
"metadata": {},
"outputs": [],
"source": [
"import sys; sys.path.append('..')\n",
"import sys\n",
"\n",
"sys.path.append(\"..\")\n",
"from promptmeteo import DocumentClassifier\n",
"\n",
"model = DocumentClassifier(\n",
" language = 'en',\n",
" model_name = 'gpt-3.5-turbo-instruct',\n",
" model_provider_name = 'openai',\n",
" model_provider_token = 'sk-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' #OPENAI TOKEN\n",
" language=\"en\",\n",
" model_name=\"gpt-3.5-turbo-instruct\",\n",
" model_provider_name=\"openai\",\n",
" model_provider_token=\"sk-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\", # OPENAI TOKEN\n",
")"
]
},
@@ -308,13 +314,10 @@
"metadata": {},
"outputs": [],
"source": [
"train_reviews = sentiment_data.head(100).select('REVIEW').to_series().to_list()\n",
"train_targets = sentiment_data.head(100).select('TARGET').to_series().to_list()\n",
"train_reviews = sentiment_data.head(100).select(\"REVIEW\").to_series().to_list()\n",
"train_targets = sentiment_data.head(100).select(\"TARGET\").to_series().to_list()\n",
"\n",
"model = model.train(\n",
" examples = train_reviews,\n",
" annotations = train_targets\n",
")"
"model = model.train(examples=train_reviews, annotations=train_targets)"
]
},
{
@@ -356,12 +359,16 @@
"import seaborn as sns\n",
"from sklearn.metrics import confusion_matrix\n",
"\n",
"test_reviews = sentiment_data.head(100).select('REVIEW').to_series().to_list()\n",
"test_targets = sentiment_data.head(100).select('TARGET').to_series().to_list()\n",
"test_reviews = sentiment_data.head(100).select(\"REVIEW\").to_series().to_list()\n",
"test_targets = sentiment_data.head(100).select(\"TARGET\").to_series().to_list()\n",
"\n",
"pred_targets = model.predict(test_reviews)\n",
"\n",
"sns.heatmap(confusion_matrix(test_targets, [i[0] for i in pred_targets]), annot=True, cmap='Blues')"
"sns.heatmap(\n",
" confusion_matrix(test_targets, [i[0] for i in pred_targets]),\n",
" annot=True,\n",
" cmap=\"Blues\",\n",
")"
]
},
{
@@ -409,19 +416,16 @@
],
"source": [
"model = DocumentClassifier(\n",
" language = 'en',\n",
" model_name = 'google/flan-t5-xxl',\n",
" model_provider_name = 'hf_hub_api',\n",
" model_provider_token = 'hf_XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' #HF API TOKEN\n",
" language=\"en\",\n",
" model_name=\"google/flan-t5-xxl\",\n",
" model_provider_name=\"hf_hub_api\",\n",
" model_provider_token=\"hf_XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\", # HF API TOKEN\n",
")\n",
"\n",
"model = model.train(\n",
" examples = train_reviews,\n",
" annotations = train_targets\n",
")\n",
"model = model.train(examples=train_reviews, annotations=train_targets)\n",
"\n",
"pred_targets = model.predict(test_reviews)\n",
"sns.heatmap(confusion_matrix(test_targets, pred_targets), annot=True, cmap='Blues')"
"sns.heatmap(confusion_matrix(test_targets, pred_targets), annot=True, cmap=\"Blues\")"
]
},
{
@@ -500,20 +504,17 @@
],
"source": [
"model = DocumentClassifier(\n",
" language = 'en',\n",
" model_name = 'tiiuae/falcon-7b-instruct',\n",
" model_provider_name = 'hf_hub_api',\n",
" model_provider_token = 'hf_XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' #HF API TOKEN\n",
" language=\"en\",\n",
" model_name=\"tiiuae/falcon-7b-instruct\",\n",
" model_provider_name=\"hf_hub_api\",\n",
" model_provider_token=\"hf_XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\", # HF API TOKEN\n",
")\n",
"\n",
"model = model.train(\n",
" examples = train_reviews,\n",
" annotations = train_targets\n",
")\n",
"model = model.train(examples=train_reviews, annotations=train_targets)\n",
"\n",
"pred_targets = model.predict(test_reviews)\n",
"\n",
"sns.heatmap(confusion_matrix(test_targets, pred_targets), annot=True, cmap='Blues')"
"sns.heatmap(confusion_matrix(test_targets, pred_targets), annot=True, cmap=\"Blues\")"
]
},
{
@@ -594,19 +595,16 @@
],
"source": [
"model = DocumentClassifier(\n",
" language = 'en',\n",
" model_name = 'google/flan-t5-small',\n",
" model_provider_name = 'hf_pipeline',\n",
" language=\"en\",\n",
" model_name=\"google/flan-t5-small\",\n",
" model_provider_name=\"hf_pipeline\",\n",
")\n",
"\n",
"model = model.train(\n",
" examples = train_reviews,\n",
" annotations = train_targets\n",
")\n",
"model = model.train(examples=train_reviews, annotations=train_targets)\n",
"\n",
"pred_targets = model.predict(test_reviews)\n",
"\n",
"sns.heatmap(confusion_matrix(test_targets, pred_targets), annot=True, cmap='Blues')"
"sns.heatmap(confusion_matrix(test_targets, pred_targets), annot=True, cmap=\"Blues\")"
]
},
{
@@ -682,13 +680,14 @@
"source": [
"import polars as pl\n",
"\n",
"data = pl.read_parquet('../data/amazon_reviews_sp/amazon_reviews_multi-test.parquet')\n",
"data = pl.read_parquet(\"../data/amazon_reviews_sp/amazon_reviews_multi-test.parquet\")\n",
"data.head()\n",
"\n",
"sql = pl.SQLContext()\n",
"sql.register('data', data)\n",
"sql.register(\"data\", data)\n",
"\n",
"sentiment_data = sql.execute(\"\"\"\n",
"sentiment_data = (\n",
" sql.execute(\"\"\"\n",
" SELECT\n",
" review_body as REVIEW,\n",
" CASE\n",
@@ -699,7 +698,10 @@
" END AS TARGET,\n",
" FROM data\n",
" WHERE stars!=2 AND stars!=4;\n",
" \"\"\").collect().sample(fraction=1.0, shuffle=True, seed=0)\n",
" \"\"\")\n",
" .collect()\n",
" .sample(fraction=1.0, shuffle=True, seed=0)\n",
")\n",
"\n",
"sentiment_data.head()"
]
@@ -711,11 +713,11 @@
"metadata": {},
"outputs": [],
"source": [
"train_reviews = sentiment_data.head(100).select('REVIEW').to_series().to_list()\n",
"train_targets = sentiment_data.head(100).select('TARGET').to_series().to_list()\n",
"train_reviews = sentiment_data.head(100).select(\"REVIEW\").to_series().to_list()\n",
"train_targets = sentiment_data.head(100).select(\"TARGET\").to_series().to_list()\n",
"\n",
"test_reviews = sentiment_data.tail(100).select('REVIEW').to_series().to_list()\n",
"test_targets = sentiment_data.tail(100).select('TARGET').to_series().to_list()"
"test_reviews = sentiment_data.tail(100).select(\"REVIEW\").to_series().to_list()\n",
"test_targets = sentiment_data.tail(100).select(\"TARGET\").to_series().to_list()"
]
},
{
@@ -787,20 +789,21 @@
],
"source": [
"model = DocumentClassifier(\n",
" language = 'en',\n",
" model_name = 'gpt-3.5-turbo-instruct',\n",
" model_provider_name = 'openai',\n",
" model_provider_token = 'sk-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' #OPENAI TOKEN\n",
" language=\"en\",\n",
" model_name=\"gpt-3.5-turbo-instruct\",\n",
" model_provider_name=\"openai\",\n",
" model_provider_token=\"sk-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\", # OPENAI TOKEN\n",
")\n",
"\n",
"model = model.train(\n",
" examples = train_reviews,\n",
" annotations = train_targets\n",
")\n",
"model = model.train(examples=train_reviews, annotations=train_targets)\n",
"\n",
"pred_targets = model.predict(test_reviews)\n",
"\n",
"sns.heatmap(confusion_matrix(test_targets, [i[0] for i in pred_targets]), annot=True, cmap='Blues')"
"sns.heatmap(\n",
" confusion_matrix(test_targets, [i[0] for i in pred_targets]),\n",
" annot=True,\n",
" cmap=\"Blues\",\n",
")"
]
},
{
@@ -872,19 +875,16 @@
],
"source": [
"model = DocumentClassifier(\n",
" language = 'es',\n",
" model_name = 'google/flan-t5-small',\n",
" model_provider_name = 'hf_pipeline',\n",
" language=\"es\",\n",
" model_name=\"google/flan-t5-small\",\n",
" model_provider_name=\"hf_pipeline\",\n",
")\n",
"\n",
"model = model.train(\n",
" examples = train_reviews,\n",
" annotations = train_targets\n",
")\n",
"model = model.train(examples=train_reviews, annotations=train_targets)\n",
"\n",
"pred_targets = model.predict(test_reviews)\n",
"\n",
"sns.heatmap(confusion_matrix(test_targets, pred_targets), annot=True, cmap='Blues')"
"sns.heatmap(confusion_matrix(test_targets, pred_targets), annot=True, cmap=\"Blues\")"
]
}
],
