Skip to content

Commit

Permalink
precommit
Browse files Browse the repository at this point in the history
  • Loading branch information
maxhniebergall committed Aug 12, 2024
1 parent ef9faca commit 3cd988f
Showing 1 changed file with 70 additions and 61 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,9 @@
" # hosts=[\"http://localhost:9200\"]\n",
" cloud_id=ELASTIC_CLOUD_ID,\n",
" api_key=ELASTIC_API_KEY,\n",
" request_timeout=120, \n",
" request_timeout=120,\n",
" max_retries=10,\n",
" retry_on_timeout=True\n",
" retry_on_timeout=True,\n",
")"
]
},
Expand Down Expand Up @@ -205,6 +205,7 @@
"source": [
"print(client.info())\n",
"\n",
"\n",
"# define this now so we can use it later\n",
"def pretty_search_response(response):\n",
" if len(response[\"hits\"][\"hits\"]) == 0:\n",
Expand Down Expand Up @@ -265,15 +266,16 @@
"source": [
"API_KEY = getpass(\"Huggingface API key: \")\n",
"client.inference.put_model(\n",
" inference_id='my_hf_endpoint_object',\n",
" inference_id=\"my_hf_endpoint_object\",\n",
" body={\n",
" \"service\": \"hugging_face\",\n",
" \"service_settings\": {\"api_key\": API_KEY, \n",
" \"url\": \"https://yb0j0ol2xzvro0oc.us-east-1.aws.endpoints.huggingface.cloud\",\n",
" \"similarity\": \"dot_product\"\n",
" },\n",
" \"service_settings\": {\n",
" \"api_key\": API_KEY,\n",
" \"url\": \"https://yb0j0ol2xzvro0oc.us-east-1.aws.endpoints.huggingface.cloud\",\n",
" \"similarity\": \"dot_product\",\n",
" },\n",
" },\n",
" task_type=\"text_embedding\"\n",
" task_type=\"text_embedding\",\n",
")"
]
},
Expand All @@ -296,9 +298,8 @@
],
"source": [
"client.inference.inference(\n",
" inference_id='my_hf_endpoint_object',\n",
" input=\"this is the raw text of my document!\"\n",
" )"
" inference_id=\"my_hf_endpoint_object\", input=\"this is the raw text of my document!\"\n",
")"
]
},
{
Expand Down Expand Up @@ -407,12 +408,12 @@
"source": [
"client.indices.create(\n",
" index=\"hf-endpoint-index\",\n",
" settings = {\n",
" settings={\n",
" \"index\": {\n",
" \"default_pipeline\": \"hf_pipeline\",\n",
" }\n",
" },\n",
" mappings = {\n",
" mappings={\n",
" \"properties\": {\n",
" \"text\": {\"type\": \"text\"},\n",
" \"text_embedding\": {\n",
Expand All @@ -421,7 +422,7 @@
" \"similarity\": \"dot_product\",\n",
" },\n",
" }\n",
" }\n",
" },\n",
")"
]
},
Expand Down Expand Up @@ -455,19 +456,16 @@
],
"source": [
"client.indices.create(\n",
" index=\"hf-semantic-text-index\",\n",
" mappings={\n",
" index=\"hf-semantic-text-index\",\n",
" mappings={\n",
" \"properties\": {\n",
" \"infer_field\": {\n",
" \"type\": \"semantic_text\",\n",
" \"inference_id\": \"my_hf_endpoint_object\"\n",
" \"inference_id\": \"my_hf_endpoint_object\",\n",
" },\n",
" \"text_field\": {\n",
" \"type\": \"text\",\n",
" \"copy_to\": \"infer_field\"\n",
" }\n",
" \"text_field\": {\"type\": \"text\", \"copy_to\": \"infer_field\"},\n",
" }\n",
" }\n",
" },\n",
")"
]
},
Expand All @@ -488,10 +486,29 @@
"metadata": {},
"outputs": [],
"source": [
"langs = ['ar', 'bn', 'en', 'es', 'fa', 'fi', 'fr', 'hi', 'id', 'ja', 'ko', 'ru', 'sw', 'te', 'th', 'zh']\n",
"langs = [\n",
" \"ar\",\n",
" \"bn\",\n",
" \"en\",\n",
" \"es\",\n",
" \"fa\",\n",
" \"fi\",\n",
" \"fr\",\n",
" \"hi\",\n",
" \"id\",\n",
" \"ja\",\n",
" \"ko\",\n",
" \"ru\",\n",
" \"sw\",\n",
" \"te\",\n",
" \"th\",\n",
" \"zh\",\n",
"]\n",
"\n",
"\n",
"all_langs_datasets = [iter(datasets.load_dataset('miracl/miracl-corpus', lang)['train']) for lang in langs]"
"all_langs_datasets = [\n",
" iter(datasets.load_dataset(\"miracl/miracl-corpus\", lang)[\"train\"]) for lang in langs\n",
"]"
]
},
{
Expand Down Expand Up @@ -665,11 +682,13 @@
" for ds in all_langs_datasets:\n",
" text = next(ds, sentinel)\n",
" if text is not sentinel:\n",
" documents.append({\n",
" \"_index\": \"hf-semantic-text-index\",\n",
" \"_source\": {\"text_field\": text['text']},\n",
" })\n",
" # if you are using an ingest pipeline instead of a \n",
" documents.append(\n",
" {\n",
" \"_index\": \"hf-semantic-text-index\",\n",
" \"_source\": {\"text_field\": text[\"text\"]},\n",
" }\n",
" )\n",
" # if you are using an ingest pipeline instead of a\n",
" # semantic text field, use this instead:\n",
" # documents.append(\n",
" # {\n",
Expand All @@ -680,7 +699,7 @@
"\n",
" try:\n",
" response = helpers.bulk(client, documents, raise_on_error=False, timeout=\"60s\")\n",
" print(\"Docs uplaoded:\", (j+1)*MAX_BULK_SIZE)\n",
" print(\"Docs uplaoded:\", (j + 1) * MAX_BULK_SIZE)\n",
"\n",
" except Exception as e:\n",
" print(\"exception:\", str(e))"
Expand All @@ -705,11 +724,9 @@
"source": [
"query = \"English speaking countries\"\n",
"semantic_search_results = client.search(\n",
" index=\"hf-semantic-text-index\",\n",
" query={\"semantic\": {\"field\": \"infer_field\", \"query\": query}},\n",
")\n",
"\n",
" "
" index=\"hf-semantic-text-index\",\n",
" query={\"semantic\": {\"field\": \"infer_field\", \"query\": query}},\n",
")"
]
},
{
Expand Down Expand Up @@ -795,17 +812,14 @@
" task_type=\"rerank\",\n",
" inference_id=\"my_cohere_rerank_endpoint\",\n",
" body={\n",
" \"service\": \"cohere\",\n",
" \"service_settings\": {\n",
" \"service\": \"cohere\",\n",
" \"service_settings\": {\n",
" \"api_key\": \"h2OzeuORCdvJ8eidGYbHmjfeWcecRQN8MYGDHxK1\",\n",
" \"model_id\": \"rerank-english-v3.0\"\n",
" },\n",
" \"task_settings\": {\n",
" \"top_n\": 100,\n",
" \"return_documents\": True\n",
" }\n",
" }\n",
")\n"
" \"model_id\": \"rerank-english-v3.0\",\n",
" },\n",
" \"task_settings\": {\"top_n\": 100, \"return_documents\": True},\n",
" },\n",
")"
]
},
{
Expand All @@ -817,25 +831,20 @@
"source": [
"reranked_search_results = client.search(\n",
" index=\"hf-semantic-text-index\",\n",
" retriever= {\n",
" retriever={\n",
" \"text_similarity_reranker\": {\n",
" \"retriever\": {\n",
" \"standard\": {\n",
" \"query\": {\n",
" \"semantic\": {\n",
" \"field\": \"infer_field\",\n",
" \"query\": query \n",
" }\n",
" \"retriever\": {\n",
" \"standard\": {\n",
" \"query\": {\"semantic\": {\"field\": \"infer_field\", \"query\": query}}\n",
" }\n",
" }\n",
" },\n",
" \"field\": \"text_field\",\n",
" \"inference_id\": \"my_cohere_rerank_endpoint\",\n",
" \"inference_text\": query,\n",
" \"rank_window_size\": 100,\n",
" },\n",
" \"field\": \"text_field\",\n",
" \"inference_id\": \"my_cohere_rerank_endpoint\",\n",
" \"inference_text\": query,\n",
" \"rank_window_size\": 100,\n",
" }\n",
" }\n",
")\n"
" },\n",
")"
]
},
{
Expand Down

0 comments on commit 3cd988f

Please sign in to comment.