Merge pull request #1450 from vespa-engine/freva/data

data.vespa.oath.cloud -> data.vespa-cloud.com
vespa-engine · Sep 5, 2024 · 3b1135d · 3b1135d
2 parents 2d68973 + 38571b7
commit 3b1135d
Show file tree

Hide file tree

Showing 17 changed files with 33 additions and 33 deletions.
diff --git a/billion-scale-image-search/src/main/bash/download_models.sh b/billion-scale-image-search/src/main/bash/download_models.sh
@@ -9,8 +9,8 @@ if [ -f "$FILE" ]; then
     echo "$FILE exists."
 else 
     echo "$FILE does not exist."
-    echo "Downloading model https://data.vespa.oath.cloud/sample-apps-data/clip_text_transformer.onnx" 
+    echo "Downloading model https://data.vespa-cloud.com/sample-apps-data/clip_text_transformer.onnx" 
     curl -L -o $DIR/text_transformer.onnx \
-      https://data.vespa.oath.cloud/sample-apps-data/clip_text_transformer.onnx
+      https://data.vespa-cloud.com/sample-apps-data/clip_text_transformer.onnx
 fi
 
diff --git a/billion-scale-vector-search/README.md b/billion-scale-vector-search/README.md
@@ -85,7 +85,7 @@ It uses the first 10M vectors of the 100M slice sample.
 This sample file is about 1GB (10M vectors):
 <pre data-test="exec">
 $ curl -L -o spacev10m_base.i8bin \
-  https://data.vespa.oath.cloud/sample-apps-data/spacev10m_base.i8bin
+  https://data.vespa-cloud.com/sample-apps-data/spacev10m_base.i8bin
 </pre>
 
 Generate the feed file for the first 10M vectors from the 100M sample. 
@@ -141,7 +141,7 @@ Download the query vectors and the ground truth for the 10M first vectors:
 $ curl -L -o query.i8bin \
   https://github.com/microsoft/SPTAG/raw/main/datasets/SPACEV1B/query.bin
 $ curl -L -o spacev10m_gt100.i8bin \
-  https://data.vespa.oath.cloud/sample-apps-data/spacev10m_gt100.i8bin
+  https://data.vespa-cloud.com/sample-apps-data/spacev10m_gt100.i8bin
 </pre>
 
 Note, initially, the routine above used the query file from https://comp21storage.blob.core.windows.net/publiccontainer/comp21/spacev1b/query.i8bin

diff --git a/commerce-product-ranking/README.md b/commerce-product-ranking/README.md
@@ -89,7 +89,7 @@ $ vespa clone commerce-product-ranking my-app && cd my-app
 Download cross-encoder model:
 <pre data-test="exec">
 $ curl -L -o application/models/title_ranker.onnx \
-    https://data.vespa.oath.cloud/sample-apps-data/title_ranker.onnx
+    https://data.vespa-cloud.com/sample-apps-data/title_ranker.onnx
 </pre>
 
 See [scripts/export-bi-encoder.py](scripts/export-bi-encoder.py) and
@@ -181,7 +181,7 @@ This run file can then be evaluated using the [trec_eval](https://github.com/usn
 Download a pre-processed query-product relevance judgments in TREC format:
 <pre data-test="exec">
 $  curl -L -o test.qrels \
-    https://data.vespa.oath.cloud/sample-apps-data/test.qrels
+    https://data.vespa-cloud.com/sample-apps-data/test.qrels
 </pre>
 
 Install `trec_eval` (your mileage may vary):
@@ -237,7 +237,7 @@ Download a pre-processed feed file with all (1,215,854) products:
 
 <pre>
 $  curl -L -o product-search-products.jsonl.zstd \
-    https://data.vespa.oath.cloud/sample-apps-data/product-search-products.jsonl.zstd
+    https://data.vespa-cloud.com/sample-apps-data/product-search-products.jsonl.zstd
 </pre>
 
 This step is resource intensive as the semantic embedding model encodes 

diff --git a/commerce-product-ranking/application/services.xml b/commerce-product-ranking/application/services.xml
@@ -10,12 +10,12 @@
           <model url="https://huggingface.co/bert-base-uncased/raw/main/tokenizer.json"/>
         </component>
         <component id="title" type="hugging-face-embedder">
-          <transformer-model  url="https://data.vespa.oath.cloud/sample-apps-data/title_encoder.onnx"/>
+          <transformer-model  url="https://data.vespa-cloud.com/sample-apps-data/title_encoder.onnx"/>
           <tokenizer-model url="https://huggingface.co/bert-base-uncased/raw/main/tokenizer.json"/>
           <transformer-output>output_0</transformer-output>
         </component>
         <component id="description" type="hugging-face-embedder">
-          <transformer-model  url="https://data.vespa.oath.cloud/sample-apps-data/description_encoder.onnx"/>
+          <transformer-model  url="https://data.vespa-cloud.com/sample-apps-data/description_encoder.onnx"/>
           <tokenizer-model url="https://huggingface.co/bert-base-uncased/raw/main/tokenizer.json"/>
           <transformer-output>output_0</transformer-output>
         </component>

diff --git a/commerce-product-ranking/notebooks/Train-lightgbm.ipynb b/commerce-product-ranking/notebooks/Train-lightgbm.ipynb
@@ -83,7 +83,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "train = pandas.read_parquet('https://data.vespa.oath.cloud/sample-apps-data/product_ranking_train.parquet')"
+    "train = pandas.read_parquet('https://data.vespa-cloud.com/sample-apps-data/product_ranking_train.parquet')"
    ]
   },
   {
@@ -387,8 +387,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "train_queries = pandas.read_parquet('https://data.vespa.oath.cloud/sample-apps-data/train_query_ids.parquet')['query_id'].unique()\n",
-    "dev_queries = pandas.read_parquet('https://data.vespa.oath.cloud/sample-apps-data/dev_query_ids.parquet')['query_id'].unique()"
+    "train_queries = pandas.read_parquet('https://data.vespa-cloud.com/sample-apps-data/train_query_ids.parquet')['query_id'].unique()\n",
+    "dev_queries = pandas.read_parquet('https://data.vespa-cloud.com/sample-apps-data/dev_query_ids.parquet')['query_id'].unique()"
    ]
   },
   {

diff --git a/commerce-product-ranking/notebooks/Train-xgboost.ipynb b/commerce-product-ranking/notebooks/Train-xgboost.ipynb
@@ -79,7 +79,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "train = pandas.read_parquet('https://data.vespa.oath.cloud/sample-apps-data/product_ranking_train.parquet')"
+    "train = pandas.read_parquet('https://data.vespa-cloud.com/sample-apps-data/product_ranking_train.parquet')"
    ]
   },
   {
@@ -123,8 +123,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "train_queries = pandas.read_parquet('https://data.vespa.oath.cloud/sample-apps-data/train_query_ids.parquet')['query_id'].unique()\n",
-    "dev_queries = pandas.read_parquet('https://data.vespa.oath.cloud/sample-apps-data/dev_query_ids.parquet')['query_id'].unique()"
+    "train_queries = pandas.read_parquet('https://data.vespa-cloud.com/sample-apps-data/train_query_ids.parquet')['query_id'].unique()\n",
+    "dev_queries = pandas.read_parquet('https://data.vespa-cloud.com/sample-apps-data/dev_query_ids.parquet')['query_id'].unique()"
    ]
   },
   {

diff --git a/commerce-product-ranking/notebooks/train_neural.ipynb b/commerce-product-ranking/notebooks/train_neural.ipynb
@@ -200,7 +200,7 @@
    },
    "outputs": [],
    "source": [
-    "train_queries = pd.read_parquet(\"https://data.vespa.oath.cloud/sample-apps-data/train_query_ids.parquet\")['query_id'].unique()"
+    "train_queries = pd.read_parquet(\"https://data.vespa-cloud.com/sample-apps-data/train_query_ids.parquet\")['query_id'].unique()"
    ]
   },
   {

diff --git a/examples/embedding-service/README.md b/examples/embedding-service/README.md
@@ -24,9 +24,9 @@ which takes a string as an input and returns a vector embedding of that string.
 ```
 mkdir -p src/main/application/embedder-models/e5-small-v2
 curl -o src/main/application/embedder-models/e5-small-v2/model.onnx \
-  https://data.vespa.oath.cloud/onnx_models/e5-small-v2/model.onnx
+  https://data.vespa-cloud.com/onnx_models/e5-small-v2/model.onnx
 curl -o src/main/application/embedder-models/e5-small-v2/tokenizer.json \
-  https://data.vespa.oath.cloud/onnx_models/e5-small-v2/tokenizer.json
+  https://data.vespa-cloud.com/onnx_models/e5-small-v2/tokenizer.json
 ```
 4. Add a public certificate: ``vespa auth cert``
 5. Compile and deploy the application: ``mvn install && vespa deploy --wait 600``
@@ -44,8 +44,8 @@ Check out ``services.xml`` for more information.
 3. Download the models:
 ```
 mkdir -p src/main/application/embedder-models/e5-small-v2
-curl -o src/main/application/embedder-models/e5-small-v2/model.onnx https://data.vespa.oath.cloud/onnx_models/e5-small-v2/model.onnx
-curl -o src/main/application/embedder-models/e5-small-v2/tokenizer.json https://data.vespa.oath.cloud/onnx_models/e5-small-v2/tokenizer.json
+curl -o src/main/application/embedder-models/e5-small-v2/model.onnx https://data.vespa-cloud.com/onnx_models/e5-small-v2/model.onnx
+curl -o src/main/application/embedder-models/e5-small-v2/tokenizer.json https://data.vespa-cloud.com/onnx_models/e5-small-v2/tokenizer.json
 ```
 4. Compile and deploy the application: ``mvn install && vespa deploy --wait 300``
 

diff --git a/incremental-search/search-as-you-type/README.md b/incremental-search/search-as-you-type/README.md
@@ -61,7 +61,7 @@ $ mvn clean package -U
 Download feed file:
 <pre data-test="exec">
 $ curl -L -o search-as-you-type-index.jsonl \
-  https://data.vespa.oath.cloud/sample-apps-data/search-as-you-type-index.jsonl 
+  https://data.vespa-cloud.com/sample-apps-data/search-as-you-type-index.jsonl 
 </pre>
 
 Verify that configuration service (deploy api) is ready:

diff --git a/multilingual-search/services.xml b/multilingual-search/services.xml
@@ -7,8 +7,8 @@
 
         <!-- See https://docs.vespa.ai/en/embedding.html#huggingface-embedder -->
         <component id="e5" type="hugging-face-embedder">
-            <transformer-model url="https://data.vespa.oath.cloud/sample-apps-data/m-e5-small/model.onnx"/>
-            <tokenizer-model url="https://data.vespa.oath.cloud/sample-apps-data/m-e5-small/tokenizer.json"/>
+            <transformer-model url="https://data.vespa-cloud.com/sample-apps-data/m-e5-small/model.onnx"/>
+            <tokenizer-model url="https://data.vespa-cloud.com/sample-apps-data/m-e5-small/tokenizer.json"/>
             <!-- E5 prompt instructions -->
             <prepend>
                 <query>query:</query>

diff --git a/retrieval-augmented-generation/services.xml b/retrieval-augmented-generation/services.xml
@@ -13,7 +13,7 @@
     <!-- Comment out this component to avoid downloading the model file during startup -->
     <component id="mistral" class="ai.vespa.llm.clients.LocalLLM">
       <config name="ai.vespa.llm.clients.llm-local-client">
-        <model url="https://data.vespa.oath.cloud/gguf_models/Phi-3-mini-4k-instruct-q4.gguf" />
+        <model url="https://data.vespa-cloud.com/gguf_models/Phi-3-mini-4k-instruct-q4.gguf" />
         <contextSize>4096</contextSize>
         <parallelRequests>1</parallelRequests>
       </config>

diff --git a/text-image-search/README.md b/text-image-search/README.md
@@ -39,7 +39,7 @@ which is suitable for exploration and analysis.
 The other is a stand-alone Vespa application, which is more suitable for production (below).
 The Python sample app includes a streamlit user interface:
 
-[Animation](https://data.vespa.oath.cloud/sample-apps-data/image_demo.gif)
+[Animation](https://data.vespa-cloud.com/sample-apps-data/image_demo.gif)
 
 The application takes a textual description and returns the file names of the
 images that best match the description. The main difference between this app
@@ -144,7 +144,7 @@ $ python3 src/python/clip_feed.py
 Alternatively, instead of computing the embeddings, use the pre-computed embeddings:
 <pre data-test="exec">
 $ curl -L -o flickr-8k-clip-embeddings.jsonl.zst \
-    https://data.vespa.oath.cloud/sample-apps-data/flickr-8k-clip-embeddings.jsonl.zst 
+    https://data.vespa-cloud.com/sample-apps-data/flickr-8k-clip-embeddings.jsonl.zst 
 </pre>
 
 <pre data-test="exec">

diff --git a/text-image-search/src/python/README.md b/text-image-search/src/python/README.md
@@ -51,4 +51,4 @@ Run the app:
 streamlit run app.py
 ```
 
-[Animation](https://data.vespa.oath.cloud/sample-apps-data/image_demo.gif)
+[Animation](https://data.vespa-cloud.com/sample-apps-data/image_demo.gif)
diff --git a/text-image-search/src/sh/download_onnx_model.sh b/text-image-search/src/sh/download_onnx_model.sh
@@ -5,6 +5,6 @@ echo "[INFO] Downloading model into $DIR"
 
 mkdir -p $DIR
 
-echo "Downloading https://data.vespa.oath.cloud/onnx_models/clip_transformer.onnx"
+echo "Downloading https://data.vespa-cloud.com/onnx_models/clip_transformer.onnx"
 curl -L -o $DIR/transformer.onnx \
-https://data.vespa.oath.cloud/onnx_models/clip_transformer.onnx
+https://data.vespa-cloud.com/onnx_models/clip_transformer.onnx
diff --git a/text-video-search/README.md b/text-video-search/README.md
@@ -9,7 +9,7 @@
 
 Build a text-video search from scratch based on CLIP models with Vespa python API.
 
-[See Animation](https://data.vespa.oath.cloud/sample-apps-data/video_demo.gif)
+[See Animation](https://data.vespa-cloud.com/sample-apps-data/video_demo.gif)
 
 ## Create the application from scratch in a Jupyter Notebook
 

diff --git a/text-video-search/src/python/app.py b/text-video-search/src/python/app.py
@@ -32,7 +32,7 @@ def get_video(video_file_name, video_dir):
 
 def get_predefined_queries():
     return get(
-        "https://data.vespa.oath.cloud/blog/ucf101/predefined_queries.txt"
+        "https://data.vespa-cloud.com/blog/ucf101/predefined_queries.txt"
     ).text.split("\n")[:-1]
 
 

diff --git a/use-case-shopping/README.md b/use-case-shopping/README.md
@@ -90,13 +90,13 @@ $ vespa test src/test/application/tests/system-test/product-search-test.json
 
 First, create data feed for products:
 <pre data-test="exec">
-$ curl -L -o meta_sports_20k_sample.json.zst https://data.vespa.oath.cloud/sample-apps-data/meta_sports_20k_sample.json.zst 
+$ curl -L -o meta_sports_20k_sample.json.zst https://data.vespa-cloud.com/sample-apps-data/meta_sports_20k_sample.json.zst 
 $ zstdcat meta_sports_20k_sample.json.zst | ./convert_meta.py > feed_items.json
 </pre>
 
 Next, data feed for reviews:
 <pre data-test="exec">
-$ curl -L -o reviews_sports_24k_sample.json.zst https://data.vespa.oath.cloud/sample-apps-data/reviews_sports_24k_sample.json.zst
+$ curl -L -o reviews_sports_24k_sample.json.zst https://data.vespa-cloud.com/sample-apps-data/reviews_sports_24k_sample.json.zst
 $ zstdcat reviews_sports_24k_sample.json.zst | ./convert_reviews.py > feed_reviews.json
 </pre>