Dump movieId embedding into a CSV

chdb-io · Dec 12, 2023 · 8f904fc · 8f904fc
1 parent de045da
commit 8f904fc
Showing 1 changed file with 23 additions and 0 deletions.
diff --git a/examples/chDB_vector_search.ipynb b/examples/chDB_vector_search.ipynb
@@ -342,6 +342,29 @@
     "similar_movies = model.wv.most_similar(str(input_movie_id), topn=top_k)\n",
     "print(chs.query(f\"SELECT movieId, title FROM movies WHERE movieId IN ({','.join([str(m[0]) for m in similar_movies])})\", \"CSV\"))"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import csv\n",
+    "\n",
+    "# Dump every movieId known to the model, with its embedding vector,\n",
+    "# into movie_embeddings.csv (columns: movieId, embedding).\n",
+    "# newline='' lets the csv module control line endings itself.\n",
+    "with open('movie_embeddings.csv', 'w', newline='') as file:\n",
+    "    writer = csv.writer(file)\n",
+    "\n",
+    "    # Header row\n",
+    "    writer.writerow(['movieId', 'embedding'])\n",
+    "\n",
+    "    # One row per key in the model vocabulary\n",
+    "    for movieId in model.wv.index_to_key:\n",
+    "        embedding = model.wv[movieId]\n",
+    "\n",
+    "        # Do not hand the vector to csv.writer directly: it would be written\n",
+    "        # via str(), and (assuming a NumPy array, TODO confirm) that text is\n",
+    "        # space-separated, wraps across lines and elides long arrays with\n",
+    "        # '...'. Build a compact '[v1,v2,...]' literal instead so the field\n",
+    "        # stays on one line and can be parsed back as an array.\n",
+    "        vector_literal = '[' + ','.join(str(value) for value in embedding) + ']'\n",
+    "        writer.writerow([movieId, vector_literal])\n"
+   ]
   }
  ],
  "metadata": {