Skip to content

Commit b496fe3

Browse files
authored
Merge pull request #27 from Prismadic/26-featperf-hash-entries
🧲 feat(perf): hashing headers for NATS
2 parents 90ec21f + 5f80270 commit b496fe3

File tree

4 files changed

+28
-30
lines changed

4 files changed

+28
-30
lines changed

examples/distributed_charge/1_process.ipynb

+16 -27
Original file line number | Diff line number | Diff line change
@@ -2,26 +2,26 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 1,
5+
"execution_count": 4,
66
"metadata": {},
77
"outputs": [],
88
"source": [
99
"from magnet.ize.filings import Processor\n",
10-
"source_data_file = \"./raw/kb_export_clean.csv\"\n",
11-
"export_data_file = \"./data/filings_mistral_nlp.parquet\""
10+
"source_data_file = \"../../raw/kb_export_clean.csv\"\n",
11+
"export_data_file = \"../../data/filings_mistral_nlp.parquet\""
1212
]
1313
},
1414
{
1515
"cell_type": "code",
16-
"execution_count": 3,
16+
"execution_count": 5,
1717
"metadata": {},
1818
"outputs": [
1919
{
2020
"name": "stdout",
2121
"output_type": "stream",
2222
"text": [
23-
"\u001b[96m☕️ WAIT: loading - ./raw/kb_export_clean.csv\u001b[0m\n",
24-
"\u001b[92m🌊 SUCCESS: loaded - ./raw/kb_export_clean.csv\u001b[0m\n"
23+
"\u001b[96m☕️ WAIT: loading - ../raw/kb_export_clean.csv\u001b[0m\n",
24+
"\u001b[91m☠️ FATAL: [Errno 2] No such file or directory: '../raw/kb_export_clean.csv'\u001b[0m\n"
2525
]
2626
}
2727
],
@@ -36,30 +36,19 @@
3636
},
3737
{
3838
"cell_type": "code",
39-
"execution_count": 4,
39+
"execution_count": 3,
4040
"metadata": {},
4141
"outputs": [
4242
{
43-
"name": "stdout",
44-
"output_type": "stream",
45-
"text": [
46-
"\u001b[96m☕️ WAIT: get coffee or tea - 65822 processing...\u001b[0m\n"
47-
]
48-
},
49-
{
50-
"name": "stderr",
51-
"output_type": "stream",
52-
"text": [
53-
"100%|██████████| 65822/65822 [20:08<00:00, 54.48it/s] \n",
54-
"100%|██████████| 65822/65822 [00:02<00:00, 29005.30it/s]\n"
55-
]
56-
},
57-
{
58-
"name": "stdout",
59-
"output_type": "stream",
60-
"text": [
61-
"\u001b[96m☕️ WAIT: saving to ./data/filings_mistral_nlp.parquet\u001b[0m\n",
62-
"\u001b[92m🌊 SUCCESS: saved - ./data/filings_mistral_nlp.parquet\u001b[0m\n"
43+
"ename": "AttributeError",
44+
"evalue": "'NoneType' object has no attribute 'dropna'",
45+
"output_type": "error",
46+
"traceback": [
47+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
48+
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
49+
"Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m clustered_filings\u001b[38;5;241m.\u001b[39mprocess(\n\u001b[1;32m 2\u001b[0m export_data_file\n\u001b[1;32m 3\u001b[0m , nlp\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 4\u001b[0m )\n",
50+
"File \u001b[0;32m/opt/homebrew/lib/python3.11/site-packages/magnet/ize/filings.py:55\u001b[0m, in \u001b[0;36mProcessor.process\u001b[0;34m(self, path, splitter, nlp)\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mprocess\u001b[39m(\u001b[38;5;28mself\u001b[39m, path: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m, splitter: \u001b[38;5;28many\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m, nlp\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m):\n\u001b[0;32m---> 55\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdf \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdropna\u001b[49m()\n\u001b[1;32m 56\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdf \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 57\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
51+
"\u001b[0;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'dropna'"
6352
]
6453
}
6554
],

examples/distributed_charge/2_charge.ipynb

+1 -1
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
"outputs": [],
88
"source": [
99
"from magnet.ize.filings import Processor\n",
10-
"export_data_file = \"./data/filings_mistral_nlp.parquet\""
10+
"export_data_file = \"../../data/filings_mistral_nlp.parquet\""
1111
]
1212
},
1313
{

magnet/ic/field.py

+11
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,9 @@
44
from nats.errors import TimeoutError
55
from magnet.utils.data_classes import *
66
from nats.js.api import StreamConfig
7+
import xxhash
8+
9+
x = xxhash.xxh64()
710

811
class Charge:
912
"""
@@ -80,6 +83,14 @@ async def pulse(self, payload):
8083
except Exception as e:
8184
_f('fatal', f'invalid JSON\n{e}')
8285
try:
86+
_hash = x(bytes_).hexdigest()
87+
await self.js.publish(
88+
self.category
89+
, bytes_
90+
, headers={
91+
"Nats-Msg-Id": _hash
92+
}
93+
)
8394
await self.js.publish(self.category, bytes_, headers={"Nats-Msg-Id":})
8495
except Exception as e:
8596
_f('fatal', f'could not send data to {self.server}\n{e}')

magnet/utils/data_classes.py

-2
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,6 @@ class GeneratedPayload:
2424
result (str): The result generated by the system.
2525
model (str): The model used to generate the payload.
2626
"""
27-
2827
query: str
2928
prompt: str
3029
context: list
@@ -42,7 +41,6 @@ class EmbeddingPayload:
4241
text (list): The text of the data.
4342
model (str): The model used for embedding the text data.
4443
"""
45-
4644
document: str
4745
embedding: list
4846
text: list

0 commit comments

Comments
 (0)