Skip to content

Commit d02b432

Browse files
committed
🚧 fix(wip): back to dedupe debug
1 parent fa86644 commit d02b432

File tree

3 files changed

+8
-30
lines changed

3 files changed

+8
-30
lines changed

magnet/electrode.py

+4-28
Original file line numberDiff line numberDiff line change
@@ -2,41 +2,17 @@
22
from magnet.ize import memory
33
from magnet.ron import llm
44

5+
from magnet.utils.globals import _f
6+
57
class Electrode:
68
def __init__(self, config: dict = None):
7-
self.config = config if config else {
8-
"MILVUS_URI": "192.168.2.69"
9-
, "MILVUS_PORT": 19530
10-
, "MILVUS_USER": "root"
11-
, "MILVUS_PASSWORD": "Rrr/Yp6k#<M19rB3j1>Mi4Ta"
12-
, "NATS_URL": "192.168.2.69"
13-
, "NATS_USER": "my-user"
14-
, "NATS_PASSWORD": "T0pS3cr3t"
15-
, "NATS_CATEGORY": "non_nlp_chunks"
16-
, "NATS_STREAM": "documents"
17-
, "NATS_SESSION": "bge_large_en_v15"
18-
, "DIMENSION": 1024
19-
, "EMBEDDING_MODEL": "BAAI/bge-large-en-v1.5"
20-
, "INDEX": "bge_non_nlp"
21-
, "INDEX_PARAMS": {
22-
'metric_type': 'COSINE',
23-
'index_type':'HNSW',
24-
'params': {
25-
"efConstruction": 40
26-
, "M": 48
27-
},
28-
}
29-
, "JOB_TYPE": "index"
30-
, "JOB_N": 10
31-
, "GENERATION_MODEL": "mistralai/Mistral-7B-Instruct-v0.1"
32-
, "CREATE": True
33-
}
9+
self.config = config if config else _f('fatal', 'no config applied')
3410
async def auto(self):
3511
match self.config['JOB_TYPE']:
3612
case 'index':
3713
self.reso = field.Resonator(f"{self.config['NATS_USER']}:{self.config['NATS_PASSWORD']}@{self.config['NATS_URL']}")
3814
self.embedder = memory.Embedder(self.config, create=self.config["CREATE"])
39-
await self.reso.on(category=self.config['NATS_CATEGORY'], session=self.config['NATS_SESSION'], stream=self.config['NATS_STREAM'])
15+
await self.reso.on(category=self.config['NATS_CATEGORY'], session=self.config['NATS_SESSION'], stream=self.config['NATS_STREAM'], job=True)
4016
await self.reso.listen(cb=self.embedder.index, job_n=self.config['JOB_N'])
4117

4218

magnet/ic/field.py

+2
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,8 @@ async def worker(self, cb=print):
298298
_f("warn", f'something wrong in your callback function!\n{e}')
299299
except Exception as e:
300300
_f('fatal','invalid JSON')
301+
async def conduct(self, cb=print):
302+
pass
301303

302304
async def info(self, session: str = None):
303305
"""

magnet/ize/memory.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ async def index(self, payload, msg, verbose=False, field=None, charge=False, ins
8383
await self.field.pulse(payload)
8484
await msg.ack_sync()
8585
else:
86-
_f('info', 'embedding exists already') if verbose else None
86+
_f('info', f'embedding exists already\n{payload.text}')
8787
await msg.ack_sync()
8888
except Exception as e:
8989
await msg.term()
@@ -182,4 +182,4 @@ def is_dupe(self, q):
182182
, output_fields=['text', 'document']
183183
, limit=1
184184
)
185-
return True if sum(match[0].distances) == 0.0 and len(match[0])>0 else False
185+
return True if sum(match[0].distances) >= 0.99 and len(match[0])>0 else False

0 commit comments

Comments
 (0)