Merge branch 'main' into dependabot/pip/beautifulsoup4-4.12.3
ngupta10 authored Apr 2, 2024
2 parents 2de2709 + e894223 commit eb505f0
Showing 7 changed files with 51 additions and 39 deletions.
19 changes: 11 additions & 8 deletions querent/core/transformers/bert_ner_opensourcellm.py
@@ -209,14 +209,17 @@ async def process_tokens(self, data: IngestedTokens):
                     if self.sample_relationships:
                         embedding_triples = self.predicate_context_extractor.process_predicate_types(embedding_triples)
                     for triple in embedding_triples:
-                        graph_json = json.dumps(TripleToJsonConverter.convert_graphjson(triple))
-                        if graph_json:
-                            current_state = EventState(EventType.Graph,1.0, graph_json, file)
-                            await self.set_state(new_state=current_state)
-                        vector_json = json.dumps(TripleToJsonConverter.convert_vectorjson(triple))
-                        if vector_json:
-                            current_state = EventState(EventType.Vector,1.0, vector_json, file)
-                            await self.set_state(new_state=current_state)
+                        if not self.termination_event.is_set():
+                            graph_json = json.dumps(TripleToJsonConverter.convert_graphjson(triple))
+                            if graph_json:
+                                current_state = EventState(EventType.Graph,1.0, graph_json, file)
+                                await self.set_state(new_state=current_state)
+                            vector_json = json.dumps(TripleToJsonConverter.convert_vectorjson(triple))
+                            if vector_json:
+                                current_state = EventState(EventType.Vector,1.0, vector_json, file)
+                                await self.set_state(new_state=current_state)
+                        else:
+                            return
                 else:
                     return
             else:
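The change in this file is repeated in the hunks that follow: each triple is only converted and emitted while the engine's termination_event is unset, so an in-flight batch stops as soon as shutdown is requested instead of draining the whole list. Below is a minimal standalone sketch of that guard, assuming termination_event is an asyncio.Event and that set_state() publishes events downstream; SketchEngine, emit_triples, and the tuple payloads are illustrative stand-ins, not the project's API.

import asyncio
import json


class SketchEngine:
    """Illustrative stand-in for the transformer engines changed in this commit."""

    def __init__(self):
        self.termination_event = asyncio.Event()  # assumed to be set on shutdown
        self.published = []

    async def set_state(self, new_state):
        # Stand-in for the real event publisher.
        self.published.append(new_state)

    async def emit_triples(self, embedding_triples, file):
        for triple in embedding_triples:
            # New behaviour: stop emitting Graph/Vector events once termination is requested.
            if self.termination_event.is_set():
                return
            graph_json = json.dumps({"kind": "graph", "triple": triple})
            await self.set_state(("Graph", 1.0, graph_json, file))
            vector_json = json.dumps({"kind": "vector", "triple": triple})
            await self.set_state(("Vector", 1.0, vector_json, file))


async def _demo():
    engine = SketchEngine()
    await engine.emit_triples([("e1", "rel", "e2")], "doc.txt")
    engine.termination_event.set()
    await engine.emit_triples([("e3", "rel", "e4")], "doc.txt")  # emits nothing
    print(len(engine.published))  # 2


if __name__ == "__main__":
    asyncio.run(_demo())

The diff writes the guard as "if not self.termination_event.is_set()" with the return in the else branch; the sketch inverts the test, which is behaviourally the same for this loop.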
19 changes: 11 additions & 8 deletions querent/core/transformers/fixed_entities_set_opensourcellm.py
@@ -173,14 +173,17 @@ async def process_tokens(self, data: IngestedTokens):
                     if self.sample_relationships:
                         embedding_triples = self.predicate_context_extractor.process_predicate_types(embedding_triples)
                     for triple in embedding_triples:
-                        graph_json = json.dumps(TripleToJsonConverter.convert_graphjson(triple))
-                        if graph_json:
-                            current_state = EventState(EventType.Graph,1.0, graph_json, file)
-                            await self.set_state(new_state=current_state)
-                        vector_json = json.dumps(TripleToJsonConverter.convert_vectorjson(triple))
-                        if vector_json:
-                            current_state = EventState(EventType.Vector,1.0, vector_json, file)
-                            await self.set_state(new_state=current_state)
+                        if not self.termination_event.is_set():
+                            graph_json = json.dumps(TripleToJsonConverter.convert_graphjson(triple))
+                            if graph_json:
+                                current_state = EventState(EventType.Graph,1.0, graph_json, file)
+                                await self.set_state(new_state=current_state)
+                            vector_json = json.dumps(TripleToJsonConverter.convert_vectorjson(triple))
+                            if vector_json:
+                                current_state = EventState(EventType.Vector,1.0, vector_json, file)
+                                await self.set_state(new_state=current_state)
+                        else:
+                            return
                 else:
                     return
             else:
@@ -277,14 +277,17 @@ async def process_tokens(self, data: IngestedTokens):
                     if self.sample_relationships:
                         embedding_triples = self.predicate_context_extractor.process_predicate_types(embedding_triples)
                     for triple in embedding_triples:
-                        graph_json = json.dumps(TripleToJsonConverter.convert_graphjson(triple))
-                        if graph_json:
-                            current_state = EventState(EventType.Graph,1.0, graph_json, file)
-                            await self.set_state(new_state=current_state)
-                        vector_json = json.dumps(TripleToJsonConverter.convert_vectorjson(triple))
-                        if vector_json:
-                            current_state = EventState(EventType.Vector,1.0, vector_json, file)
-                            await self.set_state(new_state=current_state)
+                        if not self.termination_event.is_set():
+                            graph_json = json.dumps(TripleToJsonConverter.convert_graphjson(triple))
+                            if graph_json:
+                                current_state = EventState(EventType.Graph,1.0, graph_json, file)
+                                await self.set_state(new_state=current_state)
+                            vector_json = json.dumps(TripleToJsonConverter.convert_vectorjson(triple))
+                            if vector_json:
+                                current_state = EventState(EventType.Vector,1.0, vector_json, file)
+                                await self.set_state(new_state=current_state)
+                        else:
+                            return
         except Exception as e:
             self.logger.error(f"Invalid {self.__class__.__name__} configuration. Unable to extract predicates using GPT. {e}")
             raise Exception(f"An error occurred while extracting predicates using GPT: {e}")
23 changes: 13 additions & 10 deletions querent/core/transformers/gpt_llm_gpt_ner.py
@@ -243,17 +243,20 @@ async def process_tokens(self, data: IngestedTokens):
                 final_triples = self.remove_duplicate_triplets(final_triples)
                 if len(final_triples) > 0:
                     for triple in final_triples:
-                        graph_json = json.dumps(triple)
-                        if graph_json:
-                            current_state = EventState(EventType.Graph,1.0, graph_json, file)
-                            await self.set_state(new_state=current_state)
-                        context_embeddings = self.create_emb.get_embeddings([triple['sentence']])[0]
-                        triple['context_embeddings'] = context_embeddings
-                        triple['context'] = triple['sentence']
-                        vector_json = json.dumps(TripleToJsonConverter.convert_vectorjson((triple['subject'],json.dumps(triple), triple['object'])))
-                        if vector_json:
-                            current_state = EventState(EventType.Vector,1.0, vector_json, file)
+                        if not self.termination_event.is_set():
+                            graph_json = json.dumps(triple)
+                            if graph_json:
+                                current_state = EventState(EventType.Graph,1.0, graph_json, file)
+                                await self.set_state(new_state=current_state)
+                            context_embeddings = self.create_emb.get_embeddings([triple['sentence']])[0]
+                            triple['context_embeddings'] = context_embeddings
+                            triple['context'] = triple['sentence']
+                            vector_json = json.dumps(TripleToJsonConverter.convert_vectorjson((triple['subject'],json.dumps(triple), triple['object'])))
+                            if vector_json:
+                                current_state = EventState(EventType.Vector,1.0, vector_json, file)
                             await self.set_state(new_state=current_state)
+                        else:
+                            return

         except Exception as e:
             self.logger.debug(f"Invalid {self.__class__.__name__} configuration. Unable to extract predicates using GPT NER LLM class. {e}")
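In the GPT NER path above, each surviving triple is additionally enriched with its sentence embedding before being serialized into a (subject, serialized-triple, object) tuple for convert_vectorjson. Below is a small sketch of that per-triple assembly, assuming each triple is a dict with 'subject', 'object', and 'sentence' keys and that the embedder returns a plain list of floats per sentence; build_vector_payload and fake_embed are illustrative helpers, not the project's API.

import json
from typing import Callable, List


def build_vector_payload(triple: dict, embed: Callable[[str], List[float]]) -> tuple:
    # Attach the sentence embedding and keep the raw sentence as 'context',
    # mirroring the enrichment lines added in the diff above.
    triple["context_embeddings"] = embed(triple["sentence"])
    triple["context"] = triple["sentence"]
    # The converter in the diff receives (subject, serialized triple, object).
    return (triple["subject"], json.dumps(triple), triple["object"])


def fake_embed(sentence: str) -> List[float]:
    # Stand-in embedder so the sketch runs without a model.
    return [0.0, 0.1, 0.2]


if __name__ == "__main__":
    payload = build_vector_payload(
        {"subject": "a", "object": "b", "sentence": "a relates to b"},
        fake_embed,
    )
    print(payload[0], payload[2])  # a b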
4 changes: 2 additions & 2 deletions requirements.txt
@@ -31,15 +31,15 @@ redis==5.0.3
 regex==2023.5.5
 sentence-transformers==2.2.2
 spacy==3.7.2
-uvicorn==0.22.0
+uvicorn==0.29.0
 slack-sdk==3.26.1
 pylint==2.17.4
 pytest-cov==4.1.0
 pytest-mock==3.11.1
 tensorflow==2.14.0
 transformers==4.36.0
 torch==2.0.1 --index-url https://download.pytorch.org/whl/cpu
-pymupdf==1.23.26
+pymupdf==1.24.0
 asyncio==3.4.3
 prometheus-client==0.17.1
 rdflib==7.0.0
4 changes: 2 additions & 2 deletions setup.py
@@ -36,7 +36,7 @@
         "regex==2023.5.5",
         "sentence-transformers==2.2.2",
         "spacy==3.7.2",
-        "uvicorn==0.22.0",
+        "uvicorn==0.29.0",
         "slack-sdk==3.26.1",
         "pylint==2.17.4",
         "pytest-cov==4.1.0",
@@ -50,7 +50,7 @@
         "pytest-asyncio==0.23.2",
         "pyshacl==0.25.0",
         "google-cloud-storage==2.14.0",
-        "PyMuPDF==1.23.26",
+        "PyMuPDF==1.24.0",
         "pydub==0.25.1",
         "SpeechRecognition==3.10.1",
         "pytesseract==0.3.10",
2 changes: 1 addition & 1 deletion tests/workflows/test_multiple_collectors.py
@@ -122,7 +122,7 @@ async def test_multiple_collectors_all_async():
             else:
                 unique_files.add(ingested_data.file)
                 counter += 1
-    assert counter == 86
+    assert counter == 85
     assert len(unique_files) > 1
     assert messages > 0
