Skip to content

Commit

Permalink
Merge pull request #907 from WolframResearch/bugfix/454044
Browse files Browse the repository at this point in the history
Bugfix: Avoid an error message that occurs when giving whitespace strings to `SentenceBERTEmbedding`
  • Loading branch information
rhennigan authored Nov 12, 2024
2 parents 512070d + 1980a25 commit ce2dd59
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 7 deletions.
34 changes: 28 additions & 6 deletions Developer/VectorDatabases/VectorDatabaseBuilder.wl
Original file line number Diff line number Diff line change
Expand Up @@ -536,14 +536,36 @@ createEmbeddings[ string_String ] :=
(* ::**************************************************************************************************************:: *)
(* ::Subsubsection::Closed:: *)
(*sentenceBERTEmbedding*)
sentenceBERTEmbedding // beginDefinition;
sentenceBERTEmbedding := getSentenceBERTEmbeddingFunction[ ];

sentenceBERTEmbedding[ args___ ] := (
Needs[ "SemanticSearch`" -> None ];
SemanticSearch`SemanticSearch`Private`SentenceBERTEmbedding @ args
);
(* ::**************************************************************************************************************:: *)
(* ::Subsubsubsection::Closed:: *)
(*getSentenceBERTEmbeddingFunction*)
getSentenceBERTEmbeddingFunction // beginDefinition;

(* Resolves which symbol to use as the SentenceBERT embedding function and returns that symbol.
   Two fully qualified candidate names are probed in order; the first usable one is chosen.
   The result is stored as a definition for getSentenceBERTEmbeddingFunction[ ], so the lookup
   only runs on the first successful call. Any failed confirmation inside the Enclose is passed
   to throwInternalFailure. *)
getSentenceBERTEmbeddingFunction[ ] := Enclose[
Module[ { name },

(* Load the SemanticSearch` package; the "-> None" form is used so that no alias/context-path entry is requested. *)
Needs[ "SemanticSearch`" -> None ];

(* SelectFirst yields the first candidate string passing the test below. If neither candidate passes,
   it returns a non-string value, which fails the StringQ confirmation (tagged "SymbolName"). *)
name = ConfirmBy[
SelectFirst[
{
"SemanticSearch`SentenceBERTEmbedding",
"SemanticSearch`SemanticSearch`Private`SentenceBERTEmbedding"
},
(* A candidate passes when the name exists (NameQ) and System`Private`HasAnyEvaluationsQ, wrapped around
   the parsed symbol by ToExpression, gives True.
   NOTE(review): HasAnyEvaluationsQ is an internal function; presumably it checks that the symbol has
   definitions attached - confirm. *)
NameQ @ # && ToExpression[ #, InputForm, System`Private`HasAnyEvaluationsQ ] &
],
StringQ,
"SymbolName"
];

(* Memoize: an immediate Set on the zero-argument call, so later calls return the symbol directly. *)
getSentenceBERTEmbeddingFunction[ ] = Symbol @ name
],
throwInternalFailure
];

sentenceBERTEmbedding // endDefinition;
getSentenceBERTEmbeddingFunction // endDefinition;

(* ::**************************************************************************************************************:: *)
(* ::Subsection::Closed:: *)
Expand Down
5 changes: 4 additions & 1 deletion Source/Chatbook/PromptGenerators/VectorDatabases.wl
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,10 @@ getAndCacheEmbeddings[ { } ] :=
getAndCacheEmbeddings[ strings: { __String } ] /; $embeddingModel === "SentenceBERT" := Enclose[
Module[ { vectors },
vectors = ConfirmBy[
Developer`ToPackedArray @ sentenceBERTEmbedding @ strings,
If[ AllTrue[ strings, StringMatchQ[ WhitespaceCharacter... ] ],
Developer`ToPackedArray @ Rest @ sentenceBERTEmbedding @ Prepend[ strings, "hello" ],
Developer`ToPackedArray @ sentenceBERTEmbedding @ strings
],
Developer`PackedArrayQ,
"PackedArray"
];
Expand Down

0 comments on commit ce2dd59

Please sign in to comment.