-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscript.js
198 lines (183 loc) · 7.26 KB
/
script.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
import { render, html } from "https://cdn.jsdelivr.net/npm/lit-html@3/+esm";
import { unsafeHTML } from "https://cdn.jsdelivr.net/npm/lit-html@3/directives/unsafe-html.js";
import { Marked } from "https://cdn.jsdelivr.net/npm/marked@13/+esm";
import { parse } from "https://cdn.jsdelivr.net/npm/[email protected]/+esm";
import { asyncLLM } from "https://cdn.jsdelivr.net/npm/asyncllm@2";
import { schemeCategory10 } from "https://cdn.jsdelivr.net/npm/d3-scale-chromatic@3/+esm";
// Resolve every DOM node this module reads from or renders into, in one pass
const [$documentForm, $documentFile, $document, $entities, $filters, $matches] = [
  "#document-form",
  "#document-file",
  "#document",
  "#entities",
  "#filters",
  "#matches",
].map((selector) => document.querySelector(selector));
// Markdown renderer used to turn the document text into HTML
const marked = new Marked();
// Spinner template rendered into the filters pane while the LLM request runs
const loading = html`<div class="text-center"><div class="spinner-border my-5" role="status"></div></div>`;
// App name appended to the bearer token in the Authorization header
let app = "entityextraction";
// Latest parsed LLM output, shaped { type: [entity1, entity2, ...] }
let args;
// Entity that was switched on most recently (reset to null when one is switched off)
let latestEntity;
// Color assigned to each entity type, shaped { type: color }
let entityColors;
// Entities currently highlighted, keyed by entity text with the entity type as value
const selectedEntities = new Map();
/**
 * Get the LLM Foundry token from the server.
 *
 * Any failure (network error, non-JSON body, JSON that is not an object) resolves to
 * `undefined` instead of rejecting: this runs under top-level await, so a rejection would
 * abort module evaluation and none of the event listeners below would ever be attached.
 *
 * @returns {Promise<string|undefined>} the token, or undefined when it could not be obtained
 */
async function fetchToken() {
  try {
    const response = await fetch("https://llmfoundry.straive.com/token", { credentials: "include" });
    const { token: fetchedToken } = await response.json();
    return fetchedToken;
  } catch (error) {
    console.warn("Could not fetch LLM Foundry token; falling back to the log-in link", error);
    return undefined;
  }
}
const token = await fetchToken();
// Log-in link that returns the user to this page afterwards
const url = `https://llmfoundry.straive.com/login?${new URLSearchParams({ next: location.href })}`;
// With a token show the submit button; without one show the log-in link instead
render(
  token
    ? html`<button type="submit" class="btn btn-primary mt-3">Analyze</button>`
    : html`<a class="btn btn-primary" href="${url}">Log in to try analyze your own documents</a>`,
  document.querySelector("#analyze")
);
/**
 * Build the { type: color } lookup for a list of entity types.
 *
 * Colors are taken from schemeCategory10 in order and wrap around to the start of the
 * palette once there are more types than palette entries.
 *
 * @param {string[]} entityTypes - entity type names, in display order
 * @returns {Object<string, string>} map from entity type to its color
 */
function getEntityColors(entityTypes) {
  const paletteSize = schemeCategory10.length;
  const pickColor = (position) => schemeCategory10[position % paletteSize];
  const typeColorPairs = entityTypes.map((entityType, position) => [entityType, pickColor(position)]);
  return Object.fromEntries(typeColorPairs);
}
// When "Analyze" button is clicked, call the LLM with the document to extract entities.
// Each streamed chunk that carries tool-call arguments is parsed into `args` and the filters
// are re-rendered. A failure, or a stream that yields no arguments, replaces the spinner
// with a message instead of leaving it spinning.
$documentForm.addEventListener("submit", async (event) => {
  event.preventDefault();
  // Named `text` (not `document`) so the global `document` is not shadowed inside this handler
  const text = $document.value;
  // One entity type per non-blank line of the #entities input
  const entityTypes = $entities.value
    .split("\n")
    .map((line) => line.trim())
    .filter(Boolean);
  // Generate { entityType1: [entity1, entity2, ...], entityType2: [entity1, entity2, ...], ... }
  const schema = {
    type: "object",
    properties: Object.fromEntries(entityTypes.map((type) => [type, { type: ["array"], items: { type: "string" } }])),
    required: entityTypes,
    additionalProperties: false,
  };
  const tools = [{ type: "function", function: { name: "entities", parameters: schema } }];
  const body = {
    model: "gpt-4o-mini",
    messages: [
      {
        role: "system",
        content: `Call entities({...}) extracting all entities from the document VERBATIM.
The parameters look like: {type1: [entity 1, entity 2, ...], type2: [entity 1, entity 2, ...], ...}
Types must only be:
${entityTypes.join("\n")}
`,
      },
      { role: "user", content: [{ type: "text", text }] },
    ],
    stream: true,
    stream_options: { include_usage: true },
    tool_choice: { type: "function", function: { name: "entities" } },
    temperature: 0,
    tools,
  };
  // Show the plain document and a spinner, and drop highlights from any previous run
  render(unsafeHTML(marked.parse(text)), $matches);
  render(loading, $filters);
  selectedEntities.clear();
  let received = false;
  try {
    const stream = asyncLLM("https://llmfoundry.straive.com/openai/v1/chat/completions", {
      method: "POST",
      headers: { "Content-Type": "application/json", Authorization: `Bearer ${token}:${app}` },
      body: JSON.stringify(body),
    });
    for await (const { tools: toolCalls, error: streamError } of stream) {
      // NOTE(review): assumes asyncLLM reports request/stream failures through an `error`
      // field on the yielded object -- confirm against the asyncllm version in use
      if (streamError) throw new Error(streamError?.message ?? String(streamError));
      // Skip chunks that carry no tool call, or whose arguments have not started streaming yet
      if (!toolCalls?.length || !toolCalls[0].args) continue;
      // The arguments arrive incrementally, so parse whatever has been received so far
      args = parse(toolCalls[0].args);
      received = true;
      renderFilters();
    }
    if (!received)
      render(html`<div class="alert alert-warning" role="alert">No entities were returned.</div>`, $filters);
  } catch (error) {
    render(html`<div class="alert alert-danger" role="alert">${error.message}</div>`, $filters);
  }
});
/**
 * Render the entity filter accordion into $filters from the module-level `args`.
 *
 * One accordion item is rendered per entity type, with one toggle button per entity.
 * Also recomputes `entityColors` for the current set of types.
 *
 * Collapse panels are identified by their position (`collapse-0`, `collapse-1`, ...) rather
 * than by the type name: types are free text, so names containing quotes, `#` or other
 * CSS-special characters would otherwise produce an invalid `data-bs-target` selector, and
 * names differing only in whitespace vs. "-" would share one id.
 */
function renderFilters() {
  const groups = Object.entries(args ?? {});
  entityColors = getEntityColors(groups.map(([type]) => type));
  render(
    html`
      <div class="accordion" id="entityAccordion">
        ${groups.map(([type, entities], index) => {
          const collapseId = `collapse-${index}`;
          // A partially streamed or malformed value may not be an array; treat it as empty
          const items = Array.isArray(entities) ? entities : [];
          return html`
            <div class="accordion-item">
              <h2 class="accordion-header">
                <button
                  class="accordion-button collapsed fw-bold"
                  type="button"
                  data-bs-toggle="collapse"
                  data-bs-target="#${collapseId}"
                >
                  <i class="bi bi-circle-fill me-2" style="color: ${entityColors[type]}"></i> ${type}
                  (${items.length})
                </button>
              </h2>
              <div id="${collapseId}" class="accordion-collapse collapse" data-bs-parent="#entityAccordion">
                <div class="accordion-body p-0">
                  ${items.map(
                    (entity) => html`
                      <button
                        class="btn btn-link text-start text-decoration-none w-100 py-1 px-3"
                        @click=${() => toggleEntity(entity, type)}
                      >
                        <i
                          class="bi bi-circle-fill me-2"
                          style="color: ${selectedEntities.has(entity) ? entityColors[type] : "transparent"}"
                        ></i>
                        ${entity}
                      </button>
                    `
                  )}
                </div>
              </div>
            </div>
          `;
        })}
      </div>
    `,
    $filters
  );
}
/**
 * Build the regex used to locate one entity in the document text.
 *
 * Tries an exact match first (regex metacharacters escaped, any whitespace run matching any
 * whitespace run). If that finds nothing in `text`, falls back to a permissive,
 * case-insensitive pattern in which each run of punctuation matches any run of punctuation,
 * including none.
 *
 * @param {string} entity - entity text as returned by the LLM
 * @param {string} text - document text the regex will be applied to
 * @returns {RegExp|null} a global regex, or null when the entity is blank
 */
function buildEntityRegex(entity, text) {
  if (typeof entity !== "string" || !entity.trim()) return null;
  const exactPattern = entity.replace(/[.*+?^${}()|[\]\\]/g, "\\$&").replace(/\s+/g, "\\s+");
  const exactRegex = new RegExp(exactPattern, "g");
  if (text.search(exactRegex) !== -1) return exactRegex;
  const permissivePattern = entity
    // Let any run of punctuation match any (possibly empty) run of punctuation
    .replace(/[^\w\s]+/g, "[^\\w\\s]*?")
    // Allow spaces to match any whitespace
    .replace(/\s+/g, "\\s+");
  return new RegExp(permissivePattern, "gi");
}

/**
 * Wrap every occurrence of the given entities in `text` with a highlight <span>.
 *
 * All matches are located on the ORIGINAL text and the output is assembled once at the end,
 * so a later entity can never match inside the markup inserted for an earlier one.
 * Longer entities are matched first; a match overlapping an already claimed range is skipped.
 *
 * @param {string} text - document text
 * @param {Iterable<[string, string]>} entities - [entity, type] pairs
 * @param {Object<string, string>} colors - { type: color }
 * @returns {string} text with highlight spans inserted
 */
function highlightEntities(text, entities, colors) {
  const longestFirst = [...entities].sort(([a], [b]) => b.length - a.length);
  const claimed = [];
  for (const [entity, type] of longestFirst) {
    const regex = buildEntityRegex(entity, text);
    if (!regex) continue;
    for (const match of text.matchAll(regex)) {
      const start = match.index;
      const end = start + match[0].length;
      // The permissive pattern can match the empty string (punctuation-only entity); ignore those
      if (end === start) continue;
      const overlaps = claimed.some((range) => start < range.end && range.start < end);
      if (!overlaps) claimed.push({ start, end, type });
    }
  }
  claimed.sort((a, b) => a.start - b.start);
  let output = "";
  let cursor = 0;
  for (const { start, end, type } of claimed) {
    const color = colors[type];
    output += text.slice(cursor, start);
    output += `<span class="entity-highlight" style="background-color: ${color}50; border-bottom: 2px solid ${color}">${text.slice(start, end)}</span>`;
    cursor = end;
  }
  return output + text.slice(cursor);
}

/**
 * Re-render the document into $matches with every selected entity highlighted, then
 * scroll the first highlight into view when an entity has just been switched on.
 */
function updateHighlights() {
  const content = highlightEntities($document.value, selectedEntities, entityColors);
  render(unsafeHTML(marked.parse(content)), $matches);
  // Scroll to the first match if there are any selections
  if (selectedEntities.size > 0 && latestEntity) {
    setTimeout(() => {
      const firstMatch = $matches.querySelector(".entity-highlight");
      if (!firstMatch) return;
      // scrollIntoViewIfNeeded is non-standard, so fall back to the standard API when it is missing
      if (typeof firstMatch.scrollIntoViewIfNeeded === "function") firstMatch.scrollIntoViewIfNeeded();
      else firstMatch.scrollIntoView({ block: "center" });
    }, 100);
  }
}
/**
 * Flip the selection state of one entity, then refresh the highlights and the filter list.
 *
 * Selecting stores entity -> type in `selectedEntities` and records the entity as
 * `latestEntity`; deselecting removes it and resets `latestEntity` to null.
 *
 * @param {string} entity - entity text
 * @param {string} type - entity type the entity is listed under
 */
function toggleEntity(entity, type) {
  // Map.delete reports whether the key existed, i.e. whether the entity was selected
  const wasSelected = selectedEntities.delete(entity);
  if (wasSelected) {
    latestEntity = null;
  } else {
    selectedEntities.set(entity, type);
    latestEntity = entity;
  }
  updateHighlights();
  renderFilters();
}
// When the #document-file value changes, fetch that URL and load its text into the document
// input. HTTP errors and network failures are logged and leave the current text untouched.
$documentFile.addEventListener("change", async () => {
  const doc = $documentFile.value;
  if (!doc) return;
  try {
    const response = await fetch(doc);
    // Without this check the body of a 404/500 page would be loaded as the document
    if (!response.ok) throw new Error(`${response.status} ${response.statusText}`);
    const text = await response.text();
    // Ignore a stale response if the selection changed while this one was loading
    if ($documentFile.value === doc) $document.value = text;
  } catch (error) {
    console.error(`Could not load document ${doc}`, error);
  }
});