8
8
record ,
9
9
tokenization ,
10
10
notification ,
11
+ organization ,
11
12
)
12
13
import torch
13
14
import traceback
24
25
from util import daemon , request_util
25
26
from util .decorator import param_throttle
26
27
from util .embedders import get_embedder
27
- from util .notification import send_project_update
28
+ from util .notification import send_project_update , embedding_warning_templates
28
29
import os
29
30
import pandas as pd
30
- from submodules .model .business_objects import embedding , general , organization
31
31
from submodules .s3 import controller as s3
32
32
33
33
logging .basicConfig (level = logging .INFO )
@@ -133,7 +133,7 @@ def prepare_run_encoding(request: data_type.Request, embedding_type: str) -> int
133
133
request .project_id ,
134
134
request .user_id ,
135
135
message ,
136
- " ERROR" ,
136
+ enums . Notification . ERROR . value ,
137
137
enums .NotificationType .EMBEDDING_CREATION_FAILED .value ,
138
138
True ,
139
139
)
@@ -142,11 +142,6 @@ def prepare_run_encoding(request: data_type.Request, embedding_type: str) -> int
142
142
f"notification_created:{ request .user_id } " ,
143
143
True ,
144
144
)
145
- embedding .update_embedding_state_failed (
146
- request .project_id ,
147
- embedding_id ,
148
- with_commit = True ,
149
- )
150
145
doc_ock .post_embedding_failed (
151
146
request .user_id , request .config_string
152
147
)
@@ -175,7 +170,7 @@ def run_encoding(
175
170
request .project_id ,
176
171
request .user_id ,
177
172
f"Initializing model { request .config_string } . This can take a few minutes." ,
178
- " INFO" ,
173
+ enums . Notification . INFO . value ,
179
174
enums .NotificationType .EMBEDDING_CREATION_STARTED .value ,
180
175
True ,
181
176
)
@@ -184,7 +179,9 @@ def run_encoding(
184
179
)
185
180
iso2_code = project .get_blank_tokenizer_from_project (request .project_id )
186
181
try :
187
- embedder = get_embedder (request .project_id , embedding_type , request .config_string , iso2_code )
182
+ embedder = get_embedder (
183
+ request .project_id , embedding_type , request .config_string , iso2_code
184
+ )
188
185
except OSError :
189
186
embedding .update_embedding_state_failed (
190
187
request .project_id ,
@@ -201,7 +198,7 @@ def run_encoding(
201
198
request .project_id ,
202
199
request .user_id ,
203
200
message ,
204
- " ERROR" ,
201
+ enums . Notification . ERROR . value ,
205
202
enums .NotificationType .EMBEDDING_CREATION_FAILED .value ,
206
203
True ,
207
204
)
@@ -226,7 +223,7 @@ def run_encoding(
226
223
request .project_id ,
227
224
request .user_id ,
228
225
message ,
229
- " ERROR" ,
226
+ enums . Notification . ERROR . value ,
230
227
enums .NotificationType .EMBEDDING_CREATION_FAILED .value ,
231
228
True ,
232
229
)
@@ -240,7 +237,7 @@ def run_encoding(
240
237
request .project_id ,
241
238
request .user_id ,
242
239
f"Could not load model { request .config_string } . Please contact the support." ,
243
- " ERROR" ,
240
+ enums . Notification . ERROR . value ,
244
241
enums .NotificationType .EMBEDDING_CREATION_FAILED .value ,
245
242
True ,
246
243
)
@@ -283,7 +280,7 @@ def run_encoding(
283
280
request .project_id ,
284
281
request .user_id ,
285
282
f"Started encoding { attribute_name } using model { request .config_string } ." ,
286
- " INFO" ,
283
+ enums . Notification . INFO . value ,
287
284
enums .NotificationType .EMBEDDING_CREATION_STARTED .value ,
288
285
True ,
289
286
)
@@ -324,6 +321,39 @@ def run_encoding(
324
321
initial_count ,
325
322
)
326
323
except Exception :
324
+ for warning_type , idx_list in embedder .get_warnings ().items ():
325
+ # use last record with warning as example
326
+ example_record_id = record_ids [idx_list [- 1 ]]
327
+
328
+ primary_keys = [
329
+ pk .name for pk in attribute .get_primary_keys (request .project_id )
330
+ ]
331
+ if primary_keys :
332
+ example_record_data = record .get (
333
+ request .project_id , example_record_id
334
+ ).data
335
+ example_record_msg = "with primary key: " + ", " .join (
336
+ [str (example_record_data [p_key ]) for p_key in primary_keys ]
337
+ )
338
+ else :
339
+ example_record_msg = " with record id: " + str (example_record_id )
340
+
341
+ warning_msg = embedding_warning_templates [warning_type ].format (
342
+ record_number = len (idx_list ), example_record_msg = example_record_msg
343
+ )
344
+
345
+ notification .create (
346
+ request .project_id ,
347
+ request .user_id ,
348
+ warning_msg ,
349
+ enums .Notification .WARNING .value ,
350
+ enums .NotificationType .EMBEDDING_CREATION_WARNING .value ,
351
+ True ,
352
+ )
353
+ send_project_update (
354
+ request .project_id , f"notification_created:{ request .user_id } " , True
355
+ )
356
+
327
357
embedding .update_embedding_state_failed (
328
358
request .project_id ,
329
359
embedding_id ,
@@ -337,27 +367,51 @@ def run_encoding(
337
367
request .project_id ,
338
368
request .user_id ,
339
369
"Error at runtime. Please contact support." ,
340
- " ERROR" ,
370
+ enums . Notification . ERROR . value ,
341
371
enums .NotificationType .EMBEDDING_CREATION_FAILED .value ,
342
372
True ,
343
373
)
344
374
send_project_update (
345
375
request .project_id , f"notification_created:{ request .user_id } " , True
346
376
)
347
377
print (traceback .format_exc (), flush = True )
348
- embedding .update_embedding_state_failed (
349
- request .project_id ,
350
- embedding_id ,
351
- with_commit = True ,
352
- )
353
- send_project_update (
354
- request .project_id ,
355
- f"embedding:{ embedding_id } :state:{ enums .EmbeddingState .FAILED .value } " ,
356
- )
357
378
doc_ock .post_embedding_failed (request .user_id , request .config_string )
358
379
return 500
359
380
360
381
if embedding .get (request .project_id , embedding_id ):
382
+ for warning_type , idx_list in embedder .get_warnings ().items ():
383
+ # use last record with warning as example
384
+ example_record_id = record_ids [idx_list [- 1 ]]
385
+
386
+ primary_keys = [
387
+ pk .name for pk in attribute .get_primary_keys (request .project_id )
388
+ ]
389
+ if primary_keys :
390
+ example_record_data = record .get (
391
+ request .project_id , example_record_id
392
+ ).data
393
+ example_record_msg = "with primary key: " + ", " .join (
394
+ [str (example_record_data [p_key ]) for p_key in primary_keys ]
395
+ )
396
+ else :
397
+ example_record_msg = " with record id: " + str (example_record_id )
398
+
399
+ warning_msg = embedding_warning_templates [warning_type ].format (
400
+ record_number = len (idx_list ), example_record_msg = example_record_msg
401
+ )
402
+
403
+ notification .create (
404
+ request .project_id ,
405
+ request .user_id ,
406
+ warning_msg ,
407
+ enums .Notification .WARNING .value ,
408
+ enums .NotificationType .EMBEDDING_CREATION_WARNING .value ,
409
+ True ,
410
+ )
411
+ send_project_update (
412
+ request .project_id , f"notification_created:{ request .user_id } " , True
413
+ )
414
+
361
415
if embedding_type == "classification" :
362
416
request_util .post_embedding_to_neural_search (
363
417
request .project_id , embedding_id
@@ -376,7 +430,7 @@ def run_encoding(
376
430
request .project_id ,
377
431
request .user_id ,
378
432
f"Finished encoding { attribute_name } using model { request .config_string } ." ,
379
- " SUCCESS" ,
433
+ enums . Notification . SUCCESS . value ,
380
434
enums .NotificationType .EMBEDDING_CREATION_DONE .value ,
381
435
True ,
382
436
)
0 commit comments