From 426ad36375c57bd86b7144df8f8e978ec7cfc7bc Mon Sep 17 00:00:00 2001 From: Peter Schroedl Date: Fri, 2 Aug 2024 16:56:52 +0000 Subject: [PATCH] remove daulet tokenizer, WIP debug --- server/ai_http.go | 46 ++-------------------------------------------- 1 file changed, 2 insertions(+), 44 deletions(-) diff --git a/server/ai_http.go b/server/ai_http.go index 3207378838..ac59de363d 100644 --- a/server/ai_http.go +++ b/server/ai_http.go @@ -9,7 +9,6 @@ import ( "strconv" "time" - "github.com/daulet/tokenizers" "github.com/getkin/kin-openapi/openapi3filter" "github.com/livepeer/ai-worker/worker" "github.com/livepeer/go-livepeer/clog" @@ -298,49 +297,8 @@ func handleAIRequest(ctx context.Context, w http.ResponseWriter, r *http.Request return orch.TextToSpeech(ctx, v) } - tokenizerJSON := `{ - "architectures": [ - "BertForMaskedLM" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "transformers_version": "4.6.0.dev0", - "type_vocab_size": 2, - "use_cache": true, - "vocab_size": 30522 - }` - - // Convert the JSON string to a byte slice - tokenizerBytes := []byte(tokenizerJSON) - - // Config could also be loaded FromFile - tk, err := tokenizers.FromBytes(tokenizerBytes) - if err != nil { - respondWithError(w, "Bad tokenizer json", http.StatusBadRequest) - return - } - prompt := v.TextInput - e, enc := tk.Encode(*prompt, false) - if e != nil { - respondWithError(w, "Bad Tokenizing", http.StatusBadRequest) - return - } - tok := len(enc) - outPixels = int64(tok * 1000) - // release native resources - defer tk.Close() + outPixels *= 1000 + default: respondWithError(w, "Unknown request type", http.StatusBadRequest) return