Skip to content

Commit 09c60e4

Browse files
FelixKirschJWittmeyer
and
JWittmeyer
authored
integrates model-provider service (#9)
* integrates model-provider service * adds project_id to model-provider request * removes project_id and revision from model-provider api calls * try auto select model dir * adds error message if model is deleted during creation Co-authored-by: JWittmeyer <[email protected]>
1 parent 36c2785 commit 09c60e4

File tree

5 files changed

+150
-1
lines changed

5 files changed

+150
-1
lines changed

check_config_service

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#!/bin/bash
# Verifies that a refinery-config container is up before the embedder dev
# container starts; exits non-zero otherwise so `start` can abort early.
#
# The container name differs by environment:
#   - docker-compose v1: dev-setup_refinery-config_1 (underscores)
#   - docker-compose v2: dev-setup-refinery-config-1 (hyphens)
#   - plain docker run:  refinery-config
# Names are checked in priority order; the first name that matches an
# existing container decides the outcome (mirrors the original if/elif/else
# chain, where a found-but-stopped container did not fall through).
CONFIG_EXISTS=0
for NAME in dev-setup_refinery-config_1 dev-setup-refinery-config-1 refinery-config
do
    if [ -n "$(docker ps -q -f name=$NAME)" ]
    then
        # Container matched the filter; count it only when actually running.
        if [ "$( docker container inspect -f '{{.State.Status}}' $NAME )" == "running" ]
        then
            CONFIG_EXISTS=1
        fi
        break
    fi
done
if [ $CONFIG_EXISTS -eq 0 ]
then
    echo "refinery-config couldn't be found - exit"
    exit 1
else
    echo "refinery-config found -> proceeding"
fi
31+

controller.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from typing import Any, Dict, Iterator, List
2424

2525
from util import daemon, request_util
26+
from util.config_handler import get_config_value
2627
from util.decorator import param_throttle
2728
from util.embedders import get_embedder
2829
from util.notification import send_project_update, embedding_warning_templates
@@ -179,8 +180,15 @@ def run_encoding(
179180
)
180181
iso2_code = project.get_blank_tokenizer_from_project(request.project_id)
181182
try:
183+
if not __is_embedders_internal_model(
184+
request.config_string
185+
) and get_config_value("is_managed"):
186+
config_string = request_util.get_model_path(request.config_string)
187+
else:
188+
config_string = request.config_string
189+
182190
embedder = get_embedder(
183-
request.project_id, embedding_type, request.config_string, iso2_code
191+
request.project_id, embedding_type, config_string, iso2_code
184192
)
185193
except OSError:
186194
embedding.update_embedding_state_failed(
@@ -206,6 +214,30 @@ def run_encoding(
206214
request.project_id, f"notification_created:{request.user_id}", True
207215
)
208216
return 422
217+
except ValueError:
218+
embedding.update_embedding_state_failed(
219+
request.project_id,
220+
embedding_id,
221+
with_commit=True,
222+
)
223+
send_project_update(
224+
request.project_id,
225+
f"embedding:{embedding_id}:state:{enums.EmbeddingState.FAILED.value}",
226+
)
227+
doc_ock.post_embedding_failed(request.user_id, request.config_string)
228+
message = f"Model {request.config_string} was deleted during the creation process."
229+
notification.create(
230+
request.project_id,
231+
request.user_id,
232+
message,
233+
enums.Notification.ERROR.value,
234+
enums.NotificationType.EMBEDDING_CREATION_FAILED.value,
235+
True,
236+
)
237+
send_project_update(
238+
request.project_id, f"notification_created:{request.user_id}", True
239+
)
240+
return 422
209241

210242
if not embedder:
211243
embedding.update_embedding_state_failed(
@@ -503,3 +535,7 @@ def upload_embedding_as_file(
503535
sql_df.to_csv(file_name, mode="a", index=False)
504536
s3.upload_object(org_id, s3_file_name, file_name)
505537
os.remove(file_name)
538+
539+
540+
def __is_embedders_internal_model(model_name: str):
    """Return True when the model is one of the embedder's built-in bag models.

    These models are computed locally and never fetched from the
    model-provider service.
    """
    internal_models = ("bag-of-characters", "bag-of-words", "tf-idf")
    return model_name in internal_models

start

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#!/bin/bash
22
trap "echo -ne '\nstopping container...' && docker stop refinery-embedder > /dev/null 2>&1 && echo -ne '\t\t [done]\n'" EXIT
33

4+
source check_config_service
5+
46
HOST_IP=$(docker network inspect bridge --format='{{json .IPAM.Config}}' | grep -o '[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}' | tail -1)
57

68
echo -ne 'stopping old container...'
@@ -11,6 +13,20 @@ echo -ne 'building container...'
1113
docker build -t refinery-embedder-dev -f dev.Dockerfile .
1214
echo -ne '\t\t [done]\n'
1315

16+
MODEL_DIR=${PWD%/*}/dev-setup/model-data/
17+
if [ ! -d "$MODEL_DIR" ]
18+
then
19+
MODEL_DIR=${PWD%/*/*}/dev-setup/model-data/
20+
if [ ! -d "$MODEL_DIR" ]
21+
then
22+
# to include volume for local development, use the dev-setup model data folder:
23+
# alternative use manual logic with
24+
# -v /path/to/dev-setup/model-data:/models \
25+
echo "Can't find model data directory: $MODEL_DIR -> stopping"
26+
exit 1
27+
fi
28+
fi
29+
1430
echo -ne 'starting...'
1531
docker run -d --rm \
1632
--name refinery-embedder \
@@ -20,12 +36,15 @@ docker run -d --rm \
2036
-e S3_SECRET_KEY=r6ywtR33!DMlaL*SUUdy \
2137
-e POSTGRES=postgresql://postgres:onetask@graphql-postgres:5432 \
2238
-e DOC_OCK=http://refinery-doc-ock:80 \
39+
-e MODEL_PROVIDER=http://refinery-model-provider:80 \
2340
-e WS_NOTIFY_ENDPOINT="http://refinery-websocket:8080" \
2441
-e NEURAL_SEARCH=http://refinery-neural-search:80 \
2542
--mount type=bind,source="$(pwd)"/,target=/app \
2643
-v /var/run/docker.sock:/var/run/docker.sock \
44+
-v "$MODEL_DIR":/models \
2745
--network dev-setup_default \
2846
refinery-embedder-dev > /dev/null 2>&1
2947
echo -ne '\t\t\t [done]\n'
3048

49+
3150
docker logs -f refinery-embedder

util/config_handler.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
from typing import Dict, Any, Optional, Union
2+
import requests
3+
import json
4+
import time
5+
from util import daemon
6+
7+
__config = None
8+
9+
# meant as a const value since env variables will be removed at some point
10+
REQUEST_URL = "http://refinery-config:80/full_config"
11+
12+
13+
def __get_config() -> Dict[str, Any]:
    """Return the cached service config, fetching it on first use.

    Any falsy cache value (None or empty dict) triggers a refresh via
    ``refresh_config``, which raises when the config service is unreachable.
    """
    global __config
    if not __config:
        refresh_config()
    return __config
19+
20+
21+
def refresh_config():
    """Fetch the full config from the config service and cache it module-wide.

    On success, schedules a daemon that invalidates the cache after one hour
    so the next read re-fetches.

    Raises:
        Exception: if the config service does not answer with HTTP 200.
    """
    response = requests.get(REQUEST_URL)
    if response.status_code != 200:
        # fixed message: original emitted e.g. "response.code503" (missing
        # separator) and "cant" (typo)
        raise Exception(
            f"Config service can't be reached -- response.code: {response.status_code}"
        )
    global __config
    # The endpoint returns a JSON-encoded JSON string, hence the double decode.
    __config = json.loads(json.loads(response.text))
    daemon.run(invalidate_after, 3600)  # drop cached config after one hour
31+
32+
33+
def get_config_value(
    key: str, subkey: Optional[str] = None
) -> Union[str, Dict[str, str]]:
    """Look up *key* (and optionally *subkey*) in the cached service config.

    Args:
        key: top-level config key.
        subkey: when given, the entry inside the dict stored under *key*.

    Returns:
        The raw config value, or the nested value when *subkey* is given.

    Raises:
        Exception: if *key* is missing, or *subkey* is given but the value is
            not a dict containing it. (Typo "coudn't" in both messages fixed.)
    """
    config = __get_config()
    if key not in config:
        raise Exception(f"Key {key} couldn't be found in config")
    value = config[key]

    if not subkey:
        return value

    if isinstance(value, dict) and subkey in value:
        return value[subkey]
    raise Exception(f"Subkey {subkey} couldn't be found in config[{key}]")
48+
49+
50+
def invalidate_after(sec: int) -> None:
    """Sleep *sec* seconds, then clear the module-level config cache.

    Run as a daemon by ``refresh_config`` so the config is re-fetched
    at most an hour after it was cached.
    """
    global __config
    time.sleep(sec)
    __config = None

util/request_util.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import requests
33

44
NEURAL_SEARCH_BASE_URI = os.getenv("NEURAL_SEARCH")
5+
MODEL_PROVIDER_BASE_URI = os.getenv("MODEL_PROVIDER")
56

67

78
def post_embedding_to_neural_search(project_id: str, embedding_id: str) -> None:
@@ -17,3 +18,12 @@ def delete_embedding_from_neural_search(embedding_id: str) -> None:
1718
url = f"{NEURAL_SEARCH_BASE_URI}/delete_collection"
1819
params = {"embedding_id": embedding_id}
1920
requests.put(url, params=params)
21+
22+
23+
def get_model_path(model_name: str) -> str:
    """Ask the model-provider service for the storage path of *model_name*.

    Returns the decoded JSON body of the response (the path string).
    """
    response = requests.get(
        f"{MODEL_PROVIDER_BASE_URI}/model_path",
        params={"model_name": model_name},
    )
    return response.json()

0 commit comments

Comments
 (0)