Skip to content

Commit

Permalink
Rename container name and log file name
Browse files Browse the repository at this point in the history
  • Loading branch information
nailixing committed Jun 10, 2020
1 parent f41d2ca commit 6c834fd
Show file tree
Hide file tree
Showing 9 changed files with 26 additions and 15 deletions.
8 changes: 5 additions & 3 deletions singa_auto/admin/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,9 @@ def __init__(self,
self._base_worker_image = '{}:{}'.format(
os.environ['SINGA_AUTO_IMAGE_WORKER'],
os.environ['SINGA_AUTO_VERSION'])
self._services_manager = ServicesManager(self._meta_store,
container_manager)
self._services_manager = ServicesManager(meta_store=self._meta_store,
container_manager=container_manager,
)

def __enter__(self):
self._meta_store.connect()
Expand Down Expand Up @@ -777,7 +778,8 @@ def get_running_inference_job(self, user_id, app, app_version=-1):
if self.container_model == 'K8S':
_ingress_port = os.environ["INGRESS_EXT_PORT"]
ingress_host = f'{predictor_service.ext_hostname}:{_ingress_port}/{app}'
predictor_host = ",".join([predictor_service.host, ingress_host]) if predictor_service is not None else None
# predictor_host = ",".join([predictor_service.host, ingress_host]) if predictor_service is not None else None
predictor_host = ingress_host if predictor_service is not None else None
elif self.container_model == 'SWARM':
predictor_host = predictor_service.host if predictor_service is not None else None
else:
Expand Down
14 changes: 10 additions & 4 deletions singa_auto/admin/services_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import json
import os
import logging
import re
import traceback
import socket
from collections import defaultdict
Expand Down Expand Up @@ -62,10 +63,12 @@ def __init__(self,
meta_store=None,
container_manager=None,
var_autoforward=None):

if var_autoforward is None:
var_autoforward = ENVIRONMENT_VARIABLES_AUTOFORWARD
self._meta_store: MetaStore = meta_store or MetaStore()
self._container_manager: ContainerManager = container_manager
self.service_app_name = None

# Ensure that environment variable exists, failing fast
for x in var_autoforward:
Expand Down Expand Up @@ -397,7 +400,6 @@ def _create_predictor(self, inference_job, inferenceAppName: str):
inferenceAppName=inferenceAppName
)


self._meta_store.update_inference_job(inference_job,
predictor_service_id=service.id)
self._meta_store.commit()
Expand Down Expand Up @@ -463,7 +465,6 @@ def _stop_service(self, service):
self._meta_store.mark_service_as_stopped(service)
self._meta_store.commit()


def _create_service(self,
service_type,
docker_image,
Expand Down Expand Up @@ -521,8 +522,13 @@ def _create_service(self,
publish_port = (ext_port, container_port)

try:
container_service_name = 'singa-auto-svc-{}-{}'.format(
service_type.lower(), service.id)

service_app_name = re.sub('[^a-zA-Z0-9]', '-', self.service_app_name.lower())
service_app_name = re.sub('-+', '-', service_app_name)

container_service_name = '{}-{}-{}'.format(
service_app_name, service_type.lower(), service.id.split('-')[0])

container_service = self._container_manager.create_service(
service_name=container_service_name,
docker_image=docker_image,
Expand Down
2 changes: 2 additions & 0 deletions singa_auto/admin/view/inference_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def create_inference_job(auth, params):
app_version = -1

with admin:
admin._services_manager.service_app_name = params['app']
return jsonify(admin.create_inference_job(user_id=auth['user_id'],
app=params['app'],
app_version=app_version,
Expand All @@ -56,6 +57,7 @@ def create_inference_job_by_checkpoint(auth, params):
budget = params['budget'] if 'budget' in params else {}
budget = {InferenceBudgetOption.GPU_COUNT: 0, **budget}
with admin:
admin._services_manager.service_app_name = params['model_name']
return jsonify(admin.create_inference_job_by_checkpoint(user_id=auth['user_id'],
budget=budget,
model_name=params['model_name']
Expand Down
1 change: 1 addition & 0 deletions singa_auto/admin/view/train_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def create_train_job(auth, params):
feed_params['train_args'] = params['train_args']

with admin:
admin._services_manager.service_app_name = params['app']
# Ensure that datasets are owned by current user
dataset_attrs = ['train_dataset_id', 'val_dataset_id']
for attr in dataset_attrs:
Expand Down
6 changes: 3 additions & 3 deletions singa_auto/predictor/predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def start(self):

def predict(self, queries):
worker_predictions_list = self._get_predictions_from_workers(queries)
print("Getting prediction list")
logger.info("Getting prediction list")
predictions = self._combine_worker_predictions(worker_predictions_list)
return predictions

Expand Down Expand Up @@ -127,7 +127,7 @@ def _get_predictions_from_workers(
worker_ids = []

while len(worker_ids) == 0:
print("Getting free worker from redis...")
logger.info("Getting free worker from redis...")
worker_ids = self._redis_cache.get_workers()
time.sleep(0.5)

Expand All @@ -154,7 +154,7 @@ def _get_predictions_from_workers(
# Record prediction & mark as not pending
query_id_to_predictions[query_id].append(prediction)
pending_queries.remove((query_id, worker_id))
print("Getting prediction result from kafka...")
logger.info("Getting prediction result from kafka...")
time.sleep(PREDICT_LOOP_SLEEP_SECS)

# Reorganize predictions
Expand Down
1 change: 1 addition & 0 deletions singa_auto/redis/redis.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ def delete_from_set(self, name, value):

def list_set(self, name):
key = self._get_redis_name(name)
logger.info('Getting the redis key: {}'.format(key))
values = self._redis.smembers(key)
return [self._decode_value(x) for x in values]

Expand Down
4 changes: 2 additions & 2 deletions singa_auto/utils/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,5 @@ def configure_logging(process_name):
level=logging.INFO,
format='%(asctime)s %(name)s %(levelname)s %(message)s',
datefmt='%d-%b-%y %H:%M:%S',
filename='{}/process_name_{}.log'.format(logs_folder_path,
process_name))
filename='{}/{}.log'.format(logs_folder_path,
process_name))
4 changes: 2 additions & 2 deletions singa_auto/utils/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ def run_worker(meta_store, start_worker, stop_worker):
service_id = os.environ['SINGA_AUTO_SERVICE_ID']
service_type = os.environ['SINGA_AUTO_SERVICE_TYPE']
container_id = os.environ.get('HOSTNAME', 'localhost')
configure_logging('{}-SvcID-{}-ContainerID-{}'
.format(curr_time, service_id,container_id))
configure_logging('{}-{}'
.format(container_id, curr_time))

def _sigterm_handler(_signo, _stack_frame):
logger.warn("Terminal signal received: %s, %s" % (_signo, _stack_frame))
Expand Down
1 change: 0 additions & 1 deletion singa_auto/worker/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,6 @@ def _predict(self, queries: List[Query]) -> List[Prediction]:
try:
predictions = self._model_inst.predict([x.query for x in queries])
except:
print('Error while making predictions:')
logger.error('Error while making predictions:')
logger.error(traceback.format_exc())
predictions = [None for x in range(len(queries))]
Expand Down

0 comments on commit 6c834fd

Please sign in to comment.