diff --git a/assets/Architecture.PNG b/assets/Architecture.PNG
deleted file mode 100644
index 9cd6ec2..0000000
Binary files a/assets/Architecture.PNG and /dev/null differ
diff --git a/assets/folder_structure.PNG b/assets/folder_structure.PNG
deleted file mode 100644
index 08e301e..0000000
Binary files a/assets/folder_structure.PNG and /dev/null differ
diff --git a/cfn/params.json b/cfn/params.json
index 1806e4e..ed6e3f7 100644
--- a/cfn/params.json
+++ b/cfn/params.json
@@ -1,22 +1,22 @@
 [
     {
         "ParameterKey": "GitHubRepo",
-        "ParameterValue": "codepipeline-ecr-build-sf-execution"
+        "ParameterValue": "enter the name of the cloned repo here, e.g. codepipeline-ecr-build-sf-execution"
     },
     {
         "ParameterKey": "GitHubBranch",
-        "ParameterValue": "master"
-    },
-    {
-        "ParameterKey": "GitHubToken",
-        "ParameterValue": ""
-    },
-    {
-        "ParameterKey": "GitHubUser",
-        "ParameterValue": ""
-    },
-    {
-        "ParameterKey": "MlOpsStepFunctionArn",
-        "ParameterValue": ""
-    }
-]
\ No newline at end of file
+        "ParameterValue": "enter the branch name here, e.g. main"
+    },
+    {
+        "ParameterKey": "GitHubToken",
+        "ParameterValue": "enter GitHub token here"
+    },
+    {
+        "ParameterKey": "GitHubUser",
+        "ParameterValue": "enter GitHub user here"
+    },
+    {
+        "ParameterKey": "MlOpsStepFunctionArn",
+        "ParameterValue": "enter the MLOps Step Function ARN here"
+    }
+]
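Note: the parameter file above maps one-to-one onto CloudFormation's Parameters argument, so once the placeholders are filled in the stack can be launched directly. A minimal sketch, assuming boto3 credentials are configured; the template filename and stack name here are made up:

    import json
    import boto3

    with open('cfn/params.json') as f:
        parameters = json.load(f)          # already in ParameterKey/ParameterValue form

    with open('cfn/pipeline.yaml') as f:   # hypothetical template path
        template_body = f.read()

    cfn = boto3.client('cloudformation')
    cfn.create_stack(
        StackName='mlops-codepipeline-stack',  # any unique name works
        TemplateBody=template_body,
        Parameters=parameters,
        Capabilities=['CAPABILITY_IAM'],       # assumed: the template creates IAM roles
    )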
diff --git a/container/Dockerfile b/container/Dockerfile
index da16d59..80e34b1 100644
--- a/container/Dockerfile
+++ b/container/Dockerfile
@@ -3,24 +3,44 @@
 # for serving inferences in a stable way.
 
 FROM public.ecr.aws/lts/ubuntu:20.04
 
+RUN apt update
+RUN apt install -y software-properties-common
+RUN add-apt-repository ppa:deadsnakes/ppa
+RUN apt install -y python3.7
+# install pip
+RUN apt install -y python3-pip
 
-RUN apt-get -y update && apt-get install -y --no-install-recommends \
-         wget \
-         python \
-         nginx \
-         ca-certificates \
-    && rm -rf /var/lib/apt/lists/*
+RUN apt-get install -y --no-install-recommends nginx ca-certificates
+RUN rm -rf /var/lib/apt/lists/*
 
-# Here we get all python packages.
-# There's substantial overlap between scipy and numpy that we eliminate by
-# linking them together. Likewise, pip leaves the install caches populated which uses
-# a significant amount of space. These optimizations save a fair amount of space in the
-# image, which reduces start up time.
-RUN wget https://bootstrap.pypa.io/pip/2.7/get-pip.py && python get-pip.py && \
-    pip install numpy==1.16.2 scipy==1.2.1 scikit-learn==0.20.2 pandas flask gevent gunicorn && \
-        (cd /usr/local/lib/python2.7/dist-packages/scipy/.libs; rm *; ln ../../numpy/.libs/* .) && \
-        rm -rf /root/.cache
+RUN python3.7 -m pip install langid
+RUN python3.7 -m pip install langdetect
+RUN python3.7 -m pip install emoji
+RUN python3.7 -m pip install nltk==3.2.5
+RUN python3.7 -m pip install pandas==1.1.5
+RUN python3.7 -m pip install numpy==1.19.5
+RUN python3.7 -m pip install torch
+RUN python3.7 -m pip install transformers
+RUN python3.7 -m pip install flask
+# previous versions for the gevent and gunicorn packages
+#RUN python3.7 -m pip install gevent==1.5.0
+#RUN python3.7 -m pip install gunicorn
+
+#RUN python3.7 -m pip install gevent==1.3.4
+#RUN python3.7 -m pip install gunicorn==19.9.0
+
+#RUN python3.7 -m pip install gevent==20.6.2
+#RUN python3.7 -m pip install gunicorn==20.0.4
+
+RUN python3.7 -m pip install gevent==20.9.0
+RUN python3.7 -m pip install gunicorn==20.0.4
+
+RUN python3.7 -m pip install scikit-learn
+RUN python3.7 -m pip install gensim
+RUN python3.7 -m pip install xlrd
+RUN python3.7 -m pip install openpyxl
 
 # Set some environment variables. PYTHONUNBUFFERED keeps Python from buffering our standard
 # output stream, which means that logs can be delivered to the user quickly. PYTHONDONTWRITEBYTECODE
@@ -37,4 +57,3 @@ WORKDIR /opt/program
 
 RUN chmod +x /opt/program/train
 RUN chmod +x /opt/program/serve
-
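Note: the Dockerfile above installs everything in separate pip layers, and only some of the packages are pinned. A quick way to sanity-check the resulting image is an import smoke test; a minimal sketch, assuming it is run with python3.7 inside the built container:

    # smoke_test.py -- run inside the image with: python3.7 smoke_test.py
    import importlib

    # every package the Dockerfile installs via pip (scikit-learn imports as sklearn)
    for name in ['langid', 'langdetect', 'emoji', 'nltk', 'pandas', 'numpy',
                 'torch', 'transformers', 'flask', 'gevent', 'gunicorn',
                 'sklearn', 'gensim', 'xlrd', 'openpyxl']:
        module = importlib.import_module(name)
        print(name, getattr(module, '__version__', 'no __version__'))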
diff --git a/container/decision_trees/predictor.py b/container/decision_trees/predictor.py
index 3a2fbc1..c536657 100644
--- a/container/decision_trees/predictor.py
+++ b/container/decision_trees/predictor.py
@@ -6,14 +6,16 @@
 import os
 import json
 import pickle
-import StringIO
 import sys
 import signal
 import traceback
-
 import flask
+import io
+
+import numpy as np
 import pandas as pd
+import torch
 
 prefix = '/opt/ml/'
 model_path = os.path.join(prefix, 'model')
@@ -27,21 +29,65 @@ class ScoringService(object):
     @classmethod
     def get_model(cls):
         """Get the model object for this instance, loading it if it's not already loaded."""
+        print('test get model -- test+1')
         if cls.model == None:
-            with open(os.path.join(model_path, 'decision-tree-model.pkl'), 'r') as inp:
-                cls.model = pickle.load(inp)
+            #with open(os.path.join(model_path, 'decision-tree-model.pkl'), 'r') as inp:
+            #    with open(os.path.join(model_path, 'best-LSTM-model-parameters.pkl'), 'rb') as inp:
+            #        cls.model = pickle.load(inp)
+            #the_model = torch.load(os.path.join(model_path, 'best-LSTM-model-parameters.pt'))
+            #return cls.model
+
+            cls.model = torch.jit.load(os.path.join(model_path, 'best-LSTM-model-parameters.pth'))
+
         return cls.model
 
     @classmethod
     def predict(cls, input):
+        print('test predict')
         """For the input, do the predictions and return them.
 
         Args:
            input (a pandas dataframe): The data on which to do the predictions. There will be
                one prediction per row in the dataframe"""
-        clf = cls.get_model()
-        return clf.predict(input)
-
+
+        try:
+            clf = cls.get_model()
+        except:
+            print('did not get the model')
+
+        print('@@@@@@@@@@@@@@@@@@@')
+        print(type(input))
+        print(input)
+        print(input[0])
+        print(input[1])
+        print('@@@@@@@@@@@@@@@@@@@')
+
+        test_input=[[2779, 4496, 1744, 3480,  674,   24,   24,   24,   24,   24,   24,
+                       24,   24,   24,   24,   24,   24,   24,   24,   24,   24,   24,
+                       24,   24,   24,   24,   24,   24,   24,   24,   24,   24,   24,
+                       24,   24,   24,   24,   24,   24,   24,   24,   24,   24,   24,
+                       24,   24,   24,   24,   24,   24]]
+
+        #return clf.predict(input)
+
+        try:
+            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+            x = torch.from_numpy(np.array(input)).to(device)
+        except Exception as error:
+            print('Caught this error: ' + repr(error))
+            print('error when generating input')
+
+        try:
+            with torch.no_grad():
+                result = clf(x)
+        except Exception as error:
+            print('Caught this error: ' + repr(error))
+            print('error when returning results for prediction')
+
+        return result
+
 # The flask app for serving predictions
 app = flask.Flask(__name__)
 
@@ -49,6 +95,7 @@ def predict(cls, input):
 def ping():
     """Determine if the container is working and healthy. In this sample container, we declare
     it healthy if we can load the model successfully."""
+    print('testing ping()')
     health = ScoringService.get_model() is not None  # You can insert a health check here
 
     status = 200 if health else 404
@@ -61,23 +108,44 @@ def transformation():
     just means one prediction per line, since there's a single column.
     """
     data = None
-
+    print('testing transformation')
     # Convert from CSV to pandas
     if flask.request.content_type == 'text/csv':
         data = flask.request.data.decode('utf-8')
-        s = StringIO.StringIO(data)
+        s = io.StringIO(data)
         data = pd.read_csv(s, header=None)
     else:
        return flask.Response(response='This predictor only supports CSV data', status=415, mimetype='text/plain')
+
+    print('#############')
+    print('aaa')
+    print('#############1')
+    print(data)
+    print('#############2')
+    print(data[0])
+    print('#############3')
+    print(data.head())
+    print(type(data))
+    print('############4')
+    print('start')
+    print(len(data[0]))
+    for x in range(len(data[0])):
+        print(x)
+
+    print('aaa')
 
     print('Invoked with {} records'.format(data.shape[0]))
 
     # Do the prediction
    predictions = ScoringService.predict(data)
-
+
+    print('*******')
+    print(predictions)
+    print('*******')
+
     # Convert from numpy back to CSV
-    out = StringIO.StringIO()
-    pd.DataFrame({'results':predictions}).to_csv(out, header=False, index=False)
+    out = io.StringIO()
+    pd.DataFrame({'results':predictions[0]}).to_csv(out, header=False, index=False)
     result = out.getvalue()
 
     return flask.Response(response=result, status=200, mimetype='text/csv')
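Note: with the transformation() handler above, the container can be exercised locally the same way the deleted local_test/predict.sh did. A minimal sketch; the 50 padded token ids are made up, and port 8080 follows the serve script's convention:

    import requests

    row = [2779, 4496, 1744, 3480, 674] + [24] * 45   # one encoded review, seq_length = 50
    payload = ','.join(str(i) for i in row) + '\n'

    resp = requests.post(
        'http://localhost:8080/invocations',
        data=payload,
        headers={'Content-Type': 'text/csv'},         # any other content type returns HTTP 415
    )
    print(resp.status_code, resp.text)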
diff --git a/container/decision_trees/serve b/container/decision_trees/serve
index 6747fbd..796dfc7 100644
--- a/container/decision_trees/serve
+++ b/container/decision_trees/serve
@@ -1,4 +1,5 @@
-#!/usr/bin/env python
+#!/usr/bin/python3.7
+#######!/usr/bin/env python
 
 # This file implements the scoring service shell. You don't necessarily need to modify it for various
 # algorithms. It starts nginx and gunicorn with the correct configurations and then simply waits until
diff --git a/container/decision_trees/train b/container/decision_trees/train
index f6a216b..f2a3b79 100644
--- a/container/decision_trees/train
+++ b/container/decision_trees/train
@@ -1,4 +1,5 @@
-#!/usr/bin/env python
+#!/usr/bin/python3.7
+########!/usr/bin/env python
 
 # A sample training component that trains a simple scikit-learn decision tree model.
 # This implementation works in File mode and makes no assumptions about the input file names.
@@ -11,10 +12,23 @@
 import json
 import pickle
 import sys
 import traceback
+import xlrd
+import openpyxl
+print('testing message1')
+import re
+import nltk
+import langid
+import emoji
 
 import pandas as pd
+from langdetect import detect
+from nltk.tokenize import word_tokenize, sent_tokenize, TweetTokenizer
+from nltk.stem import WordNetLemmatizer
+from nltk.corpus import wordnet
+from nltk.corpus import brown
+from nltk.corpus import stopwords
+
-from sklearn import tree
 
 # These are the paths to where SageMaker mounts interesting things in your container.
@@ -30,6 +44,18 @@ param_path = os.path.join(prefix, 'input/config/hyperparameters.json')
 channel_name='training'
 training_path = os.path.join(input_path, channel_name)
 
+print(prefix)
+print(input_path)
+print(output_path)
+print(model_path)
+print(param_path)
+
+
+print('*****')
+print(training_path)
+print('*****')
+print('111')
+
 # The function to execute the training.
 def train():
     print('Starting the training.')
@@ -37,7 +63,8 @@ def train():
         # Read in any hyperparameters that the user passed with the training job
         with open(param_path, 'r') as tc:
             trainingParams = json.load(tc)
-
+
+
         # Take the set of files and read them all into a single pandas dataframe
         input_files = [ os.path.join(training_path, file) for file in os.listdir(training_path) ]
         if len(input_files) == 0:
@@ -45,26 +72,312 @@
                               'This usually indicates that the channel ({}) was incorrectly specified,\n' +
                               'the data specification in S3 was incorrectly specified or the role specified\n' +
                               'does not have permission to access the data.').format(training_path, channel_name))
-        raw_data = [ pd.read_csv(file, header=None) for file in input_files ]
-        train_data = pd.concat(raw_data)
+
+
+        print(input_files[0])
+        data1 = pd.read_excel(input_files[0], 'first_1k', engine='openpyxl')
+        data1.rename(columns = {'label':'class'}, inplace = True)
+
+        data2 = pd.read_excel(input_files[0], 'second_expanded', engine='openpyxl')
+        data = pd.concat([data1, data2])
+
+        data.rename(columns = {'Unnamed: 0':'id'}, inplace = True)
+
+        data3 = pd.read_excel(input_files[0], 'listings_summary', engine='openpyxl')
+        data3 = data3[['id', 'host_name']]
+        data = pd.merge(data, data3, left_on='listing_id', right_on = 'id', how = 'left')
+
+        corpus = list(data['review'])
+        name = list(data['host_name'])
+        label = list(data['class'])
+
+        print('stage 1 done')
+        # option: replace the host name
+        for i in range(len(corpus)):
+            corpus[i] = re.sub(name[i], 'host', str(corpus[i]))
+
+        for i in range(len(corpus)):
+            corpus[i] = re.sub(r'[0-9]+', '', str(corpus[i]))
+            corpus[i] = re.sub(r"http\S+", "", str(corpus[i]))
+            corpus[i] = emoji.demojize(str(corpus[i]), delimiters = ('',''))
+
+        print('stage 2 done')
+
+        # use the langid package here
+        del_list_li = []
+        for i in range(len(corpus)):
+            if langid.classify(corpus[i])[0] != 'en':
+                del_list_li.append(i)
+
+        import numpy as np
+        # delete non-English sentences
+        corpus_en = np.delete(corpus, del_list_li).tolist()
+        label_en = np.delete(label, del_list_li).tolist()
+
+        # tokenization: use TweetTokenizer
+        tknzr = TweetTokenizer()
+        tokenized = []
+        for sent in corpus_en:
+            tosent = list(tknzr.tokenize(sent))
+            #tosent = list(word_tokenize(sent))  # can use word_tokenize as well
+            tokenized.append(tosent)
+
+        # remove stopwords: you may change the contents of the stopword list
+        nltk.download('stopwords')
+        sw = stopwords.words('english')
+        for _ in ['!', ',', '.', '?', '\r', '\n', 's', '-', '(', ')', "'"]:  # add content
+            sw.append(_)
+        sw.remove('with')  # remove content
+        filtered = []
+        for sent in tokenized:
+            filtered.append([w for w in sent if w.lower() not in sw])
+
+        print('stage 3 done')
+
+        # lemmatization: first treat all words as 'v' to convert words like 'ate', 'met'
+        nltk.download('wordnet')
+        nltk.download('averaged_perceptron_tagger')
+
+        wnl = WordNetLemmatizer()
+        lemmated_ = []
+        for filters in filtered:
+            sent = []
+            for word in filters:
+                sent.append(wnl.lemmatize(word, 'v'))
+            lemmated_.append(sent)
+
+        # secondly use POS tags to convert the remaining words
+        def get_wordnet_pos(tag):
+            if tag.startswith('J'):
+                return wordnet.ADJ
+            elif tag.startswith('V'):
+                return wordnet.VERB
+            elif tag.startswith('N'):
+                return wordnet.NOUN
+            elif tag.startswith('R'):
+                return wordnet.ADV
+            return None
+
+        print('stage 4 done')
+
+        lemmated = []
+        for lemmate in lemmated_:
+
+            tagged = nltk.pos_tag(lemmate)
+
+            lemmas_sent = []
+            for tag in tagged:
+                wordnet_pos = get_wordnet_pos(tag[1]) or wordnet.NOUN
+                lemmas_sent.append(wnl.lemmatize(tag[0], pos = wordnet_pos).lower())  # convert into lower case
+
+            lemmated.append(lemmas_sent)
+
+        # the output is the lemmatized result; to change the output, go back to an earlier
+        # stage such as 'filtered' (the same applies to the other code chunks)
+        output = {'review': pd.Series(lemmated), 'class': pd.Series(label_en)}
+        output = pd.DataFrame(output)
+
+        print(output.head())
+        print('------')
+        print(len(output['class']))
+        print('------')
+
+        # added fillna with 0 to handle the error
+        output['class'] = output['class'].fillna(0)
+        print('fixed')
+        output['class'] = output['class'].astype(int)
+
+        import collections, numpy
+
+        print('stage 4 done')
+
+        pt_unique, pt_counts = numpy.unique(output['class'], return_counts=True)
+        dict(zip(pt_unique, pt_counts))
+
+        for i in range(len(output.review)):
+            if output['class'][i] == 1:
+                output['class'][i] = '0'
+            else:
+                output['class'][i] = '1'
+
+        import gensim.downloader as api
+        from tqdm import tqdm
+
+        # load the pre-trained embedding model
+        word_emb_model = api.load('glove-wiki-gigaword-100')
+
+        pretrained_w2v_size = 100
+
+        word_set = set()
+        for sent in output.review:
+            for word in sent:
+                word_set.add(word)
+        word_set.add('[PAD]')
+        word_set.add('[UNKNOWN]')
+
+        word_list = list(word_set)
+        word_list.sort()
+
+        print('stage 5 done')
+
+        word_index = {}
+        ind = 0
+        for word in word_list:
+            word_index[word] = ind
+            ind += 1
+
+        seq_length = 50
+
+        def encode_and_add_padding(sentences, seq_length, word_index):
+            sent_encoded = []
+            for sent in sentences:
+                temp_encoded = [word_index[word] if word in word_index else word_index['[UNKNOWN]'] for word in sent]
+                if len(temp_encoded) < seq_length:
+                    temp_encoded += [word_index['[PAD]']] * (seq_length - len(temp_encoded))
+                else:
+                    temp_encoded = temp_encoded[:seq_length]
+                sent_encoded.append(temp_encoded)
+            return sent_encoded
+
+        print('stage 5 done')
+
+        REVIEW_encoded = encode_and_add_padding(output.review, seq_length, word_index)
+
+        import numpy as np
+        emb_dim = word_emb_model.vector_size
+
+        emb_table = []
+        for i, word in enumerate(word_list):
+            if word in word_emb_model:
+                emb_table.append(word_emb_model[word])
+            else:
+                emb_table.append([0]*emb_dim)
+        emb_table = np.array(emb_table)
+
+        from sklearn.model_selection import train_test_split
+        text_train, text_test, label_train, label_test = train_test_split(REVIEW_encoded, output['class'], test_size=0.25, random_state=1)
+
+        vocab_size = len(word_list)
+        unique_labels = np.unique(output['class'])
+        n_class = len(unique_labels)
+        n_hidden = 128
+        learning_rate = 0.001
+        total_epoch = 10
+
+        import torch
+        import torch.nn as nn
+        import torch.nn.functional as F
+        import torch.optim as optim
+        from sklearn.metrics import accuracy_score
+
+        print('stage 6 done')
+
+        # You can enable GPU here (cuda), or just use the CPU
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+        class Model(nn.Module):
+            def __init__(self):
+                super(Model, self).__init__()
+                self.emb = nn.Embedding(vocab_size, emb_dim)
+                self.emb.weight.data.copy_(torch.from_numpy(emb_table))
+                self.emb.weight.requires_grad = False
+                self.lstm = nn.LSTM(emb_dim, n_hidden, num_layers=2, batch_first=True, dropout=0.2)
+                self.linear = nn.Linear(n_hidden, n_class)
+
+            def forward(self, x):
+                x = self.emb(x)
+                x, _ = self.lstm(x)
+                x = self.linear(x[:,-1,:])
+                return x
+
+        best_train_loss = 100
+        best_epoch = 0
+
+        print('before to device')
+        model = Model().to(device)
+        criterion = nn.CrossEntropyLoss()
+        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
+
+        input_torch = torch.from_numpy(np.array(text_train)).to(device)
+        target_torch = torch.from_numpy(np.array(label_train)).view(-1).to(device)
+
+        # range(total_epoch)
+        for epoch in range(1):
+
+            model.train()
+            optimizer.zero_grad()
+            outputs = model(input_torch)
+            loss = criterion(outputs, target_torch)
+            loss.backward()
+            optimizer.step()
+
+            predicted = torch.argmax(outputs, -1)
+            acc = accuracy_score(predicted.cpu().numpy(), target_torch.cpu().numpy())
+
+            print('Epoch: %d, loss: %.5f, train_acc: %.2f' %(epoch + 1, loss.item(), acc))
+            if loss.item() < best_train_loss:
+                #torch.save(model.state_dict(), 'best-LSTM-model-parameters.pt')
+                #with open(os.path.join(model_path, 'best-LSTM-model-parameters.pkl'), 'wb') as out:
+                #    pickle.dump(model, out)
+
+                x0 = np.reshape(text_test[0], (1, seq_length))
+                x = torch.from_numpy(x0).to(device)
+                with torch.no_grad():
+                    print(model(x))
+                traced_cell = torch.jit.trace(model, (x))
+
+                torch.jit.save(traced_cell, os.path.join(model_path, 'best-LSTM-model-parameters.pth'))
+
+                #torch.save(model, 'save/to/path/model.pt')
+
+                best_train_loss = loss.item()
+                print('model updated')
+                best_epoch = epoch + 1
+
+        print('Finished Training')
+
+# # lstm_model=Model().to(device)
+# # lstm_model.load_state_dict(torch.load('best-LSTM-model-parameters.pt'))
+
+# with open(os.path.join(model_path, 'best-LSTM-model-parameters.pkl'), 'w') as out:
+#     pickle.dump(Model(), out)
+
+        # break line ---------
+
+        # below is the code for the decision tree
+# print(input_files)
+# if len(input_files) == 0:
+#     raise ValueError(('There are no files in {}.\n' +
+#                       'This usually indicates that the channel ({}) was incorrectly specified,\n' +
+#                       'the data specification in S3 was incorrectly specified or the role specified\n' +
+#                       'does not have permission to access the data.').format(training_path, channel_name))
+# raw_data = [ pd.read_csv(file, header=None) for file in input_files ]
+# train_data = pd.concat(raw_data)
 
-        # labels are in the first column
-        train_y = train_data.ix[:,0]
-        train_X = train_data.ix[:,1:]
+# # labels are in the first column
+# train_y = train_data.ix[:,0]
+# train_X = train_data.ix[:,1:]
 
-        # Here we only support a single hyperparameter. Note that hyperparameters are always passed in as
-        # strings, so we need to do any necessary conversions.
-        max_leaf_nodes = trainingParams.get('max_leaf_nodes', None)
-        if max_leaf_nodes is not None:
-            max_leaf_nodes = int(max_leaf_nodes)
+# # Here we only support a single hyperparameter. Note that hyperparameters are always passed in as
+# # strings, so we need to do any necessary conversions.
+# max_leaf_nodes = trainingParams.get('max_leaf_nodes', None)
+# if max_leaf_nodes is not None:
+#     max_leaf_nodes = int(max_leaf_nodes)
 
-        # Now use scikit-learn's decision tree classifier to train the model.
-        clf = tree.DecisionTreeClassifier(max_leaf_nodes=max_leaf_nodes)
-        clf = clf.fit(train_X, train_y)
+# # Now use scikit-learn's decision tree classifier to train the model.
+# clf = tree.DecisionTreeClassifier(max_leaf_nodes=max_leaf_nodes)
+# clf = clf.fit(train_X, train_y)
 
         # save the model
-        with open(os.path.join(model_path, 'decision-tree-model.pkl'), 'w') as out:
-            pickle.dump(clf, out)
+# with open(os.path.join(model_path, 'decision-tree-model.pkl'), 'w') as out:
+#     pickle.dump(clf, out)
         print('Training complete.')
     except Exception as e:
         # Write out an error file. This will be returned as the failureReason in the
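Note: the training script above saves the traced network as TorchScript, which is what predictor.py loads with torch.jit.load; a design advantage of tracing over pickling here is that the serving container never needs the Model class on its import path. A minimal sketch of the save/load round trip, with made-up token ids:

    import numpy as np
    import torch

    model_file = '/opt/ml/model/best-LSTM-model-parameters.pth'  # path used by train and predictor.py

    loaded = torch.jit.load(model_file)
    row = np.array([[2779, 4496, 1744, 3480, 674] + [24] * 45])  # shape (1, seq_length=50)
    with torch.no_grad():
        logits = loaded(torch.from_numpy(row))
    print(torch.argmax(logits, -1))                              # predicted class for the row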
diff --git a/container/local_test/payload.csv b/container/local_test/payload.csv
deleted file mode 100644
index d676580..0000000
--- a/container/local_test/payload.csv
+++ /dev/null
@@ -1,29 +0,0 @@
-5.000000000000000000e+00,3.500000000000000000e+00,1.300000000000000044e+00,2.999999999999999889e-01
-4.500000000000000000e+00,2.299999999999999822e+00,1.300000000000000044e+00,2.999999999999999889e-01
-4.400000000000000355e+00,3.200000000000000178e+00,1.300000000000000044e+00,2.000000000000000111e-01
-5.000000000000000000e+00,3.500000000000000000e+00,1.600000000000000089e+00,5.999999999999999778e-01
-5.099999999999999645e+00,3.799999999999999822e+00,1.899999999999999911e+00,4.000000000000000222e-01
-4.799999999999999822e+00,3.000000000000000000e+00,1.399999999999999911e+00,2.999999999999999889e-01
-5.099999999999999645e+00,3.799999999999999822e+00,1.600000000000000089e+00,2.000000000000000111e-01
-4.599999999999999645e+00,3.200000000000000178e+00,1.399999999999999911e+00,2.000000000000000111e-01
-5.299999999999999822e+00,3.700000000000000178e+00,1.500000000000000000e+00,2.000000000000000111e-01
-5.000000000000000000e+00,3.299999999999999822e+00,1.399999999999999911e+00,2.000000000000000111e-01
-5.500000000000000000e+00,2.600000000000000089e+00,4.400000000000000355e+00,1.199999999999999956e+00
-6.099999999999999645e+00,3.000000000000000000e+00,4.599999999999999645e+00,1.399999999999999911e+00
-5.799999999999999822e+00,2.600000000000000089e+00,4.000000000000000000e+00,1.199999999999999956e+00
-5.000000000000000000e+00,2.299999999999999822e+00,3.299999999999999822e+00,1.000000000000000000e+00
-5.599999999999999645e+00,2.700000000000000178e+00,4.200000000000000178e+00,1.300000000000000044e+00
-5.700000000000000178e+00,3.000000000000000000e+00,4.200000000000000178e+00,1.199999999999999956e+00
-5.700000000000000178e+00,2.899999999999999911e+00,4.200000000000000178e+00,1.300000000000000044e+00
-6.200000000000000178e+00,2.899999999999999911e+00,4.299999999999999822e+00,1.300000000000000044e+00
-5.099999999999999645e+00,2.500000000000000000e+00,3.000000000000000000e+00,1.100000000000000089e+00
-5.700000000000000178e+00,2.799999999999999822e+00,4.099999999999999645e+00,1.300000000000000044e+00
-6.700000000000000178e+00,3.100000000000000089e+00,5.599999999999999645e+00,2.399999999999999911e+00
-6.900000000000000355e+00,3.100000000000000089e+00,5.099999999999999645e+00,2.299999999999999822e+00
-5.799999999999999822e+00,2.700000000000000178e+00,5.099999999999999645e+00,1.899999999999999911e+00
-6.799999999999999822e+00,3.200000000000000178e+00,5.900000000000000355e+00,2.299999999999999822e+00
-6.700000000000000178e+00,3.299999999999999822e+00,5.700000000000000178e+00,2.500000000000000000e+00
-6.700000000000000178e+00,3.000000000000000000e+00,5.200000000000000178e+00,2.299999999999999822e+00
-6.299999999999999822e+00,2.500000000000000000e+00,5.000000000000000000e+00,1.899999999999999911e+00
-6.500000000000000000e+00,3.000000000000000000e+00,5.200000000000000178e+00,2.000000000000000000e+00
-6.200000000000000178e+00,3.399999999999999911e+00,5.400000000000000355e+00,2.299999999999999822e+00
diff --git a/container/local_test/predict.sh b/container/local_test/predict.sh
deleted file mode 100644
index 4dea9d4..0000000
--- a/container/local_test/predict.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-
-payload=$1
-content=${2:-text/csv}
-
-curl --data-binary @${payload} -H "Content-Type: ${content}" -v http://localhost:8080/invocations
diff --git a/container/local_test/serve_local.sh b/container/local_test/serve_local.sh
deleted file mode 100644
index d6e2bf9..0000000
--- a/container/local_test/serve_local.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/bin/sh
-
-image=$1
-
-docker run -v $(pwd)/test_dir:/opt/ml -p 8080:8080 --rm ${image} serve
diff --git a/container/local_test/test_dir/input/config/hyperparameters.json b/container/local_test/test_dir/input/config/hyperparameters.json
deleted file mode 100644
index 0967ef4..0000000
--- a/container/local_test/test_dir/input/config/hyperparameters.json
+++ /dev/null
@@ -1 +0,0 @@
-{}
diff --git a/container/local_test/test_dir/input/config/resourceConfig.json b/container/local_test/test_dir/input/config/resourceConfig.json
deleted file mode 100644
index e69de29..0000000
diff --git a/container/local_test/test_dir/input/data/training/iris.csv b/container/local_test/test_dir/input/data/training/iris.csv
deleted file mode 100644
index 6abe4af..0000000
--- a/container/local_test/test_dir/input/data/training/iris.csv
+++ /dev/null
@@ -1,150 +0,0 @@
-setosa,5.1,3.5,1.4,0.2
-setosa,4.9,3,1.4,0.2
-setosa,4.7,3.2,1.3,0.2
-setosa,4.6,3.1,1.5,0.2
-setosa,5,3.6,1.4,0.2
-setosa,5.4,3.9,1.7,0.4
-setosa,4.6,3.4,1.4,0.3
-setosa,5,3.4,1.5,0.2
-setosa,4.4,2.9,1.4,0.2
-setosa,4.9,3.1,1.5,0.1
-setosa,5.4,3.7,1.5,0.2
-setosa,4.8,3.4,1.6,0.2
-setosa,4.8,3,1.4,0.1
-setosa,4.3,3,1.1,0.1
-setosa,5.8,4,1.2,0.2
-setosa,5.7,4.4,1.5,0.4
-setosa,5.4,3.9,1.3,0.4
-setosa,5.1,3.5,1.4,0.3
-setosa,5.7,3.8,1.7,0.3
-setosa,5.1,3.8,1.5,0.3
-setosa,5.4,3.4,1.7,0.2
-setosa,5.1,3.7,1.5,0.4
-setosa,4.6,3.6,1,0.2
-setosa,5.1,3.3,1.7,0.5
-setosa,4.8,3.4,1.9,0.2
-setosa,5,3,1.6,0.2
-setosa,5,3.4,1.6,0.4
-setosa,5.2,3.5,1.5,0.2
-setosa,5.2,3.4,1.4,0.2
-setosa,4.7,3.2,1.6,0.2
-setosa,4.8,3.1,1.6,0.2
-setosa,5.4,3.4,1.5,0.4
-setosa,5.2,4.1,1.5,0.1
-setosa,5.5,4.2,1.4,0.2
-setosa,4.9,3.1,1.5,0.2
-setosa,5,3.2,1.2,0.2
-setosa,5.5,3.5,1.3,0.2
-setosa,4.9,3.6,1.4,0.1
-setosa,4.4,3,1.3,0.2
-setosa,5.1,3.4,1.5,0.2
-setosa,5,3.5,1.3,0.3
-setosa,4.5,2.3,1.3,0.3
-setosa,4.4,3.2,1.3,0.2
-setosa,5,3.5,1.6,0.6
-setosa,5.1,3.8,1.9,0.4
-setosa,4.8,3,1.4,0.3
-setosa,5.1,3.8,1.6,0.2
-setosa,4.6,3.2,1.4,0.2
-setosa,5.3,3.7,1.5,0.2
-setosa,5,3.3,1.4,0.2
-versicolor,7,3.2,4.7,1.4
-versicolor,6.4,3.2,4.5,1.5
-versicolor,6.9,3.1,4.9,1.5
-versicolor,5.5,2.3,4,1.3
-versicolor,6.5,2.8,4.6,1.5
-versicolor,5.7,2.8,4.5,1.3
-versicolor,6.3,3.3,4.7,1.6
-versicolor,4.9,2.4,3.3,1
-versicolor,6.6,2.9,4.6,1.3
-versicolor,5.2,2.7,3.9,1.4
-versicolor,5,2,3.5,1
-versicolor,5.9,3,4.2,1.5
-versicolor,6,2.2,4,1
-versicolor,6.1,2.9,4.7,1.4
-versicolor,5.6,2.9,3.6,1.3
-versicolor,6.7,3.1,4.4,1.4
-versicolor,5.6,3,4.5,1.5
-versicolor,5.8,2.7,4.1,1
-versicolor,6.2,2.2,4.5,1.5
-versicolor,5.6,2.5,3.9,1.1
-versicolor,5.9,3.2,4.8,1.8
-versicolor,6.1,2.8,4,1.3
-versicolor,6.3,2.5,4.9,1.5
-versicolor,6.1,2.8,4.7,1.2
-versicolor,6.4,2.9,4.3,1.3
-versicolor,6.6,3,4.4,1.4
-versicolor,6.8,2.8,4.8,1.4
-versicolor,6.7,3,5,1.7
-versicolor,6,2.9,4.5,1.5
-versicolor,5.7,2.6,3.5,1
-versicolor,5.5,2.4,3.8,1.1
-versicolor,5.5,2.4,3.7,1
-versicolor,5.8,2.7,3.9,1.2
-versicolor,6,2.7,5.1,1.6
-versicolor,5.4,3,4.5,1.5
-versicolor,6,3.4,4.5,1.6
-versicolor,6.7,3.1,4.7,1.5
-versicolor,6.3,2.3,4.4,1.3
-versicolor,5.6,3,4.1,1.3
-versicolor,5.5,2.5,4,1.3
-versicolor,5.5,2.6,4.4,1.2
-versicolor,6.1,3,4.6,1.4
-versicolor,5.8,2.6,4,1.2
-versicolor,5,2.3,3.3,1
-versicolor,5.6,2.7,4.2,1.3
-versicolor,5.7,3,4.2,1.2
-versicolor,5.7,2.9,4.2,1.3
-versicolor,6.2,2.9,4.3,1.3
-versicolor,5.1,2.5,3,1.1
-versicolor,5.7,2.8,4.1,1.3
-virginica,6.3,3.3,6,2.5
-virginica,5.8,2.7,5.1,1.9
-virginica,7.1,3,5.9,2.1
-virginica,6.3,2.9,5.6,1.8
-virginica,6.5,3,5.8,2.2
-virginica,7.6,3,6.6,2.1
-virginica,4.9,2.5,4.5,1.7
-virginica,7.3,2.9,6.3,1.8
-virginica,6.7,2.5,5.8,1.8
-virginica,7.2,3.6,6.1,2.5
-virginica,6.5,3.2,5.1,2
-virginica,6.4,2.7,5.3,1.9
-virginica,6.8,3,5.5,2.1
-virginica,5.7,2.5,5,2
-virginica,5.8,2.8,5.1,2.4
-virginica,6.4,3.2,5.3,2.3
-virginica,6.5,3,5.5,1.8
-virginica,7.7,3.8,6.7,2.2
-virginica,7.7,2.6,6.9,2.3
-virginica,6,2.2,5,1.5
-virginica,6.9,3.2,5.7,2.3
-virginica,5.6,2.8,4.9,2
-virginica,7.7,2.8,6.7,2
-virginica,6.3,2.7,4.9,1.8
-virginica,6.7,3.3,5.7,2.1
-virginica,7.2,3.2,6,1.8
-virginica,6.2,2.8,4.8,1.8
-virginica,6.1,3,4.9,1.8
-virginica,6.4,2.8,5.6,2.1
-virginica,7.2,3,5.8,1.6
-virginica,7.4,2.8,6.1,1.9
-virginica,7.9,3.8,6.4,2
-virginica,6.4,2.8,5.6,2.2
-virginica,6.3,2.8,5.1,1.5
-virginica,6.1,2.6,5.6,1.4
-virginica,7.7,3,6.1,2.3
-virginica,6.3,3.4,5.6,2.4
-virginica,6.4,3.1,5.5,1.8
-virginica,6,3,4.8,1.8
-virginica,6.9,3.1,5.4,2.1
-virginica,6.7,3.1,5.6,2.4
-virginica,6.9,3.1,5.1,2.3
-virginica,5.8,2.7,5.1,1.9
-virginica,6.8,3.2,5.9,2.3
-virginica,6.7,3.3,5.7,2.5
-virginica,6.7,3,5.2,2.3
-virginica,6.3,2.5,5,1.9
-virginica,6.5,3,5.2,2
-virginica,6.2,3.4,5.4,2.3
-virginica,5.9,3,5.1,1.8
diff --git a/container/local_test/train_local.sh b/container/local_test/train_local.sh
deleted file mode 100644
index e2372c7..0000000
--- a/container/local_test/train_local.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/sh
-
-image=$1
-
-mkdir -p test_dir/model
-mkdir -p test_dir/output
-
-rm test_dir/model/*
-rm test_dir/output/*
-
-docker run -v $(pwd)/test_dir:/opt/ml --rm ${image} train
diff --git a/container/test.txt b/container/test.txt
new file mode 100644
index 0000000..24cf7e7
--- /dev/null
+++ b/container/test.txt
@@ -0,0 +1,4 @@
+start to test for reading the xlsx file.
+
+
+removed the transform step for testing
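Note: once the pipeline builds this image and, assuming the Step Function ends by deploying it behind a SageMaker endpoint, the same CSV contract applies end to end. A minimal sketch; the endpoint name is hypothetical:

    import boto3

    runtime = boto3.client('sagemaker-runtime')
    payload = ','.join(['2779', '4496', '1744', '3480', '674'] + ['24'] * 45) + '\n'

    response = runtime.invoke_endpoint(
        EndpointName='mlops-lstm-endpoint',   # hypothetical endpoint name
        ContentType='text/csv',               # routed to the container's /invocations
        Body=payload,
    )
    print(response['Body'].read().decode('utf-8'))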