From 088bef166433156b57e016b203eb24edd3e65944 Mon Sep 17 00:00:00 2001 From: Jacqueline Garrahan Date: Wed, 14 Oct 2020 16:28:58 -0700 Subject: [PATCH 1/7] MNT: Repurpose parsing to use dict --- lume_model/keras/__init__.py | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/lume_model/keras/__init__.py b/lume_model/keras/__init__.py index 10bd417..e56145e 100644 --- a/lume_model/keras/__init__.py +++ b/lume_model/keras/__init__.py @@ -57,7 +57,7 @@ def __init__( # load model in thread safe manner self._thread_graph = tf.Graph() with self._thread_graph.as_default(): - self.model = load_model( + self._model = load_model( model_file, custom_objects={ "ScaleLayer": ScaleLayer, @@ -89,7 +89,7 @@ def evaluate(self, input_variables: List[InputVariable]) -> List[OutputVariable] # call prediction in threadsafe manner with self._thread_graph.as_default(): - model_output = self.model.predict(formatted_input) + model_output = self._model.predict(formatted_input) output = self.parse_output(model_output) @@ -161,24 +161,17 @@ def _prepare_outputs(self, predicted_output: dict): return list(self.output_variables.values()) def format_input(self, input_dictionary: dict): - """Formats input to be fed into model + """Formats input to be fed into model. For the base KerasModel, inputs should + be assumed in dictionary format. Args: input_dictionary (dict): Dictionary mapping input to value. """ - - vector = [] - for item in self._input_format["order"]: - vector.append(input_dictionary[item]) - - # Convert to numpy array and reshape - vector = np.array(vector) - vector = vector.reshape(tuple(self._input_format["shape"])) - - return vector + return input_dictionary def parse_output(self, model_output): - """Parses model output to create dictionary variable name -> value + """Parses model output to create dictionary variable name -> value. This assumes + that outputs have been labeled during model creation. 
Args: model_output (np.ndarray): Raw model output @@ -186,12 +179,12 @@ def parse_output(self, model_output): output_dict = {} if self._output_format["type"] == "softmax": - for value, idx in self._output_format["indices"].items(): - softmax_output = list(model_output[idx]) - output_dict[value] = softmax_output.index(max(softmax_output)) + for idx, output_name in enumerate(self._model.output_names): + softmax_output = model_output[idx] + output_dict[output_name] = softmax_output.index(max(softmax_output)) if self._output_format["type"] == "raw": - for value, idx in self._output_format["indices"].items(): - output_dict[value] = model_output[idx] + for idx, output_name in enumerate(self._model.output_names): + output_dict[output_name] = model_output[idx] return output_dict From 7e4ef8873ff0599fecb91efb06a525e67886ac32 Mon Sep 17 00:00:00 2001 From: Jacqueline Garrahan Date: Wed, 14 Oct 2020 16:29:29 -0700 Subject: [PATCH 2/7] MNT: Drop input/output formatting from yaml --- examples/files/iris_config.yaml | 9 --------- lume_model/tests/test_files/iris_config.yaml | 9 --------- 2 files changed, 18 deletions(-) diff --git a/examples/files/iris_config.yaml b/examples/files/iris_config.yaml index 3f60038..794323b 100644 --- a/examples/files/iris_config.yaml +++ b/examples/files/iris_config.yaml @@ -5,17 +5,8 @@ model: tensorflow: 2.3.1 args: model_file: examples/files/iris_model.h5 - input_format: - order: - - SepalLength - - SepalWidth - - PetalLength - - PetalWidth - shape: [1, 4] output_format: type: softmax - indices: - Species: [0] input_variables: diff --git a/lume_model/tests/test_files/iris_config.yaml b/lume_model/tests/test_files/iris_config.yaml index 3f60038..794323b 100644 --- a/lume_model/tests/test_files/iris_config.yaml +++ b/lume_model/tests/test_files/iris_config.yaml @@ -5,17 +5,8 @@ model: tensorflow: 2.3.1 args: model_file: examples/files/iris_model.h5 - input_format: - order: - - SepalLength - - SepalWidth - - PetalLength - - PetalWidth - shape: [1, 4] output_format: type: softmax - indices: - Species: [0] input_variables: From 3fa59f74fbea8825af9774de5876c0a4fa762687 Mon Sep 17 00:00:00 2001 From: Jacqueline Garrahan Date: Wed, 14 Oct 2020 16:30:36 -0700 Subject: [PATCH 3/7] DOC: Drop indexing from docs --- docs/index.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/docs/index.md b/docs/index.md index f717861..3d55886 100644 --- a/docs/index.md +++ b/docs/index.md @@ -99,8 +99,6 @@ Models and variables may be constructed using a yaml configuration file. The con The model section is used for the initialization of model classes. The `model_class` entry is used to specify the model class to initialize. The `model_from_yaml` method will attempt to import the specified class. Additional model-specific requirements may be provided. These requirements will be checked before model construction. Model keyword arguments may be passed via the config file or with the function kwarg `model_kwargs`. All models are assumed to accept `input_variables` and `output_variables` as keyword arguments. -In order to use the `KerasModel` execution class, instructions must be provided to format inputs for model execution and parse the model output. Input formatting in the yaml uses the `order` and `shape` entries to format the model input. The output format requires indexing for each output variable. Similar functionality might be implemented for custom model classes; however, this is not supported out-of-the-box with `lume-model`. 
- The below example outlines the specification for a model compatible with the `lume-model` keras/tensorflow toolkit. ```yaml @@ -119,10 +117,8 @@ model: shape: [1, 4] output_format: type: softmax - indices: - Species: [0] -``` +``` Variables are constructed the minimal data requirements for inputs/outputs. From cba21a628aba8f5ac8175a1eec2d3ff5a814a5be Mon Sep 17 00:00:00 2001 From: Jacqueline Garrahan Date: Thu, 15 Oct 2020 10:03:00 -0700 Subject: [PATCH 4/7] MNT: Update h5 with model built using functional api --- examples/IrisTraining.ipynb | 198 ++++++++++++++++++++++ examples/files/iris_model.h5 | Bin 25512 -> 29856 bytes lume_model/tests/test_files/iris_model.h5 | Bin 25512 -> 29856 bytes 3 files changed, 198 insertions(+) create mode 100644 examples/IrisTraining.ipynb diff --git a/examples/IrisTraining.ipynb b/examples/IrisTraining.ipynb new file mode 100644 index 0000000..2f69436 --- /dev/null +++ b/examples/IrisTraining.ipynb @@ -0,0 +1,198 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# iris example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.datasets import load_iris\n", + "from tensorflow import keras\n", + "import tensorflow as tf\n", + "\n", + "from tensorflow.keras.models import Sequential\n", + "from tensorflow.keras.layers import Dense, Flatten\n", + "from tensorflow.keras.utils import to_categorical\n", + "from sklearn.preprocessing import LabelEncoder\n", + "import pandas as pd\n", + "iris = load_iris()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iris[\"data\"][0].shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data = pd.DataFrame(iris.data, columns=iris.feature_names)\n", + "data.columns = [\"SepalLength\", \"SepalWidth\", \"PetalLength\", \"PetalWidth\"]\n", + "\n", + "data[\"Species\"] = iris.target\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_dataset = data.sample(frac=0.8,random_state=0)\n", + "test_dataset = data.drop(train_dataset.index)\n", + "train_labels = train_dataset.pop('Species')\n", + "test_labels = test_dataset.pop('Species')\n", + "train_dataset.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# encode class values as integers\n", + "encoder = LabelEncoder()\n", + "encoder.fit(train_labels)\n", + "encoded_Y = encoder.transform(train_labels)\n", + "\n", + "# convert integers to dummy variables (i.e. 
one hot encoded)\n", + "dummy_y = to_categorical(encoded_Y)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + " # define model\n", + "def build_model():\n", + " # create model\n", + " sepal_length_input = keras.Input(shape=(1,), name=\"SepalLength\")\n", + " sepal_width_input = keras.Input(shape=(1,), name=\"SepalWidth\")\n", + " petal_length_input = keras.Input(shape=(1,), name=\"PetalLength\")\n", + " petal_width_input = keras.Input(shape=(1,), name=\"PetalWidth\")\n", + " inputs = [sepal_length_input, sepal_width_input, petal_length_input, petal_width_input]\n", + " merged = keras.layers.concatenate(inputs)\n", + " dense1 = Dense(8, activation='relu')(merged)\n", + " output = Dense(3, activation='softmax', name=\"Species\")(dense1)\n", + "\n", + " # Compile model\n", + " model = keras.Model(inputs=inputs, outputs=[output])\n", + " optimizer = tf.keras.optimizers.Adam()\n", + " model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])\n", + " return model\n", + "\n", + "model = build_model()\n", + "keras.utils.plot_model(model, \"my_first_model_with_shape_info.png\", show_shapes=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_stats = train_dataset.describe()\n", + "train_stats = train_stats.transpose()\n", + "train_stats" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_x = train_dataset.to_dict(\"series\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=15)\n", + "\n", + "history = model.fit(train_x, dummy_y, epochs=1000,\n", + " validation_split = 0.2, verbose=1, callbacks=[early_stop])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.save(\"files/iris_model.h5\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.input_names" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.output_names" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/files/iris_model.h5 b/examples/files/iris_model.h5 index d41296d11f764fd52212602f7f17e72248ba795d..692593d248c2514d27898524a5e420bafedf2d62 100644 GIT binary patch literal 29856 zcmeGlZERcB^*RmUmQWJb(X^D-So??|ZR|8{(tOzIN?Z415NH{wQD?t6FR>fPHvXVr z%9bJMvI&hyLtO=v7APGNDy2wFWRN)37?4ft#MdS?eiRL9%aDKyO{`34?0%eM``z~( zw=O1W{jB!gcRt>^_nv#sx%V7Dw?FaRy6Kt)OBWFKv06zvspPkK`2s#834uWv={~rz zKsyBOXqmXNkgEuzRRg{fQt)0nf(u9c-fa zz-%4RPJx{PNctn)YRJ1+4fX~Saf*yY6T$xA0X3F|W4vguHTYnTwhHW3w(APwp}xZ8 zEF_DFmGr4GCGNdXjm3kJFvTyiSoo97n?zq$-UIe~1%m{N(0`41iw!s-2_TS0~jYx!i1yH+HMgy z7HVb8l+e!5zoRP!eYg(!$~AmwkL@l&9~4dcYN6hnEf@K1%eDMCK8$1F8f5sYv?zW9 
z=1w}a=r0}Gyy5okye}3I=m-1d@rlCqs~T20Xp3Q1925visNr}d))R{CWls?T+d1L! zgTe>x{*V%nd&5e<>{1V zZ3#z{iS3Lm$?8%P{(v{g$liEBiL#zM!^u#n*5=$zMRq6lbDW)yTS=_0XOG9DN-WM8 zdz4UIrHGi)+pBhGA#j8{)Tk2Lu7-OP0s8=DG{RGKFx(YMhP%CCx)br?@21RiX$p4- zyNzZb&~tnO9csdG4YKi0ika&@`!^A&0kq4P)Z_x6&12 z7N3YEnZX-*g|aWmYNnlK}jmIE2uCgP$oSGhUo#6 zo{o(E;u{;U(kYQt%A{dXW2vcO4C?DcTu*9XU+gdam6^M zaI8L_WYF2MWH;+8l1%72OPdcH7`46ue##*{M`O1lz!@IGG!CtA1s_pi`)e?!8_o z2bkxRcZZfn*h{JlpNq}G9X4uVTkHk8Xtgi|(0(IRAs^e@$(Dg%{t^Dl%{Eq(sr-N;6tQL>-vq{=P`!H^I`|O*4FNoQvtYnFaJ4j# z(H23UY7?zbG~^W2sZriEz}EG^aGnjuHCbO7KWE=FRTGiOWCB~2%?-M5Ck(Uq%Dtj5MZ^DIZ|`3>=j2CmmZR?41vdtk^*NI*j`HjppQ zbvVTcP!Gi(k12m{X+Vy1HW=PXcrOu0i++eO`l0YDLa_dX^n9(Tms!9&2ssP&ev}uL z-jB>DuT<&yeG1kSk2KG$3G{Pc?aX|oMm-!Db45-pntc0kA~mx7VCt2TKc#-K>t)vm z|9K+y^yoQPWz}nKi&wopxgoi9qVL_C+zSp^+h44G!u{!6&raU5^Gf&THCrdPHN2Uc z|BFTLLmxcls#*NuM6CaiD{^e5d+6uK+s=*dO&$GYd8)tfXRh~uw0iP?uhmW-d%kw^ zZ_nNDy0PvqcX{2rsYu0S>X^T7a$E2Qch_x4UH!FHlaH=CmHK1*Gw#l&C8_GSzL+@d zSTOlw;Jil_S{qSb@KZcG?#{N6s?fG$+E77_=_0gTDU622?(*3>Bf4Y8I zb;y15zE>w-y8b}xM}5x8g^hoQ%B)yn*P72KPGA4Dd*SCVOyHW2U5$5{*6L|%`Q*9B z#{1q`gj;lmNbjD1;o+9{2VZ{a*%Q|p(mx*CJihFc`qnoFZ#1O0udt0jeC=54XKJ+} z{h{)vmPZ$SYkcV*uJYht*MRmE$zT+17pWHW@k>kt zvj2$PhL9StBb-jL>b3IwITf^SnXg`3MF)lZAtq_q;yBX<+cW|zML46v>b7ib?1TOy z!-dyv%U27@gQ9_}em@Rq=*0$d{cgBpgXlR5@grZHacma+sJCK|+mt`IG$6+r8w_t4 z><33%?D}pE*lqeeAlWgF!)&!0kn6kqJX*R8p`TZON2ZiEoG-say-e2%^cUJmc4*Mx6QwGL=&qZnm z^FaA`Koz9YlJ6YL&f0j6oe$%{mW{J6m{4QlC4=-RIX-=+6}x3!f3ri2XJ=i8C4}Sg zrG*T;7N21kcv=2^hVm)aSUw|Y`sMODJDmrvOnOTEMjsc2C*wNJqtCJXN{SYgcK%%M z_aL5x_$RnIGrj2J-!PKqRGaJEJ$0KH=5&W>^Tox{I|ZLs3v+6!=3t7b0XZ(#Kwnna zFR^P3HR$??m)|R+E`MGh`Mrt}uv_$J*^bXkXbCb2Jdavev83qOY+zohCqvYq7HUj}z+?dfv2BXfRQ z4aoZo){g@aXOLSQJqck)L59Ql)s74o*ycJPO=ElmyUb_5Q=OvYblkkaahU9vwgz(9 zZw=TF&WOzV(k~}X=r!0?~9&Lt_R?NPvCLCJ_YuTe6qa@?$WuU9#KDMcg#F@ z)VD7>GheAu_2(lI$+13x`o*^J^H6Pr(5+3QVtrmd+jU8{!|4CLT-h)4?Jw{7#5B?F zG?O-g%U%Py_Lm`_cIw62U(jDGcWe8TA+*ovy)vb=VSQYz;R?aL5%oI{)Fi`&$I0yw zCw-a)(8tMuhF)wS*STA+L*$E#V=!#gSFy)!%AZ>r$Q2hyppJ#3{i^atTaW0m9qcln z{gOR`ALAyD*KA8e1G((iCxzo;?dxcF_k@JLF&`WRc!d3DEcAo%^&I#W^2zotxJ&0M z=Z!U8GtV9MJs6vruhgje^N}`h#B))mE%f&a$KgCQw|}pY>wF~pWxoBTIwC9y?XJ}9 zs?_J3YkxTfaUJzjto;S|4No8TWZa^C4)dzMF;6LC^>ML=s{!*y`TIo~EgE%>; zQTy!YjpF|~!!Ld;@b8s;`Sr!7HDcnnJoBIJ`l754t$m>X;l3%uh1Vv-Z9i^YnJk?*8BZ-~Yb*yX*QH zwVp-WGk~?q_Us)2m>UA4LK-ZKqr;|!BFxhQH)O~J?ePe#3yHJ2Z=O55CH>m7Ukws`oleVeJo;~IO3QmOC zFXf#LvERx&5n^%6J0u0t&H}a@lwA-vC*OkloJ5Oz019J|!}l^`K|zYWJ0lvz6vT6$ z+`{z)eJ_=CNW@b;+<5tXL)Hrno6nsLyF=2$TApD=q0JTwV3%A7 zZut#zQ)~n0$M+kEv)K|*SehdSo-B(k0k-4_fib~mdm3#X)w@D7j{4Vm4GPG?*MzR6 z)Md5q_3s_N?_*tC4pGk8-&i&8P{%auuKuDqjF{DR*4JGKc}T7L$KpJH`Gz{Ox7(pq zwQ8Pz^jMSq`G@44Wv4W&#qVi)P8*$}mvYJKzLQS!&F`EC#^$>2d%Wb=(;LaitNH3% zqA2yUxawxj>B$a?#xrDAkiwobPVcB_*H`2=HLde#Z#pY}!^r2B${Zg39>=}2g!8BV zT+%D_swYn0PaZ6Pft2n^c3n6S;STM2=(zv>K@zj1xe|V@b+yTV(xfiA>r4`D)Es=M zbx`vKu3z7|={(&1xAU#hk*+H1X6Mz2=2Y^zM3h7tHfi=fkGLj<(Pa^2#f$s;=ym0t z{nRJRQ>liXp5B2YUrwQQ;$xR_?>FZd`wy+%)V!7}_;$lhylj*QqwSjhal`^J52t@0 zet!{>eH3+AXL+a-?);Oq?o;I?YU^jeF}@3%59SkbZ?{r6D(a{ge<)>?y4;6-j~>{2 zX=mbqkm~8dalC2^Uh{Z7qkE9|HtzNA?c3|??wd7~O3;4%v;NGFeKKi0rC#4ktg4(q zOx${m$V>o^;<(FGc7Eh0fcF&DacwIW`OImo*q71Q%m>nIXYGBgs4$5% zh0)!P65-n`)XeYs6H;-RvK1S%{$niVdMC3TfBqbfpD$72hNU8WZZgFb!kuG-( z5#_$Wh;>&Iu*@C%h^^Xg;+~-d>k*G4GT&9oh+_q@*n~&tuv&p4D0utbC_Hc3O?=7t zGD4itg?T|g_SKs+afWs*?g%K>)_`egaGjPtsr$4W;q=gG(5Zlp8QEYcm*tp31>X(3 zvS(f8=a@24fG9@Lg6aucP(dsbLxs8q7qT5=gmD4fiS0ZQod8~l_|!1?i% z5QrP5gtmO15;AgVN`UR=8sJHTkIg~2!sc_+DYR^m0b?P94j?$Z`_j^J!%WL;6M}>z zLKNQ1OhRMerWs0jVn%o~he}?DDT*PEKF0R&_X(fV;Q1df*!doT(UvDL!d+q<_ 
z_AC9lQvx-IlLx!2VC~GcVOP?)Ny5>%G}O<)It20Gb~|9#EQzR}VI>6dlFZbP6g@F@ znYfVE@ZZ&|4Xfz#9N6Kj_oBQc*2*R0wkYeinJoVbLx z3i_u9{ow$L61dQ4nwl{)xQS9uuAd#;gon$E#Uz`5>7Yt3M-%PRIe_836HfPHvXVr z%9bJMvI&hyLtO=v7APGNDy2wFWRN)37?4ft#MdS?eiRL9%aDKyO{`34?0%eM``z~( zw=O1W{jB!gcRt>^_nv#sx%V7Dw?FaRy6Kt)OBWFKv06zvspPkK`2s#834uWv={~rz zKsyBOXqmXNkgEuzRRg{fQt)0nf(u9c-fa zz-%4RPJx{PNctn)YRJ1+4fX~Saf*yY6T$xA0X3F|W4vguHTYnTwhHW3w(APwp}xZ8 zEF_DFmGr4GCGNdXjm3kJFvTyiSoo97n?zq$-UIe~1%m{N(0`41iw!s-2_TS0~jYx!i1yH+HMgy z7HVb8l+e!5zoRP!eYg(!$~AmwkL@l&9~4dcYN6hnEf@K1%eDMCK8$1F8f5sYv?zW9 z=1w}a=r0}Gyy5okye}3I=m-1d@rlCqs~T20Xp3Q1925visNr}d))R{CWls?T+d1L! zgTe>x{*V%nd&5e<>{1V zZ3#z{iS3Lm$?8%P{(v{g$liEBiL#zM!^u#n*5=$zMRq6lbDW)yTS=_0XOG9DN-WM8 zdz4UIrHGi)+pBhGA#j8{)Tk2Lu7-OP0s8=DG{RGKFx(YMhP%CCx)br?@21RiX$p4- zyNzZb&~tnO9csdG4YKi0ika&@`!^A&0kq4P)Z_x6&12 z7N3YEnZX-*g|aWmYNnlK}jmIE2uCgP$oSGhUo#6 zo{o(E;u{;U(kYQt%A{dXW2vcO4C?DcTu*9XU+gdam6^M zaI8L_WYF2MWH;+8l1%72OPdcH7`46ue##*{M`O1lz!@IGG!CtA1s_pi`)e?!8_o z2bkxRcZZfn*h{JlpNq}G9X4uVTkHk8Xtgi|(0(IRAs^e@$(Dg%{t^Dl%{Eq(sr-N;6tQL>-vq{=P`!H^I`|O*4FNoQvtYnFaJ4j# z(H23UY7?zbG~^W2sZriEz}EG^aGnjuHCbO7KWE=FRTGiOWCB~2%?-M5Ck(Uq%Dtj5MZ^DIZ|`3>=j2CmmZR?41vdtk^*NI*j`HjppQ zbvVTcP!Gi(k12m{X+Vy1HW=PXcrOu0i++eO`l0YDLa_dX^n9(Tms!9&2ssP&ev}uL z-jB>DuT<&yeG1kSk2KG$3G{Pc?aX|oMm-!Db45-pntc0kA~mx7VCt2TKc#-K>t)vm z|9K+y^yoQPWz}nKi&wopxgoi9qVL_C+zSp^+h44G!u{!6&raU5^Gf&THCrdPHN2Uc z|BFTLLmxcls#*NuM6CaiD{^e5d+6uK+s=*dO&$GYd8)tfXRh~uw0iP?uhmW-d%kw^ zZ_nNDy0PvqcX{2rsYu0S>X^T7a$E2Qch_x4UH!FHlaH=CmHK1*Gw#l&C8_GSzL+@d zSTOlw;Jil_S{qSb@KZcG?#{N6s?fG$+E77_=_0gTDU622?(*3>Bf4Y8I zb;y15zE>w-y8b}xM}5x8g^hoQ%B)yn*P72KPGA4Dd*SCVOyHW2U5$5{*6L|%`Q*9B z#{1q`gj;lmNbjD1;o+9{2VZ{a*%Q|p(mx*CJihFc`qnoFZ#1O0udt0jeC=54XKJ+} z{h{)vmPZ$SYkcV*uJYht*MRmE$zT+17pWHW@k>kt zvj2$PhL9StBb-jL>b3IwITf^SnXg`3MF)lZAtq_q;yBX<+cW|zML46v>b7ib?1TOy z!-dyv%U27@gQ9_}em@Rq=*0$d{cgBpgXlR5@grZHacma+sJCK|+mt`IG$6+r8w_t4 z><33%?D}pE*lqeeAlWgF!)&!0kn6kqJX*R8p`TZON2ZiEoG-say-e2%^cUJmc4*Mx6QwGL=&qZnm z^FaA`Koz9YlJ6YL&f0j6oe$%{mW{J6m{4QlC4=-RIX-=+6}x3!f3ri2XJ=i8C4}Sg zrG*T;7N21kcv=2^hVm)aSUw|Y`sMODJDmrvOnOTEMjsc2C*wNJqtCJXN{SYgcK%%M z_aL5x_$RnIGrj2J-!PKqRGaJEJ$0KH=5&W>^Tox{I|ZLs3v+6!=3t7b0XZ(#Kwnna zFR^P3HR$??m)|R+E`MGh`Mrt}uv_$J*^bXkXbCb2Jdavev83qOY+zohCqvYq7HUj}z+?dfv2BXfRQ z4aoZo){g@aXOLSQJqck)L59Ql)s74o*ycJPO=ElmyUb_5Q=OvYblkkaahU9vwgz(9 zZw=TF&WOzV(k~}X=r!0?~9&Lt_R?NPvCLCJ_YuTe6qa@?$WuU9#KDMcg#F@ z)VD7>GheAu_2(lI$+13x`o*^J^H6Pr(5+3QVtrmd+jU8{!|4CLT-h)4?Jw{7#5B?F zG?O-g%U%Py_Lm`_cIw62U(jDGcWe8TA+*ovy)vb=VSQYz;R?aL5%oI{)Fi`&$I0yw zCw-a)(8tMuhF)wS*STA+L*$E#V=!#gSFy)!%AZ>r$Q2hyppJ#3{i^atTaW0m9qcln z{gOR`ALAyD*KA8e1G((iCxzo;?dxcF_k@JLF&`WRc!d3DEcAo%^&I#W^2zotxJ&0M z=Z!U8GtV9MJs6vruhgje^N}`h#B))mE%f&a$KgCQw|}pY>wF~pWxoBTIwC9y?XJ}9 zs?_J3YkxTfaUJzjto;S|4No8TWZa^C4)dzMF;6LC^>ML=s{!*y`TIo~EgE%>; zQTy!YjpF|~!!Ld;@b8s;`Sr!7HDcnnJoBIJ`l754t$m>X;l3%uh1Vv-Z9i^YnJk?*8BZ-~Yb*yX*QH zwVp-WGk~?q_Us)2m>UA4LK-ZKqr;|!BFxhQH)O~J?ePe#3yHJ2Z=O55CH>m7Ukws`oleVeJo;~IO3QmOC zFXf#LvERx&5n^%6J0u0t&H}a@lwA-vC*OkloJ5Oz019J|!}l^`K|zYWJ0lvz6vT6$ z+`{z)eJ_=CNW@b;+<5tXL)Hrno6nsLyF=2$TApD=q0JTwV3%A7 zZut#zQ)~n0$M+kEv)K|*SehdSo-B(k0k-4_fib~mdm3#X)w@D7j{4Vm4GPG?*MzR6 z)Md5q_3s_N?_*tC4pGk8-&i&8P{%auuKuDqjF{DR*4JGKc}T7L$KpJH`Gz{Ox7(pq zwQ8Pz^jMSq`G@44Wv4W&#qVi)P8*$}mvYJKzLQS!&F`EC#^$>2d%Wb=(;LaitNH3% zqA2yUxawxj>B$a?#xrDAkiwobPVcB_*H`2=HLde#Z#pY}!^r2B${Zg39>=}2g!8BV zT+%D_swYn0PaZ6Pft2n^c3n6S;STM2=(zv>K@zj1xe|V@b+yTV(xfiA>r4`D)Es=M zbx`vKu3z7|={(&1xAU#hk*+H1X6Mz2=2Y^zM3h7tHfi=fkGLj<(Pa^2#f$s;=ym0t z{nRJRQ>liXp5B2YUrwQQ;$xR_?>FZd`wy+%)V!7}_;$lhylj*QqwSjhal`^J52t@0 
zet!{>eH3+AXL+a-?);Oq?o;I?YU^jeF}@3%59SkbZ?{r6D(a{ge<)>?y4;6-j~>{2 zX=mbqkm~8dalC2^Uh{Z7qkE9|HtzNA?c3|??wd7~O3;4%v;NGFeKKi0rC#4ktg4(q zOx${m$V>o^;<(FGc7Eh0fcF&DacwIW`OImo*q71Q%m>nIXYGBgs4$5% zh0)!P65-n`)XeYs6H;-RvK1S%{$niVdMC3TfBqbfpD$72hNU8WZZgFb!kuG-( z5#_$Wh;>&Iu*@C%h^^Xg;+~-d>k*G4GT&9oh+_q@*n~&tuv&p4D0utbC_Hc3O?=7t zGD4itg?T|g_SKs+afWs*?g%K>)_`egaGjPtsr$4W;q=gG(5Zlp8QEYcm*tp31>X(3 zvS(f8=a@24fG9@Lg6aucP(dsbLxs8q7qT5=gmD4fiS0ZQod8~l_|!1?i% z5QrP5gtmO15;AgVN`UR=8sJHTkIg~2!sc_+DYR^m0b?P94j?$Z`_j^J!%WL;6M}>z zLKNQ1OhRMerWs0jVn%o~he}?DDT*PEKF0R&_X(fV;Q1df*!doT(UvDL!d+q<_ z_AC9lQvx-IlLx!2VC~GcVOP?)Ny5>%G}O<)It20Gb~|9#EQzR}VI>6dlFZbP6g@F@ znYfVE@ZZ&|4Xfz#9N6Kj_oBQc*2*R0wkYeinJoVbLx z3i_u9{ow$L61dQ4nwl{)xQS9uuAd#;gon$E#Uz`5>7Yt3M-%PRIe_836H Date: Thu, 15 Oct 2020 10:03:31 -0700 Subject: [PATCH 5/7] MNT: Properly format input and parse output --- lume_model/keras/__init__.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/lume_model/keras/__init__.py b/lume_model/keras/__init__.py index e56145e..ec5e964 100644 --- a/lume_model/keras/__init__.py +++ b/lume_model/keras/__init__.py @@ -33,8 +33,8 @@ def __init__( model_file: str, input_variables: Dict[str, InputVariable], output_variables: Dict[str, OutputVariable], - input_format: dict, - output_format: dict, + input_format: dict = {}, + output_format: dict = {}, ) -> None: """Initializes the model and stores inputs/outputs. @@ -42,8 +42,6 @@ def __init__( model_file (str): Path to model file generated with keras.save() input_variables (List[InputVariable]): list of model input variables output_variables (List[OutputVariable]): list of model output variables - input_format (dict): Instructions for building model input - output_format (dict): Instructions for parsing model ouptut """ @@ -167,7 +165,14 @@ def format_input(self, input_dictionary: dict): Args: input_dictionary (dict): Dictionary mapping input to value. """ - return input_dictionary + formatted_dict = {} + for input_variable, value in input_dictionary.items(): + if isinstance(value, (float, int)): + formatted_dict[input_variable] = np.array([value]) + else: + formatted_dict[input_variable] = [value] + + return formatted_dict def parse_output(self, model_output): """Parses model output to create dictionary variable name -> value. This assumes @@ -180,7 +185,7 @@ def parse_output(self, model_output): if self._output_format["type"] == "softmax": for idx, output_name in enumerate(self._model.output_names): - softmax_output = model_output[idx] + softmax_output = list(model_output[idx]) output_dict[output_name] = softmax_output.index(max(softmax_output)) if self._output_format["type"] == "raw": From 265ae2041c7f515aae5bcbdcc27fb29cc87f7534 Mon Sep 17 00:00:00 2001 From: Jacqueline Garrahan Date: Thu, 15 Oct 2020 10:26:30 -0700 Subject: [PATCH 6/7] DOC: Add note about formatting --- docs/index.md | 23 ++++++++++++++++++++++- lume_model/keras/README.md | 27 ++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/docs/index.md b/docs/index.md index 3d55886..b00663c 100644 --- a/docs/index.md +++ b/docs/index.md @@ -167,8 +167,29 @@ The `KerasModel` packaged in the toolkit will be compatible with models saved us ### Development requirements: - The model must be trained using the custom scaling layers provided in `lume_model.keras.layers` OR using preprocessing layers packaged with Keras OR the custom layers must be defined during build and made accessible during loading by the user. 
Custom layers are not supported out-of-the box by this toolkit. -- The keras model must use named input layers such that the model will accept a dictionary input OR the `KerasModel` must be subclassed and the `format_input` and `format_output` member functions must be overwritten with proper formatting of model input from a dictionary mapping input variable names to values and proper output parsing into a dictionary, respectively. +- The keras model must use named input layers such that the model will accept a dictionary input OR the `KerasModel` must be subclassed and the `format_input` and `format_output` member functions must be overwritten with proper formatting of model input from a dictionary mapping input variable names to values and proper output parsing into a dictionary, respectively. This will require use of the Keras functional API for model construction. +An example of a model built using the functional API is given below: + +```python + +sepal_length_input = keras.Input(shape=(1,), name="SepalLength") +sepal_width_input = keras.Input(shape=(1,), name="SepalWidth") +petal_length_input = keras.Input(shape=(1,), name="PetalLength") +petal_width_input = keras.Input(shape=(1,), name="PetalWidth") +inputs = [sepal_length_input, sepal_width_input, petal_length_input, petal_width_input] +merged = keras.layers.concatenate(inputs) +dense1 = Dense(8, activation='relu')(merged) +output = Dense(3, activation='softmax', name="Species")(dense1) + +# Compile model +model = keras.Model(inputs=inputs, outputs=[output]) +optimizer = tf.keras.optimizers.Adam() +model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy']) + +``` + +Models built in this way will accept inputs in dictionary form mapping variable name to a numpy array of values. ### Configuration file The KerasModel can be instantiated using the utility function `lume_model.utils.model_from_yaml` method. diff --git a/lume_model/keras/README.md b/lume_model/keras/README.md index b4e97e5..4055923 100644 --- a/lume_model/keras/README.md +++ b/lume_model/keras/README.md @@ -6,7 +6,32 @@ The `KerasModel` packaged in the toolkit will be compatible with models saved us ## Development requirements: - The model must be trained using the custom scaling layers provided in `lume_model.keras.layers` OR using preprocessing layers packaged with Keras OR the custom layers must be defined during build and made accessible during loading by the user. Custom layers are not supported out-of-the box by this toolkit. -- The keras model must use named input layers such that the model will accept a dictionary input OR the `KerasModel` must be subclassed and the `format_input` and `format_output` member functions must be overwritten with proper formatting of model input from a dictionary mapping input variable names to values and proper output parsing into a dictionary, respectively. +- The keras model must use named input layers such that the model will accept a dictionary input OR the `KerasModel` must be subclassed and the `format_input` and `format_output` member functions must be overwritten with proper formatting of model input from a dictionary mapping input variable names to values and proper output parsing into a dictionary, respectively. This will require use of the Keras functional API for model construction. 
+ +An example of a model built using the functional API is given below: + +```python +from tensorflow import keras +from tensorflow.keras.layers import Dense +import tensorflow as tf + +sepal_length_input = keras.Input(shape=(1,), name="SepalLength") +sepal_width_input = keras.Input(shape=(1,), name="SepalWidth") +petal_length_input = keras.Input(shape=(1,), name="PetalLength") +petal_width_input = keras.Input(shape=(1,), name="PetalWidth") +inputs = [sepal_length_input, sepal_width_input, petal_length_input, petal_width_input] +merged = keras.layers.concatenate(inputs) +dense1 = Dense(8, activation='relu')(merged) +output = Dense(3, activation='softmax', name="Species")(dense1) + +# Compile model +model = keras.Model(inputs=inputs, outputs=[output]) +optimizer = tf.keras.optimizers.Adam() +model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy']) + +``` + +Models built in this way will accept inputs in dictionary form mapping variable name to a numpy array of values. ## Configuration file The KerasModel can be instantiated using the utility function `lume_model.utils.model_from_yaml` method. From 26ebec07e33d370ca48dd3b0c4e3df4847bf97f6 Mon Sep 17 00:00:00 2001 From: Jacqueline Garrahan Date: Thu, 15 Oct 2020 10:41:32 -0700 Subject: [PATCH 7/7] MNT: Replace args with kwargs --- examples/files/iris_config.yaml | 3 +-- lume_model/tests/test_files/iris_config.yaml | 2 +- lume_model/utils.py | 20 ++++++++++---------- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/examples/files/iris_config.yaml b/examples/files/iris_config.yaml index 794323b..7f0ce6f 100644 --- a/examples/files/iris_config.yaml +++ b/examples/files/iris_config.yaml @@ -3,12 +3,11 @@ model: model_class: lume_model.keras.KerasModel requirements: tensorflow: 2.3.1 - args: + kwargs: model_file: examples/files/iris_model.h5 output_format: type: softmax - input_variables: SepalLength: name: SepalLength diff --git a/lume_model/tests/test_files/iris_config.yaml b/lume_model/tests/test_files/iris_config.yaml index 794323b..f407b72 100644 --- a/lume_model/tests/test_files/iris_config.yaml +++ b/lume_model/tests/test_files/iris_config.yaml @@ -3,7 +3,7 @@ model: model_class: lume_model.keras.KerasModel requirements: tensorflow: 2.3.1 - args: + kwargs: model_file: examples/files/iris_model.h5 output_format: type: softmax diff --git a/lume_model/utils.py b/lume_model/utils.py index dea4592..f3cf4e1 100644 --- a/lume_model/utils.py +++ b/lume_model/utils.py @@ -177,7 +177,7 @@ def model_from_yaml(config_file, model_class=None, model_kwargs=None): sys.exit() model = None - model_args = { + model_kwargs = { "input_variables": input_variables, "output_variables": output_variables, } @@ -210,29 +210,29 @@ def model_from_yaml(config_file, model_class=None, model_kwargs=None): logger.warning("Module not installed") klass = locate(config["model"]["model_class"]) - if "args" in config["model"]: - model_args.update(config["model"]["args"]) + if "kwargs" in config["model"]: + model_kwargs.update(config["model"]["kwargs"]) if "input_format" in config["model"]: - model_args["input_format"] = config["model"]["input_format"] + model_kwargs["input_format"] = config["model"]["input_format"] if "output_format" in config["model"]: - model_args["output_format"] = config["model"]["output_format"] + model_kwargs["output_format"] = config["model"]["output_format"] try: - model = klass(**model_args) + model = klass(**model_kwargs) except: - logger.exception(f"Unable to load model with args: {model_args}") + 
logger.exception(f"Unable to load model with args: {model_kwargs}") sys.exit() elif model_class is not None: if model_kwargs: - model_args.update((model_kwargs)) + model_kwargs.update((model_kwargs)) try: - model = model_class(**model_args) + model = model_class(**model_kwargs) except: - logger.exception(f"Unable to load model with args: {model_args}") + logger.exception(f"Unable to load model with args: {model_kwargs}") sys.exit() return model
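
A minimal sketch of the data flow the reworked `format_input`/`parse_output` pair implements, using plain numpy and stand-in values (the feature values, the fake softmax vector, and the single-entry `output_names` list are illustrative assumptions rather than a real `model.predict` result):

```python
import numpy as np

# Scalar inputs keyed by the named Keras input layers from iris_config.yaml
raw_inputs = {"SepalLength": 5.1, "SepalWidth": 3.5, "PetalLength": 1.4, "PetalWidth": 0.2}

# Mirrors KerasModel.format_input: plain floats/ints are wrapped in one-element arrays
formatted = {name: np.array([value]) for name, value in raw_inputs.items()}

# Stand-in for model.predict(formatted): one softmax vector per entry in model.output_names
output_names = ["Species"]
model_output = [np.array([0.02, 0.95, 0.03])]

# Mirrors the "softmax" branch of KerasModel.parse_output: report the argmax class index
parsed = {}
for idx, name in enumerate(output_names):
    softmax_scores = list(model_output[idx])
    parsed[name] = softmax_scores.index(max(softmax_scores))

print(formatted["SepalLength"])  # [5.1]
print(parsed)                    # {'Species': 1}
```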
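
With the configuration key renamed from `args` to `kwargs`, loading the iris example through the utility function stays a one-liner. A sketch under stated assumptions: the updated `examples/files/iris_config.yaml` and `examples/files/iris_model.h5` are on disk, the config's output section defines `Species` as in the iris example, and the loaded model exposes its parsed variables as `output_variables` (the attribute used in `_prepare_outputs`):

```python
from lume_model.utils import model_from_yaml

# Model settings now live under model -> kwargs in the YAML; input_format is no
# longer needed because the iris model was rebuilt with named input layers.
keras_model = model_from_yaml("examples/files/iris_config.yaml")

# The parsed output variables are keyed by name, so the softmax result of
# evaluate() can be matched back to the Species variable.
print(list(keras_model.output_variables))  # expected: ['Species']
```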