experiment.py
#!/usr/bin/env python
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Configures RunConfig."""
import logging
import math
import os
from . import inputs
import tensorflow as tf
import tensorflow_model_analysis as tfma

def create_run_config(args):
    """Creates a tf.estimator.RunConfig object.

    Args:
        args: experiment parameters.

    Returns:
        A tf.estimator.RunConfig shared by training and evaluation.
    """
    # Configure the distribution strategy if GPUs are available.
    distribution_strategy = None
    # Get the number of available GPU devices.
    num_gpus = len(tf.config.list_physical_devices("GPU"))
    logging.info("%d GPUs are available.", num_gpus)
    if num_gpus > 1:
        distribution_strategy = tf.distribute.MirroredStrategy()
        logging.info("MirroredStrategy will be used for training.")
        # Scale down to the per-replica batch size so the effective global
        # batch size stays at args.batch_size across the mirrored replicas.
        args.batch_size = int(math.ceil(args.batch_size / num_gpus))

    # Create the RunConfig.
    return tf.estimator.RunConfig(
        tf_random_seed=19831006,
        log_step_count_steps=100,
        model_dir=args.job_dir,
        save_checkpoints_secs=args.eval_frequency_secs,
        keep_checkpoint_max=3,
        train_distribute=distribution_strategy,
        eval_distribute=distribution_strategy)
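
# Illustrative example (not part of the original module): create_run_config
# only reads args.batch_size, args.job_dir, and args.eval_frequency_secs, so
# a lightweight namespace is enough to exercise it in isolation:
#
#     from types import SimpleNamespace
#     config = create_run_config(SimpleNamespace(
#         batch_size=128, job_dir="/tmp/model", eval_frequency_secs=600))
#     assert config.keep_checkpoint_max == 3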

def run(estimator, args):
    """Trains, evaluates, and exports the model for serving and evaluation.

    Args:
        estimator: TensorFlow Estimator.
        args: experiment parameters.
    """
    # Create the TrainSpec.
    train_spec = tf.estimator.TrainSpec(
        input_fn=inputs.make_input_fn(
            file_pattern=args.train_files,
            mode=tf.estimator.ModeKeys.TRAIN,
            batch_size=args.batch_size,
            has_header=True),
        max_steps=int(args.train_steps))

    # Create an exporter for the serving model.
    exporter = tf.estimator.FinalExporter(
        "estimate",
        inputs.SERVING_INPUT_RECEIVER_FUNCTIONS[args.serving_export_format])

    # Create the EvalSpec.
    eval_spec = tf.estimator.EvalSpec(
        input_fn=inputs.make_input_fn(
            file_pattern=args.eval_files,
            mode=tf.estimator.ModeKeys.EVAL,
            batch_size=args.batch_size),
        steps=args.eval_steps,
        exporters=[exporter],
        start_delay_secs=0,
        throttle_secs=0)

    # Train and evaluate.
    logging.info("Training and evaluating...")
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

    # Export the model for TensorFlow Model Analysis.
    if estimator.config.is_chief:
        logging.info("Exporting the model for evaluation...")
        tfma.export.export_eval_savedmodel(
            estimator=estimator,
            export_dir_base=os.path.join(estimator.model_dir,
                                         "export/evaluate"),
            eval_input_receiver_fn=inputs.EVALUATING_INPUT_RECEIVER_FUNCTIONS[
                args.eval_export_format])
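
# ---------------------------------------------------------------------------
# Minimal usage sketch (an assumption, not part of the original module): wires
# command-line flags into create_run_config() and run(). The flag names mirror
# the args attributes read above. The DNNClassifier and its feature column are
# placeholder stand-ins for the repository's real model factory, and the valid
# values for the two export-format flags are whatever keys inputs defines in
# SERVING_INPUT_RECEIVER_FUNCTIONS and EVALUATING_INPUT_RECEIVER_FUNCTIONS.
# Because of the relative import, invoke it as a module, e.g.
# `python -m <package>.experiment --job-dir /tmp/model ...`.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--job-dir", dest="job_dir", required=True)
    parser.add_argument("--train-files", dest="train_files", required=True)
    parser.add_argument("--eval-files", dest="eval_files", required=True)
    parser.add_argument("--batch-size", dest="batch_size", type=int,
                        default=128)
    parser.add_argument("--train-steps", dest="train_steps", type=int,
                        default=1000)
    parser.add_argument("--eval-steps", dest="eval_steps", type=int,
                        default=100)
    parser.add_argument("--eval-frequency-secs", dest="eval_frequency_secs",
                        type=int, default=600)
    parser.add_argument("--serving-export-format",
                        dest="serving_export_format", required=True)
    parser.add_argument("--eval-export-format",
                        dest="eval_export_format", required=True)
    flags = parser.parse_args()

    run_config = create_run_config(flags)
    # Placeholder estimator; substitute the model defined elsewhere in the
    # repository.
    model = tf.estimator.DNNClassifier(
        hidden_units=[64, 32],
        feature_columns=[tf.feature_column.numeric_column("x")],
        config=run_config)
    run(model, flags)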