# Patch summary (free-text preamble before the first "diff --git" header):
#   * deepvariant/dv_config.py: inside get_config, relabels the optimizer
#     comment to "Optimizer params" and adds config.ema_momentum = 0.99
#     next to config.optimizer_weight_decay.
#   * deepvariant/train.py: builds _CHECKPOINT_OPTIONS with the default
#     tf.train.CheckpointOptions() instead of passing enable_async=True.
#   * run-prereq.sh: installs tf-models-official==2.11.6 ahead of the final
#     protobuf==3.13.0 pin; the added shell comment explains that the order
#     matters and that the pin is meant to be temporary.
# NOTE(review): line breaks, indentation and blank context lines were restored
# from a whitespace-collapsed copy of this patch -- confirm the context
# whitespace against the target files before applying.
diff --git a/deepvariant/dv_config.py b/deepvariant/dv_config.py
index 5cda96be..11131086 100644
--- a/deepvariant/dv_config.py
+++ b/deepvariant/dv_config.py
@@ -360,8 +360,9 @@ def get_config(config_name: str) -> ml_collections.ConfigDict:
   # Stop training when this many consecutive evaluations yield no improvement.
   config.early_stopping_patience = 250
 
-  # Weight decay of optimizer
+  # Optimizer params
   config.optimizer_weight_decay = 0.0
+  config.ema_momentum = 0.99
 
   # An 'iter' refers to a group of train/tune steps run in succession.
   config.steps_per_iter = 128
diff --git a/deepvariant/train.py b/deepvariant/train.py
index 2185c42c..d90e78e6 100644
--- a/deepvariant/train.py
+++ b/deepvariant/train.py
@@ -49,7 +49,7 @@ from deepvariant import keras_modeling
 from official.modeling import optimization
 
 
-_CHECKPOINT_OPTIONS = tf.train.CheckpointOptions(enable_async=True)
+_CHECKPOINT_OPTIONS = tf.train.CheckpointOptions()
 
 _LEADER = flags.DEFINE_string(
     'leader',
diff --git a/run-prereq.sh b/run-prereq.sh
index 35389c59..177c691c 100755
--- a/run-prereq.sh
+++ b/run-prereq.sh
@@ -289,6 +289,15 @@ sudo -H NEEDRESTART_MODE=a apt-get install "${APT_ARGS[@]}" libssl-dev libcurl4-
 # for the debruijn graph
 sudo -H NEEDRESTART_MODE=a apt-get install "${APT_ARGS[@]}" libboost-graph-dev > /dev/null
 
+# Pin tf-models-official back to 2.11.6 to be closer to
+# ${DV_GCP_OPTIMIZED_TF_WHL_VERSION} (which is 2.11.0).
+# This is to avoid the issue:
+# ValueError: Addons>LAMB has already been registered to
+# However, it's important that the protobuf pinning happens after this!
+# TODO: Remove this later once the first dependency can be changed
+# to ${DV_GCP_OPTIMIZED_TF_WHL_VERSION}.
+pip3 install "${PIP_ARGS[@]}" "tf-models-official==2.11.6"
+
 # Just being safe, pin protobuf's version one more time.
 pip3 install "${PIP_ARGS[@]}" 'protobuf==3.13.0'
 