From 1deab967c5d6db4817fa9c4aa8694daf00e9c9d1 Mon Sep 17 00:00:00 2001
From: Ian Goodfellow
Date: Sat, 2 Nov 2013 08:40:04 -0400
Subject: [PATCH] config

---
 sample_prop/agent_0.yaml          | 77 +++++++++++++++++++++++++++++++
 sample_prop/sgd_mnist_9r.yaml     | 48 +++++++++++++++++++
 sample_prop/sgd_mnist_Lr.yaml     | 51 ++++++++++++++++++++
 sample_prop/sgd_mnist_new_0.yaml  | 49 ++++++++++++++++++++
 sample_prop/sgd_mnist_new_1.yaml  | 60 ++++++++++++++++++++++++
 sample_prop/sgd_mnist_new_2.yaml  | 62 +++++++++++++++++++++++++
 sample_prop/sgd_mnist_reboot.yaml | 63 +++++++++++++++++++++++++
 7 files changed, 410 insertions(+)
 create mode 100644 sample_prop/agent_0.yaml
 create mode 100644 sample_prop/sgd_mnist_9r.yaml
 create mode 100644 sample_prop/sgd_mnist_Lr.yaml
 create mode 100644 sample_prop/sgd_mnist_new_0.yaml
 create mode 100644 sample_prop/sgd_mnist_new_1.yaml
 create mode 100644 sample_prop/sgd_mnist_new_2.yaml
 create mode 100644 sample_prop/sgd_mnist_reboot.yaml

diff --git a/sample_prop/agent_0.yaml b/sample_prop/agent_0.yaml
new file mode 100644
index 00000000..d95279dc
--- /dev/null
+++ b/sample_prop/agent_0.yaml
@@ -0,0 +1,77 @@
+!obj:pylearn2.train.Train {
+    dataset: &train !obj:pylearn2.datasets.mnist.MNIST {
+        which_set: "train",
+        binarize: 1,
+        one_hot: 1,
+        start: 0,
+        stop: 50000
+    },
+    model: !obj:galatea.sample_prop.agent.AgentHive1 {
+        input_space: !obj:pylearn2.space.VectorSpace { dim: 784 },
+        layers: [
+            !obj:galatea.sample_prop.agent.LinearAgents {
+                layer_name: 'h0',
+                irange: .05,
+                #max_col_norm: 10.,
+                dim: 500
+            },
+            !obj:galatea.sample_prop.agent.LinearAgents {
+                layer_name: 'h1',
+                irange: .05,
+                #max_col_norm: 10.,
+                dim: 500
+            },
+            !obj:pylearn2.models.mlp.Softmax {
+                layer_name: 'y',
+                irange: 0.05,
+                n_classes: 10,
+                max_col_norm: 10
+            }
+        ]
+    },
+    algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
+        batch_size: 100,
+        set_batch_size: 1,
+        learning_rate: .005,
+        init_momentum: .5,
+        monitoring_dataset:
+            {
+                'train' : *train,
+                'valid' : !obj:pylearn2.datasets.mnist.MNIST {
+                    which_set: "train",
+                    binarize: 1,
+                    one_hot: 1,
+                    start: 50000,
+                    stop: 60000
+                }
+            },
+        cost : !obj:galatea.sample_prop.agent.AgentHive1Cost1 {
+            flip_prob: .01,
+        },
+        termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased
+        {
+            channel_name: "valid_y_misclass",
+            N: 100,
+            prop_decrease: 0.
+        }
+    },
+    extensions: [
+        !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor {
+            start: 0,
+            saturate: 200,
+            final_momentum: .9
+        },
+        #!obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch {
+        #    start: 1,
+        #    saturate: 527,
+        #    decay_factor: 0.006308
+        #},
+        !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest {
+            channel_name: "valid_y_misclass",
+            save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl"
+        },
+    ],
+    save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}.pkl",
+    save_freq : 1
+}
+
diff --git a/sample_prop/sgd_mnist_9r.yaml b/sample_prop/sgd_mnist_9r.yaml
new file mode 100644
index 00000000..f1502dfb
--- /dev/null
+++ b/sample_prop/sgd_mnist_9r.yaml
@@ -0,0 +1,48 @@
+!obj:pylearn2.train.Train {
+    dataset: &train !obj:pylearn2.datasets.mnist.MNIST {
+        which_set: "train",
+        binarize: 1,
+        one_hot: 1,
+        start: 0,
+        stop: 50000
+    },
+    model: !obj:galatea.sample_prop.basic.SimpleModel {
+        nvis: 784,
+        num_hid: 500,
+        num_class: 10
+    },
+    algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
+        batch_size: 1000,
+        set_batch_size: 1,
+        learning_rate: 1e-1,
+        init_momentum: .5,
+        monitoring_dataset:
+            {
+                'train' : *train,
+                'valid' : !obj:pylearn2.datasets.mnist.MNIST {
+                    which_set: "train",
+                    binarize: 1,
+                    one_hot: 1,
+                    start: 50000,
+                    stop: 60000
+                }
+            },
+        cost : !obj:galatea.sample_prop.basic.SamplingCost {
+            weight_decay_1: .000001,
+            weight_decay_2: .000001
+        },
+    },
+    extensions: [ !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor {
+        start: 0,
+        saturate: 200,
+        final_momentum: .99
+    },
+    !obj:pylearn2.training_algorithms.sgd.OneOverEpoch {
+        start: 100,
+        half_life: 10
+    }
+    ],
+    save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}.pkl",
+    save_freq : 1
+}
+
diff --git a/sample_prop/sgd_mnist_Lr.yaml b/sample_prop/sgd_mnist_Lr.yaml
new file mode 100644
index 00000000..c3ed88c5
--- /dev/null
+++ b/sample_prop/sgd_mnist_Lr.yaml
@@ -0,0 +1,51 @@
+!obj:pylearn2.train.Train {
+    dataset: &train !obj:pylearn2.datasets.mnist.MNIST {
+        which_set: "train",
+        binarize: 1,
+        one_hot: 1,
+        start: 0,
+        stop: 50000
+    },
+    model: !obj:galatea.sample_prop.basic.SimpleModel2 {
+        nvis: 784,
+        num_hid: 500,
+        num_hid_2: 500,
+        num_class: 10
+    },
+    algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
+        batch_size: 100,
+        set_batch_size: 1,
+        learning_rate: 5e-2,
+        init_momentum: .5,
+        monitoring_dataset:
+            {
+                'train' : *train,
+                'valid' : !obj:pylearn2.datasets.mnist.MNIST {
+                    which_set: "train",
+                    binarize: 1,
+                    one_hot: 1,
+                    start: 50000,
+                    stop: 60000
+                }
+            },
+        cost : !obj:galatea.sample_prop.basic.SamplingCost3 {
+        },
+    },
+    extensions: [ !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor {
+        start: 0,
+        saturate: 200,
+        final_momentum: .9
+    },
+    #!obj:pylearn2.training_algorithms.sgd.OneOverEpoch {
+    #    start: 100,
+    #    half_life: 5
+    #},
+    !obj:pylearn2.training_algorithms.sgd.PolyakAveraging
+    {
+        start: 100
+    }
+    ],
+    save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}.pkl",
+    save_freq : 100
+}
+
diff --git a/sample_prop/sgd_mnist_new_0.yaml b/sample_prop/sgd_mnist_new_0.yaml
new file mode 100644
index 00000000..541296e1
--- /dev/null
+++ b/sample_prop/sgd_mnist_new_0.yaml
@@ -0,0 +1,49 @@
+!obj:pylearn2.train.Train {
+    dataset: &train !obj:pylearn2.datasets.mnist.MNIST {
+        which_set: "train",
+        binarize: 1,
+        one_hot: 1,
+        start: 0,
+        stop: 50000
+    },
+    model: !obj:galatea.sample_prop.basic.SimpleModel2 {
+        nvis: 784,
+        num_hid: 500,
+        num_hid_2: 500,
+        num_class: 10,
+        y_max_col_norm: 10
+    },
+    algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
+        batch_size: 100,
+        set_batch_size: 1,
+        learning_rate: 5e-2,
+        init_momentum: .5,
+        monitoring_dataset:
+            {
+                'train' : *train,
+                'valid' : !obj:pylearn2.datasets.mnist.MNIST {
+                    which_set: "train",
+                    binarize: 1,
+                    one_hot: 1,
+                    start: 50000,
+                    stop: 60000
+                }
+            },
+        cost : !obj:galatea.sample_prop.basic.SamplingCost3 {
+        },
+    },
+    extensions: [
+        !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor {
+            start: 0,
+            saturate: 200,
+            final_momentum: .9
+        },
+        #!obj:pylearn2.training_algorithms.sgd.OneOverEpoch {
+        #    start: 100,
+        #    half_life: 5
+        #},
+    ],
+    save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}.pkl",
+    save_freq : 100
+}
+
diff --git a/sample_prop/sgd_mnist_new_1.yaml b/sample_prop/sgd_mnist_new_1.yaml
new file mode 100644
index 00000000..fd94f687
--- /dev/null
+++ b/sample_prop/sgd_mnist_new_1.yaml
@@ -0,0 +1,60 @@
+!obj:pylearn2.train.Train {
+    dataset: &train !obj:pylearn2.datasets.mnist.MNIST {
+        which_set: "train",
+        binarize: 1,
+        one_hot: 1,
+        start: 0,
+        stop: 50000
+    },
+    model: !obj:galatea.sample_prop.basic.SimpleModel2 {
+        nvis: 784,
+        num_hid: 500,
+        num_hid_2: 500,
+        num_class: 10,
+        y_max_col_norm: 10
+    },
+    algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
+        batch_size: 100,
+        set_batch_size: 1,
+        learning_rate: 5e-2,
+        init_momentum: .5,
+        monitoring_dataset:
+            {
+                'train' : *train,
+                'valid' : !obj:pylearn2.datasets.mnist.MNIST {
+                    which_set: "train",
+                    binarize: 1,
+                    one_hot: 1,
+                    start: 50000,
+                    stop: 60000
+                }
+            },
+        cost : !obj:galatea.sample_prop.basic.SamplingCost3 {
+        },
+        termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased
+        {
+            channel_name: "valid_y_misclass",
+            N: 100,
+            prop_decrease: 0.
+        }
+    },
+    extensions: [
+        !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor {
+            start: 0,
+            saturate: 200,
+            final_momentum: .9
+        },
+        !obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch {
+            start: 1,
+            saturate: 527,
+            decay_factor: 0.006308
+        },
+        !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest {
+            channel_name: "valid_y_misclass",
+            save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl"
+        },
+    ],
+    save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}.pkl",
+    save_freq : 100
+}
+
diff --git a/sample_prop/sgd_mnist_new_2.yaml b/sample_prop/sgd_mnist_new_2.yaml
new file mode 100644
index 00000000..61e288c4
--- /dev/null
+++ b/sample_prop/sgd_mnist_new_2.yaml
@@ -0,0 +1,62 @@
+!obj:pylearn2.train.Train {
+    dataset: &train !obj:pylearn2.datasets.mnist.MNIST {
+        which_set: "train",
+        binarize: 1,
+        one_hot: 1,
+        start: 0,
+        stop: 50000
+    },
+    model: !obj:galatea.sample_prop.basic.SimpleModel2 {
+        nvis: 784,
+        num_hid: 500,
+        num_hid_2: 500,
+        num_class: 10,
+        #h0_max_col_norm: 3.,
+        #h1_max_col_norm: 2.,
+        y_max_col_norm: 10
+    },
+    algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
+        batch_size: 100,
+        set_batch_size: 1,
+        learning_rate: .05,
+        init_momentum: .5,
+        monitoring_dataset:
+            {
+                'train' : *train,
+                'valid' : !obj:pylearn2.datasets.mnist.MNIST {
+                    which_set: "train",
+                    binarize: 1,
+                    one_hot: 1,
+                    start: 50000,
+                    stop: 60000
+                }
+            },
+        cost : !obj:galatea.sample_prop.basic.SamplingCost3 {
+        },
+        termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased
+        {
+            channel_name: "valid_y_misclass",
+            N: 100,
+            prop_decrease: 0.
+        }
+    },
+    extensions: [
+        !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor {
+            start: 0,
+            saturate: 200,
+            final_momentum: .9
+        },
+        #!obj:pylearn2.training_algorithms.sgd.LinearDecayOverEpoch {
+        #    start: 1,
+        #    saturate: 527,
+        #    decay_factor: 0.006308
+        #},
+        !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest {
+            channel_name: "valid_y_misclass",
+            save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl"
+        },
+    ],
+    save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}.pkl",
+    save_freq : 100
+}
+
diff --git a/sample_prop/sgd_mnist_reboot.yaml b/sample_prop/sgd_mnist_reboot.yaml
new file mode 100644
index 00000000..e6a62e37
--- /dev/null
+++ b/sample_prop/sgd_mnist_reboot.yaml
@@ -0,0 +1,63 @@
+!obj:pylearn2.train.Train {
+    dataset: &train !obj:pylearn2.datasets.mnist.MNIST {
+        which_set: 'train',
+        one_hot: 1,
+        start: 0,
+        stop: 50000
+    },
+    model: !obj:galatea.sample_prop.basic.SimpleModel2 {
+        # convert to irange .005 for all 3 layers
+        nvis: 784,
+        num_hid: 1200,
+        num_hid_2: 1200,
+        num_class: 10,
+        h0_max_col_norm: 1.9,
+        h1_max_col_norm: 1.9,
+        y_max_col_norm: 1.9
+    },
+    algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
+        batch_size: 100,
+        learning_rate: .1,
+        learning_rule: !obj:pylearn2.training_algorithms.learning_rule.Momentum {
+            init_momentum: .5,
+        },
+        monitoring_dataset:
+            {
+                'train' : *train,
+                'valid' : !obj:pylearn2.datasets.mnist.MNIST {
+                    which_set: 'train',
+                    one_hot: 1,
+                    start: 50000,
+                    stop: 60000
+                },
+                'test' : !obj:pylearn2.datasets.mnist.MNIST {
+                    which_set: 'test',
+                    one_hot: 1,
+                }
+            },
+        cost : !obj:galatea.sample_prop.basic.SamplingCost3 {
+        },
+        termination_criterion: !obj:pylearn2.termination_criteria.MonitorBased {
+            channel_name: "valid_y_misclass",
+            prop_decrease: 0.,
+            N: 100
+        },
+        update_callbacks: !obj:pylearn2.training_algorithms.sgd.ExponentialDecay {
+            decay_factor: 1.000004,
+            min_lr: .000001
+        }
+    },
+    extensions: [
+        !obj:pylearn2.train_extensions.best_params.MonitorBasedSaveBest {
+            channel_name: 'valid_y_misclass',
+            save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}_best.pkl"
+        },
+        !obj:pylearn2.training_algorithms.learning_rule.MomentumAdjustor {
+            start: 1,
+            saturate: 250,
+            final_momentum: .7
+        }
+    ],
+    save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}.pkl",
+    save_freq: 1
+}