From 2453f6bf408cbe820d48c79bb91310e8adebce14 Mon Sep 17 00:00:00 2001
From: Ian Goodfellow <goodfellow.ian@gmail.com>
Date: Wed, 13 Jul 2011 11:47:51 -0400
Subject: [PATCH] Rewrite cos_plot on Graph2D, add score plots and weight accessors, retune cos configs

---
 energy_functions/energy_function.py           |   2 +-
 energy_functions/scratch.py                   |   8 +
 exploring_estimation_criteria/cos_plot.py     | 200 +++++++++---------
 .../cos_reconsE_nce.yaml                      |   1 +
 .../cos_reconsE_sm.yaml                       |   2 +-
 .../cos_reconsE_smd.yaml                      |  20 +-
 .../experiment_a.yaml                         |   3 +-
 models/febm.py                                |   8 +
 models/local_noise_ebm.py                     |   5 +-
 recons_srbm/make_dataset_2M.py                |   6 +-
 10 files changed, 132 insertions(+), 123 deletions(-)

diff --git a/energy_functions/energy_function.py b/energy_functions/energy_function.py
index 2ab11fce..bbf2d39f 100644
--- a/energy_functions/energy_function.py
+++ b/energy_functions/energy_function.py
@@ -11,7 +11,7 @@ def score(self, X):
 
         X_name = 'X' if X.name is None else X.name
 
-        E = self(X)
+        E = self.free_energy(X)
 
         #There should be one energy value for each example in the batch
         assert len(E.type.broadcastable) == 1
diff --git a/energy_functions/scratch.py b/energy_functions/scratch.py
index ad2b592c..eaf84f9a 100644
--- a/energy_functions/scratch.py
+++ b/energy_functions/scratch.py
@@ -59,6 +59,14 @@ def redo_everything(self):
         #
     #
 
+    def get_weights_format(self):
+        return ['v','h']
+    #
+
+    def get_weights(self, borrow = False):
+        return self.W.get_value(borrow = borrow)
+    #
+
     def encode(self, X):
         X_name = 'X' if X.name is None else X.name
         H = T.nnet.sigmoid(T.dot(X, self.W) + self.bias_hid)
diff --git a/exploring_estimation_criteria/cos_plot.py b/exploring_estimation_criteria/cos_plot.py
index f73fde55..dae9cf35 100644
--- a/exploring_estimation_criteria/cos_plot.py
+++ b/exploring_estimation_criteria/cos_plot.py
@@ -1,6 +1,11 @@
 import numpy as N
 from theano import config
 floatX = config.floatX
+from pylearn2.datasets.cos_dataset import CosDataset
+from pylearn2.gui.graph_2D import Graph2D, HeatMap
+import theano.tensor as T
+from theano import function
+from theano.printing import Print
 
 rows = 500
 cols  = 1000
@@ -8,22 +13,21 @@
 xmin = -6.2
 xmax = 6.2
 
-ymin = -3.3
-ymax = 3.3
-
-from pylearn2.config import yaml_parse
 import sys
 from pylearn2.utils import serial
 
 model = serial.load(sys.argv[1])
 model.redo_theano()
-dataset = yaml_parse.load(model.dataset_yaml_src)
+dataset = CosDataset()
 
+print 'examples seen: '+str(model.examples_seen)
 
 PDF = 0    #pdf
 GPDF = 1   #gradient of pdf
 GDPDF = 2  #gradient direction of pdf
 ENERGY = 3 #energy
+SCORE = 4  #score
+SCORED = 5 #score direction
 
 if len(sys.argv) == 2 or sys.argv[2] == '--pdf':
     t = PDF
@@ -33,127 +37,113 @@
     t = GDPDF
 elif sys.argv[2] == '--energy':
     t = ENERGY
+elif sys.argv[2] == '--score':
+    t = SCORE
+elif sys.argv[2] == '--scored':
+    t = SCORED
+else:
+    raise ValueError('unknown flag '+sys.argv[2])
 
+g = Graph2D( shape = (rows, cols), xlim=(xmin,xmax), ycenter = 0. )
 
 
-def make_img(f):
-    rval = N.zeros((rows,cols,1))
-
-    for i in xrange(cols):
-        print 'col '+str(i)
-        x = xmin + (xmax-xmin)*float(i)/float(cols-1)
-        ys = [ymin+(ymax-ymin)*float(j)/float(rows-1) for j in xrange(rows) ]
-        assert len(ys) == rows
-
-        xs = N.zeros((rows,1))+x
-        assert xs.shape[0] == rows
-        assert xs.shape[1] == 1
-
-        ys = N.asarray(ys)
-        ys = N.asarray([ys]).T
-        assert ys.shape[0] == rows
-        assert ys.shape[1] == 1
-
-        mat = N.hstack( (xs,ys) )
-
-        mat = N.cast[floatX](mat)
-
-        assert mat.shape[0] == rows
-        assert mat.shape[1] == 2
-
-        mf = f(mat)
-
-        assert mf.shape[0] == 500
-        assert len(mf.shape) == 1
+def energy_normalizer(I):
+    I -= I.min()
+    I /= I.max()
+    return  (I*2.0)-1.0
 
-        #try:
-        rval[:,i,0] = mf
-        #except ValueError, e:
-        #    print rval[:,i,0].shape
-        #    print f(mat).shape
-        #    raise e
+def pdf_normalizer(I):
+    assert I.min() >= 0.0
+    I /= I.max()
 
-    assert not N.any(N.isinf(rval))
-    assert not N.any(N.isnan(rval))
+    for i in xrange(I.shape[0]):
+        for j in xrange(I.shape[1]):
+            if N.any(N.isnan(I[i,j,:]) + N.isinf(I[i,j,:])):
+                I[i,j,0] = 1.0
+                I[i,j,1] = 0.0
+                I[i,j,2] = 0.0
 
-    #rval -= rval.mean()
-    #return rval
+    return (I*2.0)-1.0
 
-    if t == ENERGY:
-        rval -= rval.min()
-        rval /= rval.max()
-        return rval
+def gpdf_normalizer(I):
+    I /= N.abs(I).max()
+    return I
 
-    assert rval.min() >= 0.0
-    mx = rval.max()
+def gdpdf_normalizer(I):
+    return I
 
-    rval /= mx
-    rval *= 2.0
-    rval -= 1.0
+X = T.matrix()
 
+class FuckYouTheano:
+    def __init__(self, join_should_be_able_to_do_this):
+        self.f = join_should_be_able_to_do_this
 
-    if t == PDF:
+    def __call__(self, X):
+        rval = N.zeros((X.shape[0],3),dtype=floatX)
+        rval[:,0:2] = self.f(X)
         return rval
 
-    rval2 = N.zeros((rval.shape[0]-1,rval.shape[1]-1,3))
-
-    for i in xrange(rval2.shape[0]):
-        for j in xrange(rval2.shape[1]):
-            #rval2[i,j,0] = N.sign(rval[i+1,j,0]-rval[i,j,0])
-            #rval2[i,j,1] = N.sign(rval[i,j+1,0]-rval[i,j,0])
-
-            rval2[i,j,0] = rval[i+1,j,0]-rval[i,j,0]
-            rval2[i,j,1] = rval[i,j+1,0]-rval[i,j,0]
-            if t == GDPDF:
-                rval2[i,j,:] /= (1e-15+N.sqrt((rval2[i,j,:] ** 2.).sum()))
-            #
-        #
-    #
-
-    if t == GPDF:
-        rval2 /= N.abs(rval2).max()
-
+def grad_func( pdf ):
+    grad = T.grad(pdf.sum(), X)
+    return FuckYouTheano(function([X],grad))
 
-    return rval2
+def grad_dir_func( pdf ):
+    grad = T.grad(pdf.sum(), X)
+    grad = Print('before',attrs=['min','max'])(grad)
+    grad /= T.sqrt(1e-15+T.sum(T.sqr(grad),axis=1).dimshuffle(0,'x'))
+    grad = Print('after',attrs=['min','max'])(grad)
+    return FuckYouTheano(function([X],grad))
 
-def pdf(mat):
-    rval = model.E_X_batch_func(mat)
-    #assert not N.any(N.isinf(rval))
-    #assert not N.any(N.isnan(rval))
-    assert rval.shape[0] == mat.shape[0]
-    assert len(rval.shape) == 1
 
-    #print (rval.min(),rval.max())
+if t not in [ENERGY, SCORE, SCORED]:
+    g.components.append(HeatMap( f = function([X], model.free_energy(X)),  normalizer = None ))
+    offset = g.render().mean()
+#
 
-    if t != ENERGY:
-        rval = N.exp(-rval)
-    #assert not N.any(N.isinf(rval))
-    #assert not N.any(N.isnan(rval))
-
-    return rval
+if t == ENERGY:
+    df = dataset.free_energy_func
+    mfe = model.free_energy(X)
+    mf = function([X],mfe)
+    normalizer = energy_normalizer
+elif t == PDF:
+    df = dataset.pdf_func
+
+    mfe = model.free_energy(X)
+    mfe = Print('model free energy',attrs=['min','max'])(mfe)
+    mf = function([X], T.exp(-mfe+offset))
+
+    normalizer = pdf_normalizer
+elif t == GPDF:
+    df = grad_func(dataset.pdf(X))
+    mf = grad_func(T.exp(-model.free_energy(X)+offset))
+
+    normalizer = gpdf_normalizer
+elif t == GDPDF:
+    df = grad_dir_func(dataset.pdf(X))
+    mf = grad_dir_func(T.exp(-model.free_energy(X)+offset))
+
+    normalizer = gdpdf_normalizer
+elif t == SCORE:
+    df = grad_func(- dataset.free_energy(X))
+    mf = grad_func( - model.free_energy(X))
+
+    normalizer = gpdf_normalizer
+elif t == SCORED:
+    df = grad_dir_func(- dataset.free_energy(X))
+    mf = grad_dir_func( - model.free_energy(X))
+
+    normalizer = gdpdf_normalizer
+else:
+    assert False
 
 
+g.components.append(HeatMap(f = df, normalizer = normalizer))
+dimg = g.render()
 
-print 'making dataset image'
-if t == ENERGY:
-    dimg = make_img(dataset.energy)
-else:
-    dimg = make_img(dataset.pdf)
-print 'making model image'
-mimg = make_img(pdf)
-
-mn = min([dimg.min(),mimg.min()])
-dimg -= mn
-mimg -= mn
-mx = max([dimg.max(),mimg.max()])
-dimg /= mx
-mimg /= mx
-
-dimg *= 2.
-dimg -= 1.
-mimg *= 2.
-mimg -= 1.
+g.components.pop()
+g.components.append(HeatMap(f = mf, normalizer = normalizer))
 
+mimg = g.render()
 
 from pylearn2.gui.patch_viewer import PatchViewer
 
diff --git a/exploring_estimation_criteria/cos_reconsE_nce.yaml b/exploring_estimation_criteria/cos_reconsE_nce.yaml
index 29501cbd..168769f9 100644
--- a/exploring_estimation_criteria/cos_reconsE_nce.yaml
+++ b/exploring_estimation_criteria/cos_reconsE_nce.yaml
@@ -4,6 +4,7 @@
     "model": !obj:pylearn2.models.normalized_ebm.NormalizedEBM {
                 "init_logZ" : 0.,
                 "learn_logZ" : 1,
+                "logZ_lr_scale" : .001,
                 "ebm": !obj:galatea.models.febm.FEBM {
                 "energy_function": !obj:galatea.energy_functions.scratch.recons_model_1 {
                         "nvis" : 2,
diff --git a/exploring_estimation_criteria/cos_reconsE_sm.yaml b/exploring_estimation_criteria/cos_reconsE_sm.yaml
index 532877c2..72acb9b1 100644
--- a/exploring_estimation_criteria/cos_reconsE_sm.yaml
+++ b/exploring_estimation_criteria/cos_reconsE_sm.yaml
@@ -16,7 +16,7 @@
                         }
         },
     "algorithm": !obj:pylearn2.training_algorithms.sgd.SGD {
-               "learning_rate" : .02,
+               "learning_rate" : .002,
                "batch_size" : 5,
                "batches_per_iter" : 1000,
                "monitoring_batches" : 10,
diff --git a/exploring_estimation_criteria/cos_reconsE_smd.yaml b/exploring_estimation_criteria/cos_reconsE_smd.yaml
index ffbf3a1e..d7f59e66 100644
--- a/exploring_estimation_criteria/cos_reconsE_smd.yaml
+++ b/exploring_estimation_criteria/cos_reconsE_smd.yaml
@@ -5,29 +5,29 @@
                 "energy_function": !obj:galatea.energy_functions.scratch.recons_model_1 {
                         "nvis" : 2,
                         "nhid" : 400,
-                        "irange"  : .5,
-                        "init_bias_hid" : 0.0,
+                        "irange"  : 1.8,
+                        "init_bias_hid" : 0.,
 
-                        "init_vis_prec" : 5.,
-                        "vis_prec_lr_scale" : .1,
+                        "init_vis_prec" : 1.,
+                        "vis_prec_lr_scale" : .001,
                         "learn_vis_prec" : 1.,
 
-                        "init_delta" : -0.5
+                        "init_delta" : 0.0
                         }
         },
     "algorithm": !obj:pylearn2.training_algorithms.sgd.SGD {
                "learning_rate" : .0002,
-               "batch_size" : 5,
-               "batches_per_iter" : 1000,
-               "monitoring_batches" : 10,
+               "batch_size" : 50,
+               "batches_per_iter" : 10000,
+               "monitoring_batches" : 100,
                "monitoring_dataset" : !obj:pylearn2.datasets.cos_dataset.CosDataset {},
                "cost" : !obj:pylearn2.costs.ebm_estimation.SMD {
                         "corruptor": !obj:pylearn2.corruption.GaussianCorruptor {
-                                "stdev": .01
+                                "stdev": .1
                         }
                }
         },
-    "save_path": "cos_recons_E_smd.pkl"
+    "save_path": "cos_reconsE_smd.pkl"
 }
 
 
diff --git a/exploring_estimation_criteria/experiment_a.yaml b/exploring_estimation_criteria/experiment_a.yaml
index 5f203cf6..c2cc5ff9 100644
--- a/exploring_estimation_criteria/experiment_a.yaml
+++ b/exploring_estimation_criteria/experiment_a.yaml
@@ -1,5 +1,4 @@
-#this learns a model with the right gradient direction everywhere, but totally wrong global structure
-
+#like snapshot 1, but with few hidden units, to see what happens with less capacity
 !obj:pylearn2.scripts.train.Train {
     "dataset": !obj:pylearn2.datasets.cos_dataset.CosDataset {},
     "model": !obj:galatea.models.local_noise_ebm.LocalNoiseEBM {
diff --git a/models/febm.py b/models/febm.py
index 1b24c734..7b415cff 100644
--- a/models/febm.py
+++ b/models/febm.py
@@ -26,6 +26,14 @@ def get_params(self):
         return self.energy_function.get_params()
     #
 
+    def get_weights(self):
+        return self.energy_function.get_weights()
+    #
+
+    def get_weights_format(self):
+        return self.energy_function.get_weights_format()
+    #
+
     def redo_theano(self):
         X = T.matrix()
 
diff --git a/models/local_noise_ebm.py b/models/local_noise_ebm.py
index 7e06a7ff..628ff433 100644
--- a/models/local_noise_ebm.py
+++ b/models/local_noise_ebm.py
@@ -171,6 +171,9 @@ def p_h_given_v(self, V):
 
         return T.nnet.sigmoid(self.b + T.dot(V,self.W))
 
+    def free_energy(self, V):
+        return self.batch_free_energy(V)
+
     def batch_free_energy(self, V):
 
         if self.energy_function == 'gaussian-binary rbm':
@@ -279,7 +282,7 @@ def redo_theano(self):
         #corrupted = corrupted * norm_ratio_shuffled
         #corrupted.name = 'postnorm_corrupted'
 
-
+        print "NOT USING NORM RESCALING"
 
         self.corruption_func = function([X],corrupted)
 
diff --git a/recons_srbm/make_dataset_2M.py b/recons_srbm/make_dataset_2M.py
index 639b86ff..aa1ff704 100644
--- a/recons_srbm/make_dataset_2M.py
+++ b/recons_srbm/make_dataset_2M.py
@@ -1,6 +1,6 @@
-from framework.utils import serial
-from framework.datasets import cifar10
-from framework.datasets import preprocessing
+from pylearn2.utils import serial
+from pylearn2.datasets import cifar10
+from pylearn2.datasets import preprocessing
 
 train = cifar10.CIFAR10(which_set="train")