diff --git a/README.md b/README.md
index 9828715..adce894 100644
--- a/README.md
+++ b/README.md
@@ -1,69 +1,26 @@
-# Brut
+# Brut-v1.1
 
-This repository contains the code and manuscript text used in the paper
+This repository contains an updated version of [Brut](https://github.com/ChrisBeaumont/brut) used in the paper
 
-*The Milky Way Project: Leveraging Citizen Science and Machine Learning to Detect Interstellar Bubbles. Beaumont, Goodman, Kendrew, Williams, Simpson 2014, ApJS, in press ([arXiv link](http://arxiv.org/abs/1406.2692))*
+*Assessing the Performance of a Machine Learning Algorithm in Identifying Bubbles in Dust Emission, ApJ, in press ([arXiv link](https://arxiv.org/abs/1711.03480))*
 
-The `v1` tag represents the state of the code at the time of publication.
+We made slight changes to the modules that Brut imports. The current version of Brut runs with the following library versions:
 
-Data associated with this project is also archived at [The Dataverse](http://thedata.harvard.edu/dvn/dv/brut) (doi:10.7910/DVN/26463)
+* astropy 2.0.2
+* h5py 2.7.0
+* matplotlib 2.0.2
+* numpy 1.13.3
+* scipy 1.0.0
+* skimage 0.13.0
+* sklearn 0.19.1
+* cloud 2.8.5
 
-## High level summary
+The retrained models are provided in the models/ directory.
 
-Brut uses a database of known bubbles (from the [Milky Way Project](http://www.milkywayproject.org/)) and Spitzer images from our galaxy to build an automatic bubble classifier. The classifier is based on the Random Forest algorithm, and uses the [WiseRF](http://docs.wise.io/wiserf_python.html) implementation of this algorithm.
-
-The main question that Brut attempts to answer is "does this image contain a bubble?" The images presented to Brut are 2-color square postage stamps extracted from 8 and 24 micron Spitzer images of the Galactic plane.
-
-The [picloud](http://www.picloud.com/) platform was used to perform some of the computation in parallel, in the cloud.
-
-If you want to dig into the details of how the model is built, start with the Makefile in the scripts/ directory.
 
 ## Organization
 
-### bubbly/
-Contains the python library used to fit Random Forest classification models to Spitzer images
-
-### figures/
-Contains code to generate figures in the paper
-
-### notebooks/
-Contains several IPython notebooks in various states of organization -- some are polished documents describing aspects of the analysis, others are temporary workbooks.
-
-### paper/
-Contains the manuscript text itself
-
-### scripts/
-Python scripts to fit models and generate other derived data products
-
-
-## Reproduction
-
-This repository is MIT Licensed.
-
-To reproduce the figures and models generated for the paper, type:
-
-```
-python setup.py develop
-cd bubbly/data && make
-cd ../../paper && make
-```
-
-Though I promise you you'll have to play with dependencies to get this all set up :)
-
-## Dependencies
-
-Brut is built on top of several python libraries, and uses data from the GLIMPSE and MIPSGAL surveys from the Spitzer Space Telescope. You'll need the following libraries
-
-* aplpy
-* astropy
-* h5py
-* IPython
-* matplotlib
-* numpy
-* scipy
-* skimage
-* sklearn
-* picloud
-* WiseRF
+### models/
+Contains the original trained model and the models retrained on synthetic images plus the original training set.
+The synthetic bubble images are archived at [The Dataverse](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/OSMNDG).
 
-In addition, you need to download the GLIMPSE and MIPSGAL mosaic data. The Makefile inside bubbly/data does this.
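For reference, a minimal sketch for verifying the pinned versions listed above (not part of the repository; note that `skimage` and `sklearn` are the import names of scikit-image and scikit-learn):

```python
# Print the installed version of each dependency listed in the README.
import astropy, h5py, matplotlib, numpy, scipy, skimage, sklearn

for mod in (astropy, h5py, matplotlib, numpy, scipy, skimage, sklearn):
    print('%s %s' % (mod.__name__, mod.__version__))
```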
diff --git a/bubbly/extractors.py b/bubbly/extractors.py
index b6f93cc..8bc4493 100644
--- a/bubbly/extractors.py
+++ b/bubbly/extractors.py
@@ -7,7 +7,7 @@ import numpy as np
 from scipy.optimize import fmin_l_bfgs_b as minimize
 from skimage.morphology import disk
-from skimage.filter.rank import percentile_autolevel
+from skimage.filters.rank import autolevel_percentile
 from skimage.feature import daisy
 
 from .field import get_field
@@ -60,6 +60,25 @@ def extract(self, lon, l, b, r, **kwargs):
         rgb = self._preprocess_rgb(rgb)
         return self._extract_rgb(rgb)
 
+    def extract_xd(self, lon, l, b, r, **kwargs):
+        """Like extract(), but return the preprocessed RGB stamp itself
+        instead of the feature vector."""
+        kwargs.setdefault('limits', [1, 97])
+        kwargs.setdefault('shp', self.shp)
+
+        rgb = get_field(lon).extract_stamp(l, b, r, **kwargs)
+
+        if rgb is None:
+            raise ValueError("Field is out of bounds")
+        elif (rgb[:, :, 1] == 0).mean() > 0.1:
+            raise ValueError("Field has no green channel")
+
+        return self._preprocess_rgb(rgb)
+
     def _extract_rgb(self, rgb):
         raise NotImplementedError()
@@ -182,7 +201,22 @@ def _prepare_templates(self, shp):
         ts *= (s / 2) / 20
         self.rings = np.column_stack(np.exp(-(r - rr) ** 2 / tt ** 2).ravel()
                                      for rr, tt in product(rs, ts))
 
+    def _normal_templates_coeff(self, shp):
+        """Build and return the ring-template matrix for a stamp of shape
+        shp, without caching it on the instance."""
+        s = shp[0]
+        y, x = np.mgrid[0:s, 0:s].astype(np.float)
+        r = np.hypot(y - s / 2, x - s / 2)
+        rs = np.linspace(1., s / 2, 7)
+        ts = np.array([2, 4, 6, 8, 10, 15, 20]).astype(np.float)
+        ts *= (s / 2) / 20
+        return np.column_stack(np.exp(-(r - rr) ** 2 / tt ** 2).ravel()
+                               for rr, tt in product(rs, ts))
+
     def _extract_rgb(self, rgb):
         self._prepare_templates(rgb.shape)
@@ -197,6 +231,8 @@ def _extract_rgb(self, rgb):
                            np.dot(rnorm, self.rings),
                            np.dot(gnorm, self.rings),
                            np.dot(rnorm - gnorm, self.rings)])
+        # Alternative (unused): red-channel features only
+        # result = np.hstack([np.dot(r, self.rings), np.dot(rnorm, self.rings)])
         return result.reshape(1, -1)
@@ -204,6 +240,9 @@ class DaisyExtractor(Extractor):
     def _extract_rgb(self, rgb):
         kwargs = dict(step=rgb.shape[0]/5, radius=rgb.shape[0] / 10,
                       rings=2, histograms=6, orientations=8)
+        # Cache the most recent feature vector for inspection by callers.
+        self.daisyextractor_xd = np.hstack(daisy(rgb[:, :, i], **kwargs).ravel()
+                                           for i in [0, 1])
         return np.hstack(daisy(rgb[:, :, i], **kwargs).ravel()
                          for i in [0, 1])
@@ -212,12 +251,49 @@ def __init__(self, orig):
         self.orig = orig
 
     def extract(self, lon, l, b, r):
         return np.hstack((self.orig.extract(lon, l, b, r),
                           self.orig.extract(lon, l, b, r / 2),
                           self.orig.extract(lon, l, b, r * 2),
                           self.orig.extract(lon, l, b + r / 2, r)))
 
+    def extract_xd_mve(self, lon, l, b, r):
+        """Return the preprocessed RGB stamp from the wrapped extractor."""
+        return self.orig.extract_xd(lon, l, b, r)
+
+    def extract_xd_daisy(self, lon, l, b, r):
+        """Daisy features computed on the raw stamp."""
+        return DaisyExtractor()._extract_rgb(self.orig.extract_xd(lon, l, b, r))
+
+    def extract_xd_ring(self, lon, l, b, r):
+        """Ring-template features computed on the raw stamp."""
+        return RingExtractor()._extract_rgb(self.orig.extract_xd(lon, l, b, r))
+
+    def extract_xd_compression(self, lon, l, b, r):
+        """Compression features computed on the raw stamp."""
+        return CompressionExtractor()._extract_rgb(self.orig.extract_xd(lon, l, b, r))
+
+    def extract_xd_wavelet(self, lon, l, b, r):
+        """Wavelet features computed on the raw stamp."""
+        return MultiWaveletExtractor()._extract_rgb(self.orig.extract_xd(lon, l, b, r))
+
+    def extract_xd_ring_templates(self, lon, l, b, r):
+        """Ring-template matrix matching the raw stamp's shape."""
+        shp = self.orig.extract_xd(lon, l, b, r).shape
+        return RingExtractor()._normal_templates_coeff(shp)
+
 
 class CompositeExtractor(Extractor):
     composite_classes = []
@@ -258,5 +334,5 @@ def enhance_contrast(rgb):
     s = rgb.shape
     d = disk(s[0] / 5)
     for i in range(3):
-        rgb[:, :, i] = percentile_autolevel(rgb[:, :, i], d, p0=.1, p1=.9)
+        rgb[:, :, i] = autolevel_percentile(rgb[:, :, i], d, p0=.1, p1=.9)
     return rgb
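The `extract_xd_*` helpers added above return the preprocessed RGB stamp, or features computed directly on it, rather than the concatenated multi-scale feature vector. A hedged usage sketch (the coordinates are made-up placeholders, the GLIMPSE/MIPSGAL mosaics must be available locally, and we assume the wrapped extractor inherits `extract_xd` from the base `Extractor`):

```python
# Sketch: fetch a preprocessed 2-color stamp, then compute Daisy features on it.
from bubbly.extractors import MultiViewExtractor, ManyManyExtractors

ex = MultiViewExtractor(ManyManyExtractors())
rgb = ex.extract_xd_mve(35, 34.7, -0.4, 0.05)      # raw stamp (ny, nx, 3)
feats = ex.extract_xd_daisy(35, 34.7, -0.4, 0.05)  # Daisy feature vector
```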
diff --git a/bubbly/util.py b/bubbly/util.py
index ae9aac6..6bd5760 100644
--- a/bubbly/util.py
+++ b/bubbly/util.py
@@ -3,7 +3,7 @@ import logging
 
 from skimage.transform import resize
-from sklearn.metrics import recall_score, auc_score
+from sklearn.metrics import recall_score, roc_auc_score
 
 import numpy as np
@@ -56,9 +56,14 @@ def scale(x, mask=None, limits=None):
     if mask is None:
         lo, hi = np.percentile(x, limits)
     else:
-        lo, hi = np.percentile(x[mask], limits)
-
+        # Guard against an empty mask, which would crash np.percentile.
+        if x[mask].size > 0:
+            lo, hi = np.percentile(x[mask], limits)
+        else:
+            lo, hi = 0, 0
+
     x = (np.clip(x, lo, hi) - lo) / (hi - lo)
     return (np.sqrt(x) * 255).astype(np.uint8)
@@ -67,7 +72,7 @@ def resample(arr, shape):
     # skimage's resize needs scaled data
     lo, hi = np.nanmin(arr), np.nanmax(arr)
     arr = (arr - lo) / (hi - lo)
-    result = resize(arr, shape, mode='nearest')
+    result = resize(arr, shape, mode='edge')
     return result * (hi - lo) + lo
diff --git a/bubbly/wiserf.py b/bubbly/wiserf.py
index 4d94420..f3cbbaa 100644
--- a/bubbly/wiserf.py
+++ b/bubbly/wiserf.py
@@ -1,6 +1,7 @@
 import os
 
-import PyWiseRF
+# PyWiseRF is no longer available; use scikit-learn's random forest instead.
+from sklearn.ensemble import RandomForestClassifier
 import cloud
 
 import numpy as np
@@ -14,7 +15,12 @@ def test():
     clf = WiseRF().fit(x, y)
     return clf
 
-class WiseRF(PyWiseRF.WiseRF):
+class WiseRF(RandomForestClassifier):
+    """Drop-in replacement for PyWiseRF.WiseRF, backed by scikit-learn's
+    RandomForestClassifier."""
     def decision_function(self, x):
         p = self.predict_proba(x)
         return p[:, 1] - p[:, 0]
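Because `WiseRF` is now a thin subclass of scikit-learn's `RandomForestClassifier`, the shim can be exercised without any Spitzer data. A minimal sketch on toy data (the parameters here are illustrative, not the paper's):

```python
# The decision_function added by the shim is p(class 1) - p(class 0).
import numpy as np
from bubbly.wiserf import WiseRF

X = np.random.rand(200, 5)
y = (X[:, 0] > 0.5).astype(int)       # toy binary labels
clf = WiseRF(n_estimators=50).fit(X, y)
scores = clf.decision_function(X)     # in [-1, 1]; positive favors class 1
```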
+ +* "full_classifier_retrain_xd_all_gini_noise_0722.dat": retrained on the synthetic bubbles with and without noise and the original training set from MWP +* "full_classifier_retrain_xd_all_nonnoise_gini_0722.dat": retrained on the synthetic bubbles without noise and the original training set from MWP +* "full_classifier_xd_only_sim_non_noi_1102.dat": retrained on the synthetic bubbles without noise +* "full_classifier_xd_only_simulation_1029.dat": retrained on the synthetic bubbles with and without noise +* "full_classifier_xd_reduceMWP_simulation_1025.dat": retrained on the synthetic bubbles with and without noise and half of the original training set from MWP +* "full_classifier.dat": original training + + + + diff --git a/models/full_classifier_retrain_xd_all_gini_noise_0722.dat b/models/full_classifier_retrain_xd_all_gini_noise_0722.dat new file mode 100644 index 0000000..c04a8e3 Binary files /dev/null and b/models/full_classifier_retrain_xd_all_gini_noise_0722.dat differ diff --git a/models/full_classifier_retrain_xd_all_nonnoise_gini_0722.dat b/models/full_classifier_retrain_xd_all_nonnoise_gini_0722.dat new file mode 100644 index 0000000..18275bf Binary files /dev/null and b/models/full_classifier_retrain_xd_all_nonnoise_gini_0722.dat differ diff --git a/models/full_classifier_xd_only_sim_non_noi_1102.dat b/models/full_classifier_xd_only_sim_non_noi_1102.dat new file mode 100644 index 0000000..788c668 Binary files /dev/null and b/models/full_classifier_xd_only_sim_non_noi_1102.dat differ diff --git a/models/full_classifier_xd_only_simulation_1029.dat b/models/full_classifier_xd_only_simulation_1029.dat new file mode 100644 index 0000000..8db9ce1 Binary files /dev/null and b/models/full_classifier_xd_only_simulation_1029.dat differ diff --git a/models/full_classifier_xd_reduceMWP_simulation_1025.dat b/models/full_classifier_xd_reduceMWP_simulation_1025.dat new file mode 100644 index 0000000..2e985d9 Binary files /dev/null and b/models/full_classifier_xd_reduceMWP_simulation_1025.dat differ diff --git a/scripts/build_full_classifier.py b/scripts/build_full_classifier.py index 7d4a647..f4016bf 100644 --- a/scripts/build_full_classifier.py +++ b/scripts/build_full_classifier.py @@ -1,35 +1,119 @@ import json import cPickle as pickle +import numpy as np from bubbly.model import Model, ModelGroup from bubbly.extractors import MultiViewExtractor, ManyManyExtractors -from bubbly.dr1 import WideLocationGenerator +from bubbly.dr1 import WideLocationGenerator,LocationGenerator from bubbly.wiserf import WiseRF +#from sklearn.ensemble import RandomForestClassifier - +def add_traningset_1(data,lon): + for ctt_l in range(10): + for ctt_b in range(4): + data['pos'].append([lon, lon%360-0.95+ctt_l*0.1, (ctt_b-1)*0.1, 0.046]) + return data + +def add_traningset_2(data,lon): + for ctt_l in range(10): + for ctt_b in range(4): + data['pos'].append([lon, lon%360-0.95+ctt_l*0.1, (ctt_b-1)*0.1, 0.038]) + return data + +def add_traningset_neg(data,lon): + for ctt_l in range(20): + for ctt_b in range(8): + data['neg'].append([lon, lon%360-0.95+ctt_l*0.1, (ctt_b-3.5)*0.1, 0.046]) + return data + + def make_model(mod3): params = {'max_features': 'auto', - 'min_samples_split': 4, 'n_jobs': 2, - 'criterion': 'infogain', + 'min_samples_split': 4, +# 'criterion': 'infogain', + 'criterion': 'gini', ### entropy +# 'criterion': 'entropy', ### 'n_estimators': 800} ex = MultiViewExtractor(ManyManyExtractors()) loc = WideLocationGenerator(mod3) +# clf = RandomForestClassifier(**params) clf = WiseRF(**params) return 
diff --git a/scripts/build_full_classifier.py b/scripts/build_full_classifier.py
index 7d4a647..f4016bf 100644
--- a/scripts/build_full_classifier.py
+++ b/scripts/build_full_classifier.py
@@ -1,35 +1,119 @@
 import json
 import cPickle as pickle
+import numpy as np
 
 from bubbly.model import Model, ModelGroup
 from bubbly.extractors import MultiViewExtractor, ManyManyExtractors
-from bubbly.dr1 import WideLocationGenerator
+from bubbly.dr1 import WideLocationGenerator, LocationGenerator
 from bubbly.wiserf import WiseRF
 
+
+def add_trainingset_1(data, lon):
+    """Append a 10x4 grid of synthetic positive stamps (r = 0.046 deg)."""
+    for ctt_l in range(10):
+        for ctt_b in range(4):
+            data['pos'].append([lon, lon % 360 - 0.95 + ctt_l * 0.1,
+                                (ctt_b - 1) * 0.1, 0.046])
+    return data
+
+
+def add_trainingset_2(data, lon):
+    """Append a 10x4 grid of synthetic positive stamps (r = 0.038 deg)."""
+    for ctt_l in range(10):
+        for ctt_b in range(4):
+            data['pos'].append([lon, lon % 360 - 0.95 + ctt_l * 0.1,
+                                (ctt_b - 1) * 0.1, 0.038])
+    return data
+
+
+def add_trainingset_neg(data, lon):
+    """Append a 20x8 grid of synthetic negative stamps."""
+    for ctt_l in range(20):
+        for ctt_b in range(8):
+            data['neg'].append([lon, lon % 360 - 0.95 + ctt_l * 0.1,
+                                (ctt_b - 3.5) * 0.1, 0.046])
+    return data
+
+
 def make_model(mod3):
     params = {'max_features': 'auto',
-              'min_samples_split': 4, 'n_jobs': 2,
-              'criterion': 'infogain',
+              'min_samples_split': 4,
+              'criterion': 'gini',  # 'entropy' is the other supported option
               'n_estimators': 800}
 
     ex = MultiViewExtractor(ManyManyExtractors())
     loc = WideLocationGenerator(mod3)
     clf = WiseRF(**params)
 
     return Model(ex, loc, clf)
 
 
 def train_model(model, mod3):
-    data = json.load(open('../models/training_data_%i.json' % mod3))
+    data = json.load(open('../models/training_dataxdno_%i.json' % mod3))
+
+    # Augment the training set with grids of synthetic bubble stamps; the
+    # synthetic fields are indexed by Galactic longitude + 360.
+    if mod3 == 0:
+        for lon_all in np.array([71, 82, 74, 76, 121, 112, 124, 116]) + 360:
+            data = add_trainingset_1(data, int(lon_all))
+        for lon_all in np.array([73, 85, 77, 113, 115, 127]) + 360:
+            data = add_trainingset_2(data, int(lon_all))
+        data = add_trainingset_neg(data, 82)
+    if mod3 == 1:
+        for lon_all in np.array([71, 72, 74, 86, 111, 122, 114, 116]) + 360:
+            data = add_trainingset_1(data, int(lon_all))
+        for lon_all in np.array([83, 75, 77, 113, 125, 117]) + 360:
+            data = add_trainingset_2(data, int(lon_all))
+        data = add_trainingset_neg(data, 83)
+    if mod3 == 2:
+        for lon_all in np.array([81, 72, 84, 76, 111, 112, 114, 126]) + 360:
+            data = add_trainingset_1(data, int(lon_all))
+        for lon_all in np.array([73, 75, 87, 123, 115, 117]) + 360:
+            data = add_trainingset_2(data, int(lon_all))
+        data = add_trainingset_neg(data, 82)
+
     model.fit(data['pos'], data['neg'])
     return model
 
 
 def main():
     models = [train_model(make_model(i), i) for i in [0, 1, 2]]
     mg = ModelGroup(*models)
-    mg.save('../models/full_classifier.dat')
+    # The other classifiers in models/ were produced by swapping the input
+    # JSON in train_model and the output path below (see models/README.md).
+    mg.save('../models/full_classifier_xd_only_sim_non_noi_1102.dat')
 
 
 if __name__ == "__main__":
     main()
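For reference, a standalone sketch of the stamp grid that `add_trainingset_1` above lays down for a single synthetic field (pure Python, no repository dependencies):

```python
# Each synthetic field contributes a 10x4 raster of (lon, l, b, r) stamps.
lon = 71 + 360  # the first mod3 == 0 field above
stamps = [[lon, lon % 360 - 0.95 + i * 0.1, (j - 1) * 0.1, 0.046]
          for i in range(10) for j in range(4)]
print(len(stamps))  # 40 positive examples for this field
print(stamps[0])    # [431, ~70.05, -0.1, 0.046]
```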