Adding ARMA reseeding option (#958)
* add arma test to raven

* change test names

* adding pk unit tests

* save draft

* pk possible

* unit test draft working

* seed fix

* final check

* add signal.csv

* add engine test

* change int into void in randomclass cpp

* change signal length

* revised

* change segmented gold

* change npseed

* change reseedCopies to bool

* change reseed on varma

* fix varmatest

* fix test

* unordered xml

* test more data on varma

* change maxtime on varma test

* change into org tests

* change segmented xml tests

* change segmented period
JiaZhou-PU authored and PaulTalbot-INL committed May 3, 2019
1 parent c493b48 commit b7f3f4c
Showing 20 changed files with 4,324 additions and 2,083 deletions.
crow/include/distributions/randomClass.h (7 changes: 6 additions & 1 deletion)
@@ -26,11 +26,16 @@ class RandomClassImpl;
class RandomClass {
RandomClassImpl *_rng;
const double _range;
unsigned int _counter;
unsigned int _seed;
public:
RandomClass();
~RandomClass();
void seed(unsigned int seed);
void seed(unsigned long int seed);
double random();
int get_rng_state();
void forward_seed(unsigned int counts);
int get_rng_seed();
};

#endif /* RANDOMCLASS_H */
crow/src/distributions/randomClass.cxx (21 changes: 20 additions & 1 deletion)
@@ -24,16 +24,35 @@ class RandomClassImpl {
};

RandomClass::RandomClass() : _rng(new RandomClassImpl()), _range(_rng->_backend.max() - _rng->_backend.min()) {
_counter=0;
_seed=0;
}

void RandomClass::seed(unsigned int seed) {
void RandomClass::seed(unsigned long int seed) {

_counter = 0;
_seed = seed;
_rng->_backend.seed(seed);
}

double RandomClass::random() {
_counter++;
return (_rng->_backend()-_rng->_backend.min())/_range;
}

int RandomClass::get_rng_state() {
return _counter;
}

void RandomClass::forward_seed(unsigned int counts){
_counter = counts;
_rng->_backend.discard(counts);
}

int RandomClass::get_rng_seed(){
return _seed;
}

RandomClass::~RandomClass(){
delete _rng;
}
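The state of this engine is now fully described by the pair (seed, number of draws): random() bumps _counter, get_rng_state() reports it, and forward_seed() replays a freshly seeded backend generator by discarding that many draws. A minimal, self-contained sketch of the same idea in Python, with numpy's RandomState standing in for the crow backend (illustrative only):

import numpy as np

# a stream that has already produced n draws...
a = np.random.RandomState(42)
n = 5
a.rand(n)

# ...can be rebuilt from (seed, n) alone: reseed, then throw away n draws,
# which is what RandomClass::seed() followed by forward_seed(n) does
b = np.random.RandomState(42)
b.rand(n)

assert a.rand() == b.rand()   # the two streams now agree draw-for-draw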
framework/Models/ROM.py (2 changes: 1 addition & 1 deletion)
@@ -191,7 +191,7 @@ class cls. This one seems a bit excessive, are all of these for this class?
inputSpecification.addSub(InputData.parameterInputFactory("P", InputData.IntegerType))
inputSpecification.addSub(InputData.parameterInputFactory("Q", InputData.IntegerType))
inputSpecification.addSub(InputData.parameterInputFactory("seed", InputData.IntegerType))
inputSpecification.addSub(InputData.parameterInputFactory("reseedCopies", InputData.StringType))
inputSpecification.addSub(InputData.parameterInputFactory("reseedCopies", InputData.BoolType))
inputSpecification.addSub(InputData.parameterInputFactory("Fourier", contentType=InputData.FloatListType))
inputSpecification.addSub(InputData.parameterInputFactory("preserveInputCDF", contentType=InputData.BoolType))
### ARMA zero filter
framework/SupervisedLearning/ARMA.py (77 changes: 52 additions & 25 deletions)
@@ -29,13 +29,14 @@
import collections
import numpy as np
import statsmodels.api as sm # VARMAX is in sm.tsa
import functools
from statsmodels.tsa.arima_model import ARMA as smARMA
from scipy.linalg import solve_discrete_lyapunov
from sklearn import linear_model
#External Modules End--------------------------------------------------------------------------------

#Internal Modules------------------------------------------------------------------------------------
from utils import randomUtils, xmlUtils, mathUtils
from utils import randomUtils, xmlUtils, mathUtils,utils
import Distributions
from .SupervisedLearning import supervisedLearning
#Internal Modules End--------------------------------------------------------------------------------
@@ -73,7 +74,8 @@ def __init__(self,messageHandler,**kwargs):
self.Q = kwargs.get('Q', 3) # moving average lag
self.segments = kwargs.get('segments', 1)
# data manipulation
self.reseedCopies = kwargs.get('reseedCopies',True)
reseed=kwargs.get('reseedCopies',str(True)).lower()
self.reseedCopies = reseed not in utils.stringsThatMeanFalse()
self.outTruncation = {'positive':set(),'negative':set()} # store truncation requests
self.pivotParameterID = kwargs['pivotParameter']
self.pivotParameterValues = None # In here we store the values of the pivot parameter (e.g. Time)
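Downstream of the ROM.py change above, reseedCopies now arrives through the input spec as a boolean, and the ARMA constructor normalizes whatever string form it gets. A small sketch of that normalization, assuming utils.stringsThatMeanFalse() returns the usual lowercase spellings of "false" (the stand-in list below is illustrative; the real list lives in framework/utils/utils.py):

def strings_that_mean_false():
  # illustrative stand-in for utils.stringsThatMeanFalse()
  return ['no', 'n', 'false', 'f', '0', 'off']

def parse_reseed_copies(kwargs):
  reseed = str(kwargs.get('reseedCopies', True)).lower()
  return reseed not in strings_that_mean_false()

print(parse_reseed_copies({}))                         # True  (default)
print(parse_reseed_copies({'reseedCopies': 'False'}))  # False
print(parse_reseed_copies({'reseedCopies': True}))     # True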
@@ -107,14 +109,18 @@ def __init__(self,messageHandler,**kwargs):
self.normEngine.lowerBoundUsed = False
self.normEngine.initializeDistribution()

# check for correlation
correlated = kwargs.get('correlate',None)
if correlated is not None:
# FIXME set the numpy seed
self.setEngine(randomUtils.newRNG(),seed=self.seed,count=0)

# FIXME set the numpy seed
## we have to do this because VARMA.simulate does not accept a random number generator,
## but instead uses numpy directly. As a result, for now, we have to seed numpy.
## Because we use our RNG to set the seed, though, it should follow the global seed still.
self.raiseADebug('Setting Numpy seed to',self.seed)
self.raiseADebug('Setting ARMA seed to',self.seed)
randomUtils.randomSeed(self.seed,engine=self.randomEng)

# check for correlation
correlated = kwargs.get('correlate',None)
if correlated is not None:
np.random.seed(self.seed)
# store correlated targets
corVars = [x.strip() for x in correlated.split(',')]
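The block above gives every ARMA instance its own crow engine (self.randomEng) while still seeding numpy, because statsmodels' VARMA simulation for correlated targets draws from numpy's global state rather than accepting a generator. A condensed, runnable sketch of that double seeding, with numpy's RandomState standing in for the crow engine (class and argument names are illustrative):

import numpy as np

class ToySeededROM:
  """Sketch of the ARMA __init__ seeding path."""
  def __init__(self, seed, correlated=None):
    self.seed = seed
    # dedicated per-ROM engine (stand-in for randomUtils.newRNG() + setEngine)
    self.randomEng = np.random.RandomState(abs(seed))
    if correlated is not None:
      # VARMA samples through numpy's global state, so seed that too,
      # from the same seed, to keep correlated runs reproducible
      np.random.seed(seed)

rom = ToySeededROM(seed=42, correlated='Speed,Demand')
print(rom.randomEng.rand(), np.random.rand())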
@@ -196,11 +202,10 @@ def __getstate__(self):
@ In, None
@ Out, d, dict, stateful dictionary
"""
d = copy.copy(self.__dict__)
# set up a seed for the next pickled iteration
if self.reseedCopies:
rand = randomUtils.randomIntegers(1,int(2**20),self)
d['random seed'] = rand
d = supervisedLearning.__getstate__(self)
eng=d.pop("randomEng")
randCounts = eng.get_rng_state()
d['crow_rng_counts'] = randCounts
return d

def __setstate__(self,d):
@@ -209,13 +214,12 @@ def __setstate__(self,d):
@ In, d, dict, stateful dictionary
@ Out, None
"""
seed = d.pop('random seed',None)
if seed is not None:
self.reseed(seed)
self.__dict__ = d
# set VARMA numpy seed
self.raiseADebug('Setting Numpy seed to',self.seed)
np.random.seed(self.seed)
rngCounts = d.pop('crow_rng_counts')
self.__dict__.update(d)
self.setEngine(randomUtils.newRNG(),seed=None,count=rngCounts)
if self.reseedCopies:
randd = np.random.randint(1,2e9)
self.reseed(randd)

def __trainLocal__(self,featureVals,targetVals):
"""
@@ -375,7 +379,7 @@ def __evaluateLocal__(self,featureVals):
result = self.varmaResult[1]
sample = self._generateARMASignal(result,
numSamples = self.notZeroFilterMask.sum(),
randEngine = self.normEngine.rvs)
randEngine = self.randomEng)
zeroedSample = np.zeros((self.notZeroFilterMask.sum(),1))
zeroedSample[:,0] = sample
correlatedSample = True # placeholder, signifies we've sampled the correlated distribution
@@ -408,15 +412,17 @@
if target == self.zeroFilterTarget:
sample = self._generateARMASignal(result,
numSamples = self.zeroFilterMask.sum(),
randEngine = self.normEngine.rvs)
randEngine = self.randomEng)

## if so, then expand result into signal space (functionally, put back in all the zeros)
signal = np.zeros(len(self.pivotParameterValues))
signal[self.zeroFilterMask] = sample
else:
## if not, no extra work to be done here!
sample = self._generateARMASignal(result,
numSamples = len(self.pivotParameterValues),
randEngine = self.normEngine.rvs)
randEngine = self.randomEng)

signal = sample
# END creating base signal
# DEBUG adding arbitrary variables for debugging, TODO find a more elegant way, leaving these here as markers
@@ -473,7 +479,8 @@ def reseed(self,seed):
@ In, seed, int, new seed to use
@ Out, None
"""
randomUtils.randomSeed(seed)
randomUtils.randomSeed(seed,engine=self.randomEng)
self.seed=seed

### UTILITY METHODS ###
def _computeNumberOfBins(self,data):
Expand All @@ -498,7 +505,7 @@ def _denormalizeThroughCDF(self, data, params):
denormed = self._sampleICDF(denormed, params)
return denormed

def _generateARMASignal(self, model, numSamples=None, randEngine=None):
def _generateARMASignal(self, model, numSamples=None,randEngine=None):
"""
Generates a synthetic history from fitted parameters.
@ In, model, statsmodels.tsa.arima_model.ARMAResults, fitted ARMA such as obtained from _trainARMA
@@ -508,10 +515,14 @@ def _generateARMASignal(self, model, numSamples=None, randEngine=None):
"""
if numSamples is None:
numSamples = len(self.pivotParameterValues)
if randEngine is None:
randEngine=self.randomEng
hist = sm.tsa.arma_generate_sample(ar = np.append(1., -model.arparams),
ma = np.append(1., model.maparams),
nsample = numSamples,
distrvs = randEngine,
distrvs = functools.partial(randomUtils.randomNormal,engine=randEngine),
# functools.partial supplies the random number generator as a callable that
# draws from a normal distribution, with the engine bound as a keyword argument
sigma = np.sqrt(model.sigma2),
burnin = 2*max(self.P,self.Q)) # @epinas, 2018
return hist
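arma_generate_sample takes a callable (distrvs) that supplies the innovations, so binding the ROM's engine with functools.partial keeps every shock on the dedicated stream instead of numpy's global one. A standalone sketch of the same pattern, with a numpy Generator standing in for self.randomEng and a local helper in place of randomUtils.randomNormal:

import functools
import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(42)          # stand-in for the crow engine

def random_normal(size, engine=None):
  """Stand-in for randomUtils.randomNormal(..., engine=...)."""
  return engine.standard_normal(size)

hist = sm.tsa.arma_generate_sample(ar=np.array([1.0, -0.5]),   # AR(1), phi = 0.5
                                   ma=np.array([1.0, 0.4]),    # MA(1), theta = 0.4
                                   nsample=200,
                                   distrvs=functools.partial(random_normal, engine=rng),
                                   burnin=10)
print(hist[:5])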
@@ -1129,3 +1140,19 @@ def __returnCurrentSettingLocal__(self):
"""
pass

def setEngine(self,eng,seed=None,count=None):
"""
Set up the random number engine for the ARMA ROM
@ In, eng, instance, random number generator
@ In, seed, int, optional, the seed, if None then use the global seed from ARMA
@ In, count, int, optional, advances the state of the generator, if None then use the current ARMA.randomEng count
@ Out, None
"""
if seed is None:
seed=self.seed
seed=abs(seed)
eng.seed(seed)
if count is None:
count=self.randomEng.get_rng_state()
eng.forward_seed(count)
self.randomEng=eng
framework/SupervisedLearning/SupervisedLearning.py (22 changes: 22 additions & 0 deletions)
@@ -118,6 +118,28 @@ def __init__(self,messageHandler,**kwargs):
#these need to be declared in the child classes!!!!
self.amITrained = False

def __getstate__(self):
"""
This function returns the state of the ROM
@ In, None
@ Out, state, dict, it contains all the information needed by the ROM to be initialized
"""
state = copy.copy(self.__dict__)
state['initOptionDict'].pop('paramInput',None)
## capture what is normally pickled
if not self.amITrained:
supervisedEngineObj = state.pop("supervisedContainer",None)
del supervisedEngineObj
return state

def __setstate__(self, d):
"""
Initialize the ROM with the data contained in d
@ In, d, dict, it contains all the information needed by the ROM to be initialized
@ Out, None
"""
self.__dict__.update(d)

def initialize(self,idict):
"""
Initialization method
framework/utils/randomUtils.py (3 changes: 2 additions & 1 deletion)
@@ -117,7 +117,8 @@ def randomSeed(value,seedBoth=False,engine=None):
np.random.seed(value+1) # +1 just to prevent identical seed sets
if stochasticEnv== 'numpy' and replaceGlobalEnv:
npStochEnv= engine
print('randomUtils: Global random number seed has been changed to',value)
if replaceGlobalEnv:
print('randomUtils: Global random number seed has been changed to',value)

def random(dim=1,samples=1,keepMatrix=False,engine=None):
"""
rook/XMLDiff.py (6 changes: 5 additions & 1 deletion)
@@ -204,7 +204,11 @@ def fail_message(*args):
note = ''
for unmatched, close in matchvals.items():
#print the path without a match
note += 'No match for '+'/'.join(list(m.tag for m in unmatched))+'\n'
path = '/'.join(list(m.tag for m in unmatched))
note += 'No match for gold node {}\n'.format(path)
note += ' tag: {}\n'.format(unmatched[-1].tag)
note += ' attr: {}\n'.format(unmatched[-1].attrib)
note += ' text: {}\n'.format(unmatched[-1].text)
#print the tree of the nearest match
note += ' Nearest unused match: '
close = sorted(list(close.items()), key=lambda x: x[1], reverse=True)
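With these extra lines the XML differ reports not just the path of an unmatched gold node but also its tag, attributes, and text. A self-contained sketch of the note those format strings build, using a hypothetical unmatched node (values invented purely for illustration):

import xml.etree.ElementTree as ET

# hypothetical chain of gold nodes that found no match in the test output
root = ET.fromstring('<DataObjects><PointSet name="samples">seed</PointSet></DataObjects>')
unmatched = [root, root[0]]

note = 'No match for gold node {}\n'.format('/'.join(m.tag for m in unmatched))
note += '  tag: {}\n'.format(unmatched[-1].tag)
note += '  attr: {}\n'.format(unmatched[-1].attrib)
note += '  text: {}\n'.format(unmatched[-1].text)
print(note)
# No match for gold node DataObjects/PointSet
#   tag: PointSet
#   attr: {'name': 'samples'}
#   text: seed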
@@ -1,11 +1,11 @@
bins_Signal,counts_Signal
14.283117998,3
11.2435880846,12
8.20405817128,27
5.16452825794,43
14.28311799796,3
11.243588084619999,12
8.204058171279998,27
5.164528257939997,43
2.1249983446,55
-0.91453156874,65
-3.95406148208,73
-6.99359139542,90
-10.0331213088,95
-13.0726512221,100
-0.9145315687400011,65
-3.954061482080002,73
-6.993591395420001,90
-10.033121308760002,95
-13.072651222100001,100
@@ -1,2 +1,2 @@
metric_counts_Gold_counts_Signal
0.82
0.6300000000000001
tests/framework/ROM/TimeSeries/ARMA/gold/Segmented/rommeta.csv (54 changes: 54 additions & 0 deletions)
@@ -0,0 +1,54 @@
RAVEN_sample_ID,segment_number,seg_index_start,seg_index_end,seg_Time_start,seg_Time_end
0,0,0,167,0.0,601200.0
0,1,168,335,604800.0,1206000.0
0,2,336,503,1209600.0,1810800.0
0,3,504,671,1814400.0,2415600.0
0,4,672,839,2419200.0,3020400.0
0,5,840,1007,3024000.0,3625200.0
0,6,1008,1175,3628800.0,4230000.0
0,7,1176,1343,4233600.0,4834800.0
0,8,1344,1511,4838400.0,5439600.0
0,9,1512,1679,5443200.0,6044400.0
0,10,1680,1847,6048000.0,6649200.0
0,11,1848,2015,6652800.0,7254000.0
0,12,2016,2183,7257600.0,7858800.0
0,13,2184,2351,7862400.0,8463600.0
0,14,2352,2519,8467200.0,9068400.0
0,15,2520,2687,9072000.0,9673200.0
0,16,2688,2855,9676800.0,10278000.0
0,17,2856,3023,10281600.0,10882800.0
0,18,3024,3191,10886400.0,11487600.0
0,19,3192,3359,11491200.0,12092400.0
0,20,3360,3527,12096000.0,12697200.0
0,21,3528,3695,12700800.0,13302000.0
0,22,3696,3863,13305600.0,13906800.0
0,23,3864,4031,13910400.0,14511600.0
0,24,4032,4199,14515200.0,15116400.0
0,25,4200,4367,15120000.0,15721200.0
0,26,4368,4535,15724800.0,16326000.0
0,27,4536,4703,16329600.0,16930800.0
0,28,4704,4871,16934400.0,17535600.0
0,29,4872,5039,17539200.0,18140400.0
0,30,5040,5207,18144000.0,18745200.0
0,31,5208,5375,18748800.0,19350000.0
0,32,5376,5543,19353600.0,19954800.0
0,33,5544,5711,19958400.0,20559600.0
0,34,5712,5879,20563200.0,21164400.0
0,35,5880,6047,21168000.0,21769200.0
0,36,6048,6215,21772800.0,22374000.0
0,37,6216,6383,22377600.0,22978800.0
0,38,6384,6551,22982400.0,23583600.0
0,39,6552,6719,23587200.0,24188400.0
0,40,6720,6887,24192000.0,24793200.0
0,41,6888,7055,24796800.0,25398000.0
0,42,7056,7223,25401600.0,26002800.0
0,43,7224,7391,26006400.0,26607600.0
0,44,7392,7559,26611200.0,27212400.0
0,45,7560,7727,27216000.0,27817200.0
0,46,7728,7895,27820800.0,28422000.0
0,47,7896,8063,28425600.0,29026800.0
0,48,8064,8231,29030400.0,29631600.0
0,49,8232,8399,29635200.0,30236400.0
0,50,8400,8567,30240000.0,30841200.0
0,51,8568,8735,30844800.0,31446000.0
0,52,8736,8759,31449600.0,31532400.0