Added verbose

relund · Apr 29, 2015 · 2dd9451 · 2dd9451
1 parent dc8638c
commit 2dd9451
Show file tree

Hide file tree

Showing 8 changed files with 179 additions and 46 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -30,6 +30,7 @@ export(loadMDP)
 export(plotHypergraph)
 export(policyIteAve)
 export(policyIteDiscount)
+export(randomHMDP)
 export(removeAction)
 export(resetActions)
 export(setActionWeight)

diff --git a/R/randomHMDP.R b/R/randomHMDP.R
@@ -0,0 +1,85 @@
+#' Generate a "random" HMDP stored in a set of binary files.
+#'
+#' @param prefix A character string with the prefix added to the file(s).
+#' @param levels Number of levels.
+#' @param timeHorizon The time horizon for each level (vector). For the founder the time horizon can be Inf.
+#' @param states Number of states at each stage at a given level (vector of length levels)
+#' @param actions Min and max number of actions at a state.
+#' @param childProcessPr Probability of creating a child process when defining an action.
+#' @param rewards Min and max reward used.
+#' @param durations Min and max duration used.
+#'
+#' @return NULL
+#'
+#' @author Lars Relund \email{lars@@relund.dk}
+#'
+#' @export
+randomHMDP<-function(prefix="", levels=3, timeHorizon=c(Inf,3,4), states=c(2,4,5), actions=c(1,2),
+                     childProcessPr = 0.5, rewards=c(0,100), durations=c(1,10) ) 
+{
+
+   # Writes one finite time-horizon (sub)process through the writer w (created below, found via lexical scoping) and calls itself for child processes; statesFather is the number of states at the calling (father) level.
+   genProcess<-function(levels, timeHorizon, states, actions, childProcessPr, rewards, durations, statesFather) {
+      w$process()
+         for(l1 in 1:timeHorizon[1]-1 ) {   # ':' binds tighter than '-': this is (1:timeHorizon[1])-1, i.e. 0..timeHorizon[1]-1
+            w$stage()
+               for (s1 in 1:states[1]-1) {   # zero-based state labels 0..states[1]-1
+                  w$state(s1)
+                     aSize = sample(actions[1]:actions[2],1)   # number of actions; NOTE(review): if actions[1]==actions[2]==k then sample(k,1) draws from 1:k
+                     for (a1 in 1:aSize-1) {   # zero-based action labels 0..aSize-1
+                        if (levels>1) isChild = rbinom(1,1,childProcessPr)==1 else isChild = FALSE   # Bernoulli draw; never a child when only one level is left
+                        if (isChild) idx<-sample(1:states[2]-1,states[2]/2) else idx<-sample(1:states[1]-1,states[1]/2)   # target state indices: child level if isChild, else this level
+                        pr<-rep(1/length(idx),length(idx))   # equal probability on every sampled target
+                        if (isChild) scp<-rep(2,length(idx)) else scp<-rep(1,length(idx))   # scope 2 for child transitions, 1 otherwise (presumably child jump vs. next stage - confirm against binaryMDPWriter docs)
+                        w$action(label=a1, weights=c(sample(rewards[1]:rewards[2],1), sample(durations[1]:durations[2],1)), prob = as.vector( t(matrix(c(scp,idx,pr), ncol=3)) ))   # prob is flattened to (scope,idx,pr) triples
+                           if (isChild) genProcess(levels-1, timeHorizon[2:length(timeHorizon)], states[2:length(states)], actions, childProcessPr, rewards, durations, states[1])   # recurse one level down, dropping the first entry of timeHorizon/states
+                        w$endAction()
+                     }
+                  w$endState()
+               }
+            w$endStage()
+         }
+         w$stage()   # last stage: a single action per state with scope 0 transitions to indices below statesFather (presumably the jump back to the father - confirm)
+            for (s1 in 1:states[1]-1) {
+               w$state(s1)
+                  idx<-sample(1:statesFather-1,statesFather/2)   # zero-based indices sampled from 0..statesFather-1
+                  pr<-rep(1/length(idx),length(idx))
+                  scp<-rep(0,length(idx))
+                  w$action(label=a1, weights=c(sample(rewards[1]:rewards[2],1), sample(durations[1]:durations[2],1)),   # NOTE(review): a1 is the value left over from the action loop above
+                           prob = as.vector( t(matrix(c(scp,idx,pr), ncol=3)) ))
+                  w$endAction()
+               w$endState()
+            }
+         w$endStage()
+      w$endProcess()
+   }
+
+   w<-binaryMDPWriter(prefix)   # writer shared by the code below and by genProcess
+   w$setWeights(c("Reward","Duration"))   # each action carries two weights: a reward and a duration
+   if (!is.infinite(timeHorizon[1])) genProcess(levels, timeHorizon, states, actions, childProcessPr, rewards, durations)   # NOTE(review): statesFather is not supplied here although the last stage of genProcess reads it
+   else {
+      w$process()   # infinite time-horizon founder: a single stage is written
+         w$stage()
+            for (s1 in 1:states[1]) {   # NOTE(review): labels are 1..states[1] here, while genProcess uses 0..states[1]-1
+               w$state(s1)
+                  aSize = sample(actions[1]:actions[2],1)
+                  for (a1 in 1:aSize-1) {
+                     if (levels>1) isChild = rbinom(1,1,childProcessPr)==1 else isChild = FALSE
+                     if (isChild) idx<-sample(1:states[2]-1,states[2]/2) else idx<-sample(1:states[1]-1,states[1]/2) 
+                     pr<-rep(1/length(idx),length(idx))
+                     if (isChild) scp<-rep(2,length(idx)) else scp<-rep(1,length(idx))
+                     #print(as.vector( t(matrix(c(scp,idx,pr), ncol=3)) ))
+                     w$action(label=a1, weights=c(sample(rewards[1]:rewards[2],1), sample(durations[1]:durations[2],1)), 
+                              prob = as.vector( t(matrix(c(scp,idx,pr), ncol=3)) ))
+                        if (isChild) genProcess(levels-1, timeHorizon[2:length(timeHorizon)], 
+                                                states[2:length(states)], actions, childProcessPr, rewards, durations, states[1])   # child process; states[1] is passed as statesFather
+                     w$endAction()
+                  }
+
+               w$endState()
+            }
+         w$endStage()
+      w$endProcess()
+   }
+   w$closeWriter()   # finish the model description
+}
diff --git a/man/MDP.Rd b/man/MDP.Rd
@@ -8,15 +8,9 @@
 \description{
 Create and optimize MDPs or hierarchical MDPs with discrete time steps and state space.
 }
-\section{History}{
-
-}
-
 \section{To do}{
 
 
-Dynamic hypergraph data type
-
 Nested loading in memory (specify a HMDP with special actions containing child + father jump actions)
 Idea when define the proc with an external nested process use
 w$includeProcess(prefix, transPr, index) (specify the child jump action)
@@ -30,28 +24,6 @@ w$endIncludeProcess()
 
 The hgf then is formed with a subprocess mimic the 1. and last stage of the external proc, i.e. we include the jump pr in the hgf
 We need a new binary file "externalProcess.bin" for storing the nested process in the format "n0 s0 a0 n1 s1 prefix -1 ..." which specify which stage contain the states corresponding to the 1. stage of the nested process (it is here the nested hfg must be loaded and calculated
-
-
-
-Change precision when reading trans pr integers from hgf file. Infact better to change the loading procedure!
-
-getActionXX must be changed since is not precise enough (read numbers from a text string).
-
-For function getPolicy add labels.actions = T and labels.states = T and getW = T
-
-Specifiy how to calculate the discount factor (discrete or continious)
-
-Split prob into 3 values when define the MDP
-
-Update policy ite such that can start with a specified policy.
-
-Index must start from 1 (R style).
-
-MDPtoolbox style loading of model.
-
-Possiblity to specify a model without a duration weight.
-
-Value iteration under ave criterion
 }
 \author{
 Lars Relund \email{lars@relund.dk}

diff --git a/man/binaryMDPWriter.Rd b/man/binaryMDPWriter.Rd
@@ -23,22 +23,49 @@ Binary files are efficent for storing large models. Compared to the HMP (XML)
 format the binary files use less storage space and loading the model is faster.
 }
 \details{
-The functions which can be used are: \itemize{
+The functions which can be used are:
+\itemize{
   \item{\code{setWeights(labels, ...)}: }{Set the labels of the weights used in the actions.
-     \code{labels} is a vector of label names, \code{...} are not used.
-     The function must be called before starting building the model.}
-  \item{\code{process()}: }{Starts a (sub)process.}
-  \item{\code{endProcess()}: }{Ends a (sub)process.}
-  \item{\code{stage(label=NULL)}: }{Starts a stage. Currently \code{label} are not used in the binary format.}
-  \item{\code{endStage()}: }{Ends a (sub)process.}
-  \item{\code{state(label=NULL)}: }{Starts a state. Returns (invisible) the states index number sIdx.}
-  \item{\code{endState()}: }{Ends a stage.}
-  \item{\code{action(label=NULL, weights, prob, ...)}: }{Starts an action. Parameter \code{weights} must be a vector of action weights,
-     \code{prob} must contain triples of (scope,idx,pr) (see the description of actionIdx.bin below), \code{...} is currently not used.}
-  \item{\code{endAction()}: }{Ends an action.}
-  \item{\code{closeWriter()}: }{Close the writer. Must be called when the model description has finished.}}
-
-Seven binary files are created using the following format:\itemize{
+  \code{labels} is a vector of label names, \code{...} are not used. The function must be called
+  before starting building the model.}
+
+\item{\code{process()}: }{Starts a (sub)process.}
+
+\item{\code{endProcess()}: }{Ends a (sub)process.}
+
+\item{\code{stage(label=NULL)}: }{Starts a stage. Currently \code{label} is not used in the
+  binary format.}
+
+\item{\code{endStage()}: }{Ends a stage.}
+
+\item{\code{state(label=NULL)}: }{Starts a state. Returns (invisibly) the state's index number
+  sIdx.}
+
+\item{\code{endState()}: }{Ends a state.}
+
+\item{\code{action(label=NULL, weights, prob, ...)}: }{Starts an action. Parameter
+  \code{weights} must be a vector of action weights, \code{prob} must contain triples of
+  (scope,idx,pr) (see the description of actionIdx.bin below), \code{...} is currently not used.}
+
+\item{\code{endAction()}: }{Ends an action.}
+
+\item{\code{includeProcess(prefix, label=NULL, weights, prob, termStates)}: }{Include an
+  external process. External processes will only be loaded in memory when needed. That is,
+  external processes are useful for large models where memory is a problem.
+  Parameter \code{prefix} is the prefix of the external process. The next parameters specify the
+  child jump action to the process, i.e. \code{weights} must be a vector of action weights,
+  \code{prob} must contain triples of (scope,idx,pr) (see the description of actionIdx.bin
+  below). Finally, \code{termStates} must specify the number of states at the last stage in the
+  external process. Note that inside an \code{includeProcess ... endIncludeProcess} you must
+  specify the father jump actions of the last stage in the external process. An external process
+  is represented using its first and last stage, together with its jump actions.}
+
+\item{\code{endIncludeProcess()}: }{Ends an includeProcess.}
+
+\item{\code{closeWriter()}: }{Close the writer. Must be called when the model description has
+  finished.}}
+
+Eight binary files are created using the following format:\itemize{
 \item{stateIdx.bin: }{File of integers containing the indexes defining all states in the format
 "n0 s0 -1 n0 s0 a0 n1 s1 -1 n0 s0 a0 n1 s1 a1 n2 s2 -1 n0 s0 ...". Here -1 is
 used to indicate that a new state is considered (new line).}
@@ -70,7 +97,11 @@ actions in actionIdx.bin. The format is "p1 p2 p3 -1 p1 -1 p1 p2 -1 ...". Here -
 indicate that a new action is considered (new line).} \item{externalProcesses.bin: }{File of
 characters containing links to the external processes. The format is "n0 s0 prefix -1 n0 s0 a0 n1
 s1 prefix -1 ...". Here -1 is used to indicate that a new external process is considered for the
-stage defined by the indexes.}}
+stage defined by the indexes.}
+\item{externalProcesses.bin: }{File of characters in the format "stageStr prefix stageStr prefix
+..." Here stageStr corresponds to the index (e.g. n0 s0 a0 n1) of the stage corresponding to the
+first stage in the external process and prefix to the prefix of the external process. Note no
+delimiter is used.}}
 }
 \note{
 Note all indexes are starting from zero (C/C++ style).

diff --git a/man/loadMDP.Rd b/man/loadMDP.Rd
@@ -18,6 +18,8 @@ files storing the model.}
 \item{eps}{The sum of the transition probabilities must at most differ eps from one.}
 
 \item{check}{Check if the MDP seems correct.}
+
+\item{verbose}{More output when running algorithms.}
 }
 \value{
 A list containing relevant information about the model and a pointer \code{ptr} to the model object in memory.

diff --git a/man/randomHMDP.Rd b/man/randomHMDP.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2 (4.1.0): do not edit by hand
+% Please edit documentation in R/randomHMDP.R
+\name{randomHMDP}
+\alias{randomHMDP}
+\title{Generate a "random" HMDP stored in a set of binary files.}
+\usage{
+randomHMDP(prefix = "", levels = 3, timeHorizon = c(Inf, 3, 4),
+  states = c(2, 4, 5), actions = c(1, 2), childProcessPr = 0.5,
+  rewards = c(0, 100), durations = c(1, 10))
+}
+\arguments{
+\item{prefix}{A character string with the prefix added to the file(s).}
+
+\item{levels}{Number of levels.}
+
+\item{timeHorizon}{The time horizon for each level (vector). For the founder the time horizon can be Inf.}
+
+\item{states}{Number of states at each stage at a given level (vector of length levels)}
+
+\item{actions}{Min and max number of actions at a state.}
+
+\item{childProcessPr}{Probability of creating a child process when defining an action.}
+
+\item{rewards}{Min and max reward used.}
+
+\item{durations}{Min and max duration used.}
+}
+\description{
+Generate a "random" HMDP stored in a set of binary files.
+}
+\author{
+Lars Relund \email{lars@relund.dk}
+}
+
diff --git a/man/valueIte.Rd b/man/valueIte.Rd
@@ -5,7 +5,7 @@
 \title{Perform value iteration on the MDP.}
 \usage{
 valueIte(mdp, w, dur = NULL, rate = 0.1, rateBase = 1, times = 10,
-  eps = 1e-05, termValues = NULL)
+  eps = 1e-05, termValues = NULL, g = NULL)
 }
 \arguments{
 \item{mdp}{The MDP loaded using \link{loadMDP}.}
@@ -23,6 +23,8 @@ valueIte(mdp, w, dur = NULL, rate = 0.1, rateBase = 1, times = 10,
 \item{eps}{Stopping criterion. If max(w(t)-w(t+1))<epsilon then stop the algorithm, i.e the policy becomes epsilon optimal (see [1] p161).}
 
 \item{termValues}{The terminal values used (values of the last stage in the MDP).}
+
+\item{g}{Average reward. If specified, a single iteration is done using the update equations under the average reward criterion with the specified \code{g} value.}
 }
 \value{
 NULL (invisible)

diff --git a/src/hmdp.cc b/src/hmdp.cc
@@ -524,12 +524,15 @@ void HMDP::ValueIteInfDiscount(uInt times, flt epsilon, idx idxW, idx idxDur,
 	// find founder states at stage zero and last stage
 	pairZero = stages.equal_range("0");
 	pairLast = stages.equal_range("1");
-	if (iniValues.size()!=stages.count("1"))
+	if (iniValues.size()!=stages.count("1")) {
         log << "Error initial values vector does not have the same size as the states that must be assigned the values!\n";
+        return;
+	}
 	for (ite=pairLast.first, iteV=iniValues.begin(); ite!=pairLast.second; ++ite, ++iteV) // set last to zero
 		H.itsNodes[(ite->second)+1].SetW(idxW,*(iteV));
 
 	for (i=1; i<=times; ++i) {
+        //cout << "Ite: " << i << endl;
 		HT.CalcHTacyclic(H,idxW,idxPred,idxMult,idxDur,rate,rateBase);
 		if(MaxDiffFounder(idxW,pairZero,pairLast)<epsilon) break;
 		if (i<times) {    // set next stage to stage zero values
@@ -810,11 +813,14 @@ void HMDP::PolicyIteDiscount(const idx idxW, const idx idxDur, const flt &rate,
 			firstRun = false;
 			SetR(r,idxW,pairZero);
 		}
+		//cout << "r mat: " << r << endl;
 		FounderPrDiscount(P,idxW,idxDur,rate,rateBase,pairZero,pairLast);
+		//cout << "P mat: " << P << endl;
 		// Now solve equations w = r + Pw -> (I-P)w = r
 		matAlg.IMinusP(P);
 		if (matAlg.LASolve(P,w,r)) {log << " Error: can not solve system equations. Is the model fulfilling the model assumptions (e.g. unichain)? " << endl; break;}
 		//cout << "r=" << endl << r << endl << "P=" << endl << P << endl << "w=" << endl << w << endl;
+		//cout << "w mat: " << w << endl;
 		for (ite=pairLast.first, i=0; ite!=pairLast.second; ++ite, ++i) // set last to w values
 			H.itsNodes[(ite->second)+1].SetW(idxW,w(i,0));
 		//if (k==10) break;