diff --git a/NAMESPACE b/NAMESPACE index cda0714..2c58555 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -30,6 +30,7 @@ export(loadMDP) export(plotHypergraph) export(policyIteAve) export(policyIteDiscount) +export(randomHMDP) export(removeAction) export(resetActions) export(setActionWeight) diff --git a/R/randomHMDP.R b/R/randomHMDP.R new file mode 100644 index 0000000..a35f6ae --- /dev/null +++ b/R/randomHMDP.R @@ -0,0 +1,85 @@ +#' Generate a "random" HMDP stored in a set of binary files. +#' +#' @param prefix A character string with the prefix added to the file(s). +#' @param levels Number of levels. +#' @param timeHorizon The time horizon for each level (vector). For the founder the time horizon can be Inf. +#' @param states Number of states at each stage at a given level (vector of length levels) +#' @param actions Min and max number of actions at a state. +#' @param childProcessPr Probability of creating a child process when defining an action. +#' @param rewards Min and max reward used. +#' @param durations Min and max duration used. 
+#' +#' @return NULL +#' +#' @author Lars Relund \email{lars@@relund.dk} +#' +#' @export +randomHMDP<-function(prefix="", levels=3, timeHorizon=c(Inf,3,4), states=c(2,4,5), actions=c(1,2), + childProcessPr = 0.5, rewards=c(0,100), durations=c(1,10) ) +{ + + # Generate a finite time-horizon process; recursive, since an action may open a child process one level down. Note 1:n-1 parses as (1:n)-1, so the loops run over zero-based indices 0..n-1 + genProcess<-function(levels, timeHorizon, states, actions, childProcessPr, rewards, durations, statesFather) { + w$process() + for(l1 in 1:timeHorizon[1]-1 ) { + w$stage() + for (s1 in 1:states[1]-1) { + w$state(s1) + aSize = sample(actions[1]:actions[2],1) + for (a1 in 1:aSize-1) { + if (levels>1) isChild = rbinom(1,1,childProcessPr)==1 else isChild = FALSE + if (isChild) idx<-sample(1:states[2]-1,states[2]/2) else idx<-sample(1:states[1]-1,states[1]/2) + pr<-rep(1/length(idx),length(idx)) + if (isChild) scp<-rep(2,length(idx)) else scp<-rep(1,length(idx)) + w$action(label=a1, weights=c(sample(rewards[1]:rewards[2],1), sample(durations[1]:durations[2],1)), prob = as.vector( t(matrix(c(scp,idx,pr), ncol=3)) )) + if (isChild) genProcess(levels-1, timeHorizon[2:length(timeHorizon)], states[2:length(states)], actions, childProcessPr, rewards, durations, states[1]) + w$endAction() + } + w$endState() + } + w$endStage() + } + w$stage() # last stage: every state gets one action whose transitions use scope 0 and indices drawn from statesFather. NOTE(review): label=a1 is the index left over from the action loop above, and the top-level finite-horizon call passes no statesFather - confirm both are intended + for (s1 in 1:states[1]-1) { + w$state(s1) + idx<-sample(1:statesFather-1,statesFather/2) + pr<-rep(1/length(idx),length(idx)) + scp<-rep(0,length(idx)) + w$action(label=a1, weights=c(sample(rewards[1]:rewards[2],1), sample(durations[1]:durations[2],1)), + prob = as.vector( t(matrix(c(scp,idx,pr), ncol=3)) )) + w$endAction() + w$endState() + } + w$endStage() + w$endProcess() + } + + w<-binaryMDPWriter(prefix) + w$setWeights(c("Reward","Duration")) + if (!is.infinite(timeHorizon[1])) genProcess(levels, timeHorizon, states, actions, childProcessPr, rewards, durations) + else { + w$process() + w$stage() + for (s1 in 1:states[1]) { + w$state(s1) + aSize = sample(actions[1]:actions[2],1) + for (a1 in 1:aSize-1) { + if (levels>1) isChild = 
rbinom(1,1,childProcessPr)==1 else isChild = FALSE + if (isChild) idx<-sample(1:states[2]-1,states[2]/2) else idx<-sample(1:states[1]-1,states[1]/2) + pr<-rep(1/length(idx),length(idx)) + if (isChild) scp<-rep(2,length(idx)) else scp<-rep(1,length(idx)) + #print(as.vector( t(matrix(c(scp,idx,pr), ncol=3)) )) + w$action(label=a1, weights=c(sample(rewards[1]:rewards[2],1), sample(durations[1]:durations[2],1)), + prob = as.vector( t(matrix(c(scp,idx,pr), ncol=3)) )) + if (isChild) genProcess(levels-1, timeHorizon[2:length(timeHorizon)], + states[2:length(states)], actions, childProcessPr, rewards, durations, states[1]) + w$endAction() + } + + w$endState() + } + w$endStage() + w$endProcess() + } + w$closeWriter() +} \ No newline at end of file diff --git a/man/MDP.Rd b/man/MDP.Rd index 6837343..d44b8cf 100644 --- a/man/MDP.Rd +++ b/man/MDP.Rd @@ -8,15 +8,9 @@ \description{ Create and optimize MDPs or hierarchical MDPs with discrete time steps and state space. } -\section{History}{ - -} - \section{To do}{ -Dynamic hypergraph data type - Nested loading in memory (specify a HMDP with special actions containing child + father jump actions) Idea when define the proc with an external nested process use w$includeProcess(prefix, transPr, index) (specify the child jump action) @@ -30,28 +24,6 @@ w$endIncludeProcess() The hgf then is formed with a subprocess mimic the 1. and last stage of the external proc, i.e. we include the jump pr in the hgf We need a new binary file "externalProcess.bin" for storing the nested process in the format "n0 s0 a0 n1 s1 prefix -1 ..." which specify which stage contain the states corresponding to the 1. stage of the nested process (it is here the nested hfg must be loaded and calculated - - - -Change precision when reading trans pr integers from hgf file. Infact better to change the loading procedure! - -getActionXX must be changed since is not precise enough (read numbers from a text string). 
- -For function getPolicy add labels.actions = T and labels.states = T and getW = T - -Specifiy how to calculate the discount factor (discrete or continious) - -Split prob into 3 values when define the MDP - -Update policy ite such that can start with a specified policy. - -Index must start from 1 (R style). - -MDPtoolbox style loading of model. - -Possiblity to specify a model without a duration weight. - -Value iteration under ave criterion } \author{ Lars Relund \email{lars@relund.dk} diff --git a/man/binaryMDPWriter.Rd b/man/binaryMDPWriter.Rd index bd01c87..0807f58 100644 --- a/man/binaryMDPWriter.Rd +++ b/man/binaryMDPWriter.Rd @@ -23,22 +23,49 @@ Binary files are efficent for storing large models. Compared to the HMP (XML) format the binary files use less storage space and loading the model is faster. } \details{ -The functions which can be used are: \itemize{ +The functions which can be used are: +\itemize{ \item{\code{setWeights(labels, ...)}: }{Set the labels of the weights used in the actions. - \code{labels} is a vector of label names, \code{...} are not used. - The function must be called before starting building the model.} - \item{\code{process()}: }{Starts a (sub)process.} - \item{\code{endProcess()}: }{Ends a (sub)process.} - \item{\code{stage(label=NULL)}: }{Starts a stage. Currently \code{label} are not used in the binary format.} - \item{\code{endStage()}: }{Ends a (sub)process.} - \item{\code{state(label=NULL)}: }{Starts a state. Returns (invisible) the states index number sIdx.} - \item{\code{endState()}: }{Ends a stage.} - \item{\code{action(label=NULL, weights, prob, ...)}: }{Starts an action. Parameter \code{weights} must be a vector of action weights, - \code{prob} must contain triples of (scope,idx,pr) (see the description of actionIdx.bin below), \code{...} is currently not used.} - \item{\code{endAction()}: }{Ends an action.} - \item{\code{closeWriter()}: }{Close the writer. 
Must be called when the model description has finished.}} - -Seven binary files are created using the following format:\itemize{ + \code{labels} is a vector of label names, \code{...} are not used. The function must be called + before starting building the model.} + +\item{\code{process()}: }{Starts a (sub)process.} + +\item{\code{endProcess()}: }{Ends a (sub)process.} + +\item{\code{stage(label=NULL)}: }{Starts a stage. Currently \code{label} is not used in the + binary format.} + +\item{\code{endStage()}: }{Ends a stage.} + +\item{\code{state(label=NULL)}: }{Starts a state. Returns (invisible) the state's index number + sIdx.} + +\item{\code{endState()}: }{Ends a state.} + +\item{\code{action(label=NULL, weights, prob, ...)}: }{Starts an action. Parameter + \code{weights} must be a vector of action weights, \code{prob} must contain triples of + (scope,idx,pr) (see the description of actionIdx.bin below), \code{...} is currently not used.} + +\item{\code{endAction()}: }{Ends an action.} + +\item{\code{includeProcess(prefix, label=NULL, weights, prob, termStates)}: }{Include an + external process. External processes will only be loaded in memory when needed. That is, + external processes are useful when considering large models that cause memory problems. + Parameter \code{prefix} is the prefix of the external process. The next parameters specify the + child jump action to the process, i.e. \code{weights} must be a vector of action weights, + \code{prob} must contain triples of (scope,idx,pr) (see the description of actionIdx.bin + below). Finally \code{termStates} must specify the number of states at the last stage in the + external process. Note that inside an \code{includeProcess ... endIncludeProcess} you must + specify the father jump actions of the last stage in the external process. 
An external process + is represented using its first and last stage, together with its jump actions.} + +\item{\code{endIncludeProcess()}: }{Ends an includeProcess.} + +\item{\code{closeWriter()}: }{Close the writer. Must be called when the model description has + finished.}} + +Eight binary files are created using the following format:\itemize{ \item{stateIdx.bin: }{File of integers containing the indexes defining all states in the format "n0 s0 -1 n0 s0 a0 n1 s1 -1 n0 s0 a0 n1 s1 a1 n2 s2 -1 n0 s0 ...". Here -1 is used to indicate that a new state is considered (new line).} @@ -70,7 +97,11 @@ actions in actionIdx.bin. The format is "p1 p2 p3 -1 p1 -1 p1 p2 -1 ...". Here - indicate that a new action is considered (new line).} \item{externalProcesses.bin: }{File of characters containing links to the external processes. The format is "n0 s0 prefix -1 n0 s0 a0 n1 s1 prefix -1 ...". Here -1 is used to indicate that a new external process is considered for the -stage defined by the indexes.}} +stage defined by the indexes.} +\item{externalProcesses.bin: }{File of characters in the format "stageStr prefix stageStr prefix +..." Here stageStr corresponds to the index (e.g. n0 s0 a0 n1) of the stage corresponding to the +first stage in the external process and prefix to the prefix of the external process. Note no +delimiter is used.}} } \note{ Note all indexes are starting from zero (C/C++ style). diff --git a/man/loadMDP.Rd b/man/loadMDP.Rd index ac7e33f..8572fe3 100644 --- a/man/loadMDP.Rd +++ b/man/loadMDP.Rd @@ -18,6 +18,8 @@ files storing the model.} \item{eps}{The sum of the transition probabilities must at most differ eps from one.} \item{check}{Check if the MDP seems correct.} + +\item{verbose}{More output when running algorithms.} } \value{ A list containing relevant information about the model and a pointer \code{ptr} to the model object in memory. 
diff --git a/man/randomHMDP.Rd b/man/randomHMDP.Rd new file mode 100644 index 0000000..8867d20 --- /dev/null +++ b/man/randomHMDP.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2 (4.1.0): do not edit by hand +% Please edit documentation in R/randomHMDP.R +\name{randomHMDP} +\alias{randomHMDP} +\title{Generate a "random" HMDP stored in a set of binary files.} +\usage{ +randomHMDP(prefix = "", levels = 3, timeHorizon = c(Inf, 3, 4), + states = c(2, 4, 5), actions = c(1, 2), childProcessPr = 0.5, + rewards = c(0, 100), durations = c(1, 10)) +} +\arguments{ +\item{prefix}{A character string with the prefix added to the file(s).} + +\item{levels}{Number of levels.} + +\item{timeHorizon}{The time horizon for each level (vector). For the founder the time horizon can be Inf.} + +\item{states}{Number of states at each stage at a given level (vector of length levels)} + +\item{actions}{Min and max number of actions at a state.} + +\item{childProcessPr}{Probability of creating a child process when defining an action.} + +\item{rewards}{Min and max reward used.} + +\item{durations}{Min and max duration used.} +} +\description{ +Generate a "random" HMDP stored in a set of binary files. +} +\author{ +Lars Relund \email{lars@relund.dk} +} + diff --git a/man/valueIte.Rd b/man/valueIte.Rd index f4c29e1..9a2d8e7 100644 --- a/man/valueIte.Rd +++ b/man/valueIte.Rd @@ -5,7 +5,7 @@ \title{Perform value iteration on the MDP.} \usage{ valueIte(mdp, w, dur = NULL, rate = 0.1, rateBase = 1, times = 10, - eps = 1e-05, termValues = NULL) + eps = 1e-05, termValues = NULL, g = NULL) } \arguments{ \item{mdp}{The MDP loaded using \link{loadMDP}.} @@ -23,6 +23,8 @@ valueIte(mdp, w, dur = NULL, rate = 0.1, rateBase = 1, times = 10, \item{eps}{Stopping criterion. 
If max(w(t)-w(t+1))second)+1].SetW(idxW,*(iteV)); for (i=1; i<=times; ++i) { + //cout << "Ite: " << i << endl; HT.CalcHTacyclic(H,idxW,idxPred,idxMult,idxDur,rate,rateBase); if(MaxDiffFounder(idxW,pairZero,pairLast) (I-P)w = r matAlg.IMinusP(P); if (matAlg.LASolve(P,w,r)) {log << " Error: can not solve system equations. Is the model fulfilling the model assumptions (e.g. unichain)? " << endl; break;} //cout << "r=" << endl << r << endl << "P=" << endl << P << endl << "w=" << endl << w << endl; + //cout << "w mat: " << w << endl; for (ite=pairLast.first, i=0; ite!=pairLast.second; ++ite, ++i) // set last to w values H.itsNodes[(ite->second)+1].SetW(idxW,w(i,0)); //if (k==10) break;