diff --git a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/PExplicit.java b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/PExplicit.java index 40d0ef5ca..7a0d9d5e4 100644 --- a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/PExplicit.java +++ b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/PExplicit.java @@ -98,6 +98,8 @@ private static void setup() { RandomNumberGenerator.setup(PExplicitGlobal.getConfig().getRandomSeed()); MemoryMonitor.setup(PExplicitGlobal.getConfig().getMemLimit()); TimeMonitor.setup(PExplicitGlobal.getConfig().getTimeLimit()); + // initialize stats writer + StatWriter.Initialize(); } /** diff --git a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/RuntimeExecutor.java b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/RuntimeExecutor.java index 5659ad821..0b1f61e60 100644 --- a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/RuntimeExecutor.java +++ b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/RuntimeExecutor.java @@ -112,9 +112,9 @@ private static void process(boolean resume) throws Exception { String schFile = PExplicitGlobal.getConfig().getOutputFolder() + "/" + PExplicitGlobal.getConfig().getProjectName() + "_0_0.schedule"; PExplicitLogger.logInfo(String.format("Writing buggy trace in %s", schFile)); - scheduler.schedule.writeToFile(schFile); + scheduler.getSchedule().writeToFile(schFile); - ReplayScheduler replayer = new ReplayScheduler(scheduler.schedule); + ReplayScheduler replayer = new ReplayScheduler(scheduler.getSchedule()); PExplicitGlobal.setScheduler(replayer); try { replayer.run(); @@ -188,9 +188,6 @@ public static void replay() throws Exception { } public static void run() throws Exception { - // initialize stats writer - StatWriter.Initialize(); - if (PExplicitGlobal.getConfig().getSearchStrategyMode() == SearchStrategyMode.Replay) { replay(); } else { diff --git a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/commandline/PExplicitOptions.java b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/commandline/PExplicitOptions.java index d95600b6d..24a2c4caf 100644 --- a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/commandline/PExplicitOptions.java +++ b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/commandline/PExplicitOptions.java @@ -224,7 +224,7 @@ public class PExplicitOptions { .hasArg() .argName("Mode (string)") .build(); - addOption(choiceSelect); + addHiddenOption(choiceSelect); /* * Help menu options diff --git a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/machine/MachineLocalState.java b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/machine/MachineLocalState.java index bc3ad8ce9..d38ec6770 100644 --- a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/machine/MachineLocalState.java +++ b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/machine/MachineLocalState.java @@ -1,23 +1,30 @@ package pexplicit.runtime.machine; import lombok.Getter; -import lombok.Setter; +import org.apache.commons.lang3.tuple.ImmutablePair; +import pexplicit.values.PEvent; import java.io.Serializable; import java.util.List; +import java.util.Set; /** * Represents the local state of a machine */ +@Getter public class MachineLocalState implements Serializable { /** * List of values of all local variables (including internal variables like currentState, FIFO queue, etc.) */ - @Getter - @Setter - private List locals; + private final List locals; + private final Set observedEvents; + private final Set> happensBeforePairs; + private final int timelineHash; - public MachineLocalState(List locals) { + public MachineLocalState(List locals, Set observedEvents, Set> happensBeforePairs) { this.locals = locals; + this.observedEvents = observedEvents; + this.happensBeforePairs = happensBeforePairs; + this.timelineHash = happensBeforePairs.hashCode(); } } diff --git a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/machine/PMachine.java b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/machine/PMachine.java index f3c141623..a17ebed7e 100644 --- a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/machine/PMachine.java +++ b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/machine/PMachine.java @@ -178,9 +178,6 @@ public List getLocalVarNames() { result.add("_started"); result.add("_halted"); - result.add("_observedEvents"); - result.add("_happensBefore"); - result.add("_blockedBy"); result.add("_blockedStateExit"); result.add("_blockedNewStateEntry"); @@ -218,9 +215,6 @@ public List getLocalVarValues() { result.add(started); result.add(halted); - result.add(observedEvents); - result.add(happensBeforePairs); - result.add(blockedBy); result.add(blockedStateExit); result.add(blockedNewStateEntry); @@ -262,9 +256,6 @@ public List copyLocalVarValues() { result.add(started); result.add(halted); - result.add(new HashSet<>(observedEvents)); - result.add(new HashSet<>(happensBeforePairs)); - result.add(blockedBy); result.add(blockedStateExit); result.add(blockedNewStateEntry); @@ -307,9 +298,6 @@ protected int setLocalVarValues(List values) { started = (boolean) values.get(idx++); halted = (boolean) values.get(idx++); - observedEvents = (Set) values.get(idx++); - happensBeforePairs = (Set>) values.get(idx++); - blockedBy = (PContinuation) values.get(idx++); blockedStateExit = (State) values.get(idx++); blockedNewStateEntry = (State) values.get(idx++); @@ -336,11 +324,13 @@ protected int setLocalVarValues(List values) { } public MachineLocalState copyMachineState() { - return new MachineLocalState(copyLocalVarValues()); + return new MachineLocalState(copyLocalVarValues(), observedEvents, happensBeforePairs); } public void setMachineState(MachineLocalState input) { setLocalVarValues(input.getLocals()); + observedEvents = input.getObservedEvents(); + happensBeforePairs = input.getHappensBeforePairs(); } /** diff --git a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/Schedule.java b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/Schedule.java index f790525db..4689c21a3 100644 --- a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/Schedule.java +++ b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/Schedule.java @@ -30,6 +30,7 @@ public class Schedule implements Serializable { * Step state at the start of a scheduler step. * Used in stateful backtracking */ + @Getter @Setter private transient StepState stepBeginState = null; diff --git a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/Scheduler.java b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/Scheduler.java index 2aeaad867..e968cb12d 100644 --- a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/Scheduler.java +++ b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/Scheduler.java @@ -25,7 +25,8 @@ public abstract class Scheduler implements SchedulerInterface { /** * Current schedule */ - public final Schedule schedule; + @Getter + protected final Schedule schedule; /** * Step number */ diff --git a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/ExplicitSearchScheduler.java b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/ExplicitSearchScheduler.java index e809a7be2..0417fe0c7 100644 --- a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/ExplicitSearchScheduler.java +++ b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/ExplicitSearchScheduler.java @@ -14,7 +14,6 @@ import pexplicit.runtime.scheduler.Scheduler; import pexplicit.runtime.scheduler.choice.ScheduleChoice; import pexplicit.runtime.scheduler.choice.SearchUnit; -import pexplicit.runtime.scheduler.explicit.choiceselector.ChoiceSelectorMode; import pexplicit.runtime.scheduler.explicit.choiceselector.ChoiceSelectorQL; import pexplicit.runtime.scheduler.explicit.strategy.*; import pexplicit.utils.exceptions.PExplicitRuntimeException; @@ -160,6 +159,19 @@ protected void runIteration() throws TimeoutException { if (scheduleTerminated) { // schedule terminated, check for deadlock checkDeadlock(); + // update timeline + Integer timelineHash = stepState.getTimelineHash(); + if (!timelines.contains(timelineHash)) { + // add new timeline + timelines.add(timelineHash); + // print new timeline +// stepState.printTimeline(timelineHash, choiceNumber, String.format("%d. New timeline %d @%d::%d", +// SearchStatistics.iteration, timelines.size(), stepNumber, choiceNumber)); + if (PExplicitGlobal.getChoiceSelector() instanceof ChoiceSelectorQL choiceSelectorQL) { + // reward new timeline + choiceSelectorQL.rewardNewTimeline(this); + } + } } if (!skipLiveness) { // check for liveness @@ -198,16 +210,6 @@ protected void runStep() throws TimeoutException { return; } - // update timeline - Integer timelineHash = stepState.getTimelineHash(); - if (!timelines.contains(timelineHash)) { -// stepState.printTimeline(timelineHash, timelines.size()); - timelines.add(timelineHash); - } - if (PExplicitGlobal.getConfig().getChoiceSelectorMode() == ChoiceSelectorMode.QL) { - PExplicitGlobal.getChoiceSelector().startStep(this); - } - if (PExplicitGlobal.getConfig().getStatefulBacktrackingMode() != StatefulBacktrackingMode.None && stepNumber != 0) { schedule.setStepBeginState(stepState.copyState()); @@ -348,7 +350,7 @@ public PMachine getNextScheduleChoice() { } // pick a choice - int selected = PExplicitGlobal.getChoiceSelector().selectChoice(choices); + int selected = PExplicitGlobal.getChoiceSelector().selectChoice(this, choices); result = PExplicitGlobal.getGlobalMachine(choices.get(selected)); PExplicitLogger.logCurrentScheduleChoice(result, stepNumber, choiceNumber); @@ -412,7 +414,7 @@ public PValue getNextDataChoice(List> input_choices) { } // pick a choice - int selected = PExplicitGlobal.getChoiceSelector().selectChoice(choices); + int selected = PExplicitGlobal.getChoiceSelector().selectChoice(this, choices); result = choices.get(selected); PExplicitLogger.logCurrentDataChoice(result, stepNumber, choiceNumber); @@ -495,6 +497,7 @@ private void setChildTask(SearchUnit unit, int choiceNum, SearchTask parentTask, newTask.writeToFile(); parentTask.addChild(newTask); + searchStrategy.getPendingTasks().add(newTask.getId()); searchStrategy.addNewTask(newTask); } diff --git a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/StepState.java b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/StepState.java index b3989653e..50f95f158 100644 --- a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/StepState.java +++ b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/StepState.java @@ -91,8 +91,8 @@ public int getMachineCount(Class type) { return result; } - public void printTimeline(int hash, int idx) { - PExplicitLogger.logVerbose(String.format("---- Timeline %d @%d ------", idx, hash)); + public void printTimeline(int hash, int idx, String comment) { + PExplicitLogger.logVerbose(String.format("----\n%s\tTimeline %d @%d\n-----", comment, idx, hash)); for (PMachine m : machineSet) { PExplicitLogger.logVerbose(String.format(" %s -> %s", m, m.getHappensBeforePairs())); } @@ -102,7 +102,10 @@ public Integer getTimelineHash() { List features = new ArrayList<>(); for (PMachine m : machineSet) { features.add(m.hashCode()); - features.add(m.getHappensBeforePairs().hashCode()); + MachineLocalState ms = machineLocalStates.get(m); + if (ms != null) { + features.add(ms.getTimelineHash()); + } } return features.hashCode(); } diff --git a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/choiceselector/ChoiceQL.java b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/choiceselector/ChoiceQL.java index a73a59864..0c8dad624 100644 --- a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/choiceselector/ChoiceQL.java +++ b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/choiceselector/ChoiceQL.java @@ -1,88 +1,84 @@ package pexplicit.runtime.scheduler.explicit.choiceselector; import lombok.Getter; +import pexplicit.runtime.PExplicitGlobal; import pexplicit.runtime.logger.PExplicitLogger; -import pexplicit.runtime.machine.PMachine; +import pexplicit.runtime.scheduler.Schedule; +import pexplicit.runtime.scheduler.choice.Choice; +import pexplicit.runtime.scheduler.choice.ScheduleChoice; import pexplicit.runtime.scheduler.explicit.ExplicitSearchScheduler; +import pexplicit.runtime.scheduler.explicit.StatefulBacktrackingMode; +import pexplicit.utils.random.RandomNumberGenerator; import java.io.Serializable; -import java.util.ArrayList; import java.util.List; public class ChoiceQL implements Serializable { @Getter - private static final int defaultQValue = 0; - private static final double ALPHA = 0.5; - private static final double GAMMA = 0.05; + private static final double defaultQValue = 1.0; + private static final double ALPHA = 0.3; + private static final double GAMMA = 0.2; + private static final double STEP_PENALTY_REWARD = -1.0; + private static final double NEW_TIMELINE_REWARD = 1.0; private final ChoiceQTable qValues; - private final List currActions = new ArrayList<>(); - /** - * Details about the current step - */ - private Integer currState = 0; - private int currNumTimelines = 0; public ChoiceQL() { qValues = new ChoiceQTable(); } - private void rewardAction(Object action, int reward) { - ChoiceQTable.ChoiceQStateEntry stateEntry = qValues.get(currState); + private void rewardAction(int state, Object action, double reward) { + ChoiceQTable.ChoiceQStateEntry stateEntry = qValues.get(state); ChoiceQTable.ChoiceQClassEntry classEntry = stateEntry.get(action.getClass()); - int maxQ = classEntry.getMaxQ(); - int oldVal = classEntry.get(action); - int newVal = (int) ((1 - ALPHA) * oldVal + ALPHA * (reward + GAMMA * maxQ)); + double maxQ = classEntry.getMaxQ(); + double oldVal = classEntry.get(action); + double newVal = ((1 - ALPHA) * oldVal + ALPHA * (reward + GAMMA * maxQ)); classEntry.update(action, newVal); } - private void setStateTimelineAbstraction(ExplicitSearchScheduler sch) { - List features = new ArrayList<>(); - for (PMachine m : sch.getStepState().getMachineSet()) { - features.add(m.hashCode()); - features.add(m.getHappensBeforePairs().hashCode()); + public int select(int state, List choices) { + // Compute the total and minimum weight + double totalWeight = 0.0; + double minWeight = Double.MAX_VALUE; + for (int i = 0; i < choices.size(); i++) { + Object choice = choices.get(i); + double weight = qValues.get(state, choice.getClass(), choice); + totalWeight += weight; + if (weight < minWeight) { + minWeight = weight; + } } - currState = features.hashCode(); - } - public void startStep(ExplicitSearchScheduler sch) { -// printQTable(); - - // set reward amount - int reward = -100; - if (sch.getTimelines().size() > currNumTimelines) { - reward = 100; - } - // reward last actions - for (Object action : currActions) { - rewardAction(action, reward); + // Now choose a weighted random item + int idx = 0; + for (double r = RandomNumberGenerator.getInstance().getRandomDouble() * totalWeight; idx < choices.size() - 1; idx++) { + Object choice = choices.get(idx); + double weight = qValues.get(state, choice.getClass(), choice); + r -= weight; + if (r <= 0.0) { + break; + } } + return idx; + } - // set number of timelines at start of step - currNumTimelines = sch.getTimelines().size(); - - // set state at start of step - setStateTimelineAbstraction(sch); - - // reset current actions at start of step - currActions.clear(); + public void penalizeSelected(int state, Object action) { + // give a negative reward to the selected choice + rewardAction(state, action, STEP_PENALTY_REWARD); } - public int selectChoice(List choices) { - int maxVal = Integer.MIN_VALUE; - int selected = 0; - for (int i = 0; i < choices.size(); i++) { - Object choice = choices.get(i); - int val = qValues.get(currState, choice.getClass(), choice); - if (val > maxVal) { - maxVal = val; - selected = i; + public void rewardScheduleChoices(ExplicitSearchScheduler sch) { + Schedule schedule = sch.getSchedule(); + for (int cIdx : sch.getSearchStrategy().getCurrTask().getSearchUnits().keySet()) { + int state = 0; + Choice choice = schedule.getChoice(cIdx); + if (PExplicitGlobal.getConfig().getStatefulBacktrackingMode() != StatefulBacktrackingMode.None) { + ScheduleChoice scheduleChoice = schedule.getScheduleChoiceAt(cIdx); + if (scheduleChoice != null && scheduleChoice.getChoiceState() != null) { + state = scheduleChoice.getChoiceState().getTimelineHash(); + } } + rewardAction(state, choice.getCurrent(), NEW_TIMELINE_REWARD); } - return selected; - } - - public void addChoice(Object choice) { - currActions.add(choice); } public int getNumStates() { @@ -121,10 +117,10 @@ public void printQTable() { } Object bestAction = classEntry.getBestAction(); if (bestAction != null) { - int maxQ = classEntry.get(bestAction); + double maxQ = classEntry.get(bestAction); PExplicitLogger.logVerbose( String.format( - " %s [%s] -> %s -> %d\t%s", + " %s [%s] -> %s -> %.2f\t%s", stateStr, cls.getSimpleName(), bestAction, maxQ, classEntry)); } } diff --git a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/choiceselector/ChoiceQTable.java b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/choiceselector/ChoiceQTable.java index 3489ff353..1fc1dc655 100644 --- a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/choiceselector/ChoiceQTable.java +++ b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/choiceselector/ChoiceQTable.java @@ -1,6 +1,7 @@ package pexplicit.runtime.scheduler.explicit.choiceselector; import lombok.Getter; +import pexplicit.runtime.machine.PMachine; import java.io.Serializable; import java.util.*; @@ -8,7 +9,7 @@ public class ChoiceQTable implements Serializable { private final Map> table = new HashMap<>(); - public int get(S state, Class cls, A action) { + public double get(S state, Class cls, A action) { if (!table.containsKey(state)) { table.put(state, new ChoiceQStateEntry()); } @@ -111,7 +112,7 @@ public String toString() { public static class ChoiceQStateEntry implements Serializable { private final Map table = new HashMap<>(); - public int get(Class cls, A action) { + public double get(Class cls, A action) { if (!table.containsKey(cls)) { table.put(cls, new ChoiceQClassEntry()); } @@ -145,21 +146,27 @@ public String toString() { } public static class ChoiceQClassEntry implements Serializable { - private final Map table = new HashMap<>(); + private final Map table = new HashMap<>(); - public int get(A action) { + public double get(A action) { + if (action instanceof PMachine machine) { + action = (A) machine.getPid(); + } if (!table.containsKey(action)) { table.put(action, ChoiceQL.getDefaultQValue()); } return table.get(action); } - public void update(A action, int val) { + public void update(A action, double val) { + if (action instanceof PMachine machine) { + action = (A) machine.getPid(); + } assert (table.containsKey(action)); table.put(action, val); } - public int getMaxQ() { + public double getMaxQ() { if (table.isEmpty()) { return ChoiceQL.getDefaultQValue(); } else { @@ -169,7 +176,7 @@ public int getMaxQ() { public A getBestAction() { if (!table.isEmpty()) { - int maxQ = getMaxQ(); + double maxQ = getMaxQ(); for (A action : table.keySet()) { if (get(action) == maxQ) { return action; @@ -187,10 +194,10 @@ public int size() { public String toString() { StringBuilder out = new StringBuilder(); out.append("{ "); - for (Map.Entry entry : table.entrySet()) { + for (Map.Entry entry : table.entrySet()) { out.append(entry.getKey().toString()); out.append(" -> "); - out.append(String.format("%d", entry.getValue())); + out.append(String.format("%.2f", entry.getValue())); out.append(", "); } out.append(" }"); diff --git a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/choiceselector/ChoiceSelector.java b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/choiceselector/ChoiceSelector.java index 380c7440c..4f9fea32a 100644 --- a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/choiceselector/ChoiceSelector.java +++ b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/choiceselector/ChoiceSelector.java @@ -6,8 +6,13 @@ import java.util.List; public abstract class ChoiceSelector implements Serializable { - public abstract int selectChoice(List choices); + protected abstract int select(ExplicitSearchScheduler sch, List choices); - public void startStep(ExplicitSearchScheduler sch) { + public int selectChoice(ExplicitSearchScheduler sch, List choices) { + if (choices.size() == 1) { + return 0; + } else { + return select(sch, choices); + } } } diff --git a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/choiceselector/ChoiceSelectorQL.java b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/choiceselector/ChoiceSelectorQL.java index 6cccab44f..7b622f75d 100644 --- a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/choiceselector/ChoiceSelectorQL.java +++ b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/choiceselector/ChoiceSelectorQL.java @@ -2,7 +2,10 @@ import lombok.Getter; import lombok.Setter; +import pexplicit.runtime.PExplicitGlobal; import pexplicit.runtime.scheduler.explicit.ExplicitSearchScheduler; +import pexplicit.runtime.scheduler.explicit.StatefulBacktrackingMode; +import pexplicit.runtime.scheduler.explicit.StepState; import pexplicit.utils.random.RandomNumberGenerator; import java.util.List; @@ -13,7 +16,7 @@ public class ChoiceSelectorQL extends ChoiceSelector { @Getter private static final ChoiceQL choiceQL = new ChoiceQL(); @Setter - private static double EPSILON_DECAY_FACTOR = 0.999; + private static double EPSILON_DECAY_FACTOR = 0.99999; private static double epsilon = EPSILON_MAX; private final ChoiceSelector choiceSelectorExplore; @@ -21,24 +24,31 @@ public ChoiceSelectorQL() { choiceSelectorExplore = new ChoiceSelectorRandom(); } - public int selectChoice(List choices) { + protected int select(ExplicitSearchScheduler sch, List choices) { + int state = 0; + if (PExplicitGlobal.getConfig().getStatefulBacktrackingMode() != StatefulBacktrackingMode.None) { + StepState stepState = sch.getSchedule().getStepBeginState(); + if (stepState != null) { + state = stepState.getTimelineHash(); + } + } + decayEpsilon(); double randNum = RandomNumberGenerator.getInstance().getRandomDouble(); int selected = -1; if (randNum <= epsilon) { // explore - selected = choiceSelectorExplore.selectChoice(choices); + selected = choiceSelectorExplore.select(sch, choices); } else { // exploit - selected = choiceQL.selectChoice(choices); + selected = choiceQL.select(state, choices); } - choiceQL.addChoice(choices.get(selected)); + choiceQL.penalizeSelected(state, choices.get(selected)); return selected; } - @Override - public void startStep(ExplicitSearchScheduler sch) { - choiceQL.startStep(sch); + public void rewardNewTimeline(ExplicitSearchScheduler sch) { + choiceQL.rewardScheduleChoices(sch); } private void decayEpsilon() { diff --git a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/choiceselector/ChoiceSelectorRandom.java b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/choiceselector/ChoiceSelectorRandom.java index 5c3484ad0..0c5141c0f 100644 --- a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/choiceselector/ChoiceSelectorRandom.java +++ b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/choiceselector/ChoiceSelectorRandom.java @@ -1,5 +1,6 @@ package pexplicit.runtime.scheduler.explicit.choiceselector; +import pexplicit.runtime.scheduler.explicit.ExplicitSearchScheduler; import pexplicit.utils.random.RandomNumberGenerator; import java.util.List; @@ -9,7 +10,7 @@ public class ChoiceSelectorRandom extends ChoiceSelector { public ChoiceSelectorRandom() { } - public int selectChoice(List choices) { + public int select(ExplicitSearchScheduler sch, List choices) { return RandomNumberGenerator.getInstance().getRandomInt(choices.size()); } } diff --git a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/strategy/SearchStrategy.java b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/strategy/SearchStrategy.java index d67267486..40206d302 100644 --- a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/strategy/SearchStrategy.java +++ b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/strategy/SearchStrategy.java @@ -35,13 +35,13 @@ public abstract class SearchStrategy implements Serializable { public SearchTask createTask(SearchTask parentTask) { SearchTask newTask = new SearchTask(allTasks.size(), parentTask); allTasks.add(newTask); - pendingTasks.add(newTask.getId()); return newTask; } public void createFirstTask() { assert (allTasks.size() == 0); SearchTask firstTask = createTask(null); + pendingTasks.add(firstTask.getId()); setCurrTask(firstTask); } diff --git a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/strategy/SearchStrategyDfs.java b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/strategy/SearchStrategyDfs.java index b60a4b302..6dae0190c 100644 --- a/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/strategy/SearchStrategyDfs.java +++ b/Src/PRuntimes/PExplicitRuntime/src/main/java/pexplicit/runtime/scheduler/explicit/strategy/SearchStrategyDfs.java @@ -5,7 +5,6 @@ public SearchStrategyDfs() { } public void addNewTask(SearchTask task) { - assert (pendingTasks.isEmpty()); } public SearchTask popNextTask() {