From 2645b7b97f3088da26222645a479b4bd1510d623 Mon Sep 17 00:00:00 2001
From: Trevor Bekolay <tbekolay@gmail.com>
Date: Fri, 19 Oct 2018 20:40:53 -0400
Subject: [PATCH] Refactor Simulator.run_steps

One major change is to move the host-to-chip and chip-to-host
logic out of Simulator and into CxSimulator and LoihiSimulator.
This keeps all manipulation of CxSimulator- and LoihiSimulator-internal
values inside those classes.

The other major change is to handle each possible permutation of
`run_steps` logic independently and cache the resulting sequence
of step functions. This is partly for readability (it is now
clearer what happens in which case, and the logic is split up such
that we no longer get the cyclomatic complexity warning), and
partly for speed, since in some cases `run_steps` could be
called within a loop.

To avoid cyclic dependencies, the send/receive nodes and targets
were moved to `loihi_cx.py`. This is a temporary location from which
they (and specifically `PESModulatoryTarget`) are accessible
everywhere they are required. The plan is to relocate them
in an upcoming refactoring.
---
 nengo_loihi/builder.py              |  17 +-
 nengo_loihi/loihi_cx.py             | 168 ++++++++---
 nengo_loihi/loihi_interface.py      | 175 +++++++++++-
 nengo_loihi/simulator.py            | 416 ++++++++++------------------
 nengo_loihi/splitter.py             |  78 +-----
 nengo_loihi/tests/test_loihi_cx.py  |   9 +-
 nengo_loihi/tests/test_simulator.py | 120 ++++++++
 7 files changed, 595 insertions(+), 388 deletions(-)

diff --git a/nengo_loihi/builder.py b/nengo_loihi/builder.py
index 504c88d27..dbe51b307 100644
--- a/nengo_loihi/builder.py
+++ b/nengo_loihi/builder.py
@@ -15,9 +15,16 @@
 import nengo.utils.numpy as npext
 
 from nengo_loihi.loihi_cx import (
-    CxModel, CxGroup, CxSynapses, CxAxons, CxProbe, CxSpikeInput)
+    ChipReceiveNeurons,
+    ChipReceiveNode,
+    CxAxons,
+    CxGroup,
+    CxModel,
+    CxProbe,
+    CxSpikeInput,
+    CxSynapses,
+)
 from nengo_loihi.neurons import loihi_rates
-from nengo_loihi.splitter import ChipReceiveNeurons, ChipReceiveNode
 
 logger = logging.getLogger(__name__)
 
@@ -74,7 +81,6 @@ def __init__(self, dt=0.001, label=None, builder=None):
         self.objs = collections.defaultdict(dict)
         self.params = {}  # Holds data generated when building objects
         self.probes = []
-        self.chip2host_params = None  # Will be provided by Simulator
         self.probe_conns = {}
 
         self.seeds = {}
@@ -105,6 +111,11 @@ def __init__(self, dt=0.001, label=None, builder=None):
         # limit for clipping intercepts, to avoid neurons with high gains
         self.intercept_limit = 0.95
 
+        # Will be provided by Simulator
+        self.chip2host_params = None
+        self.chip2host_receivers = None
+        self.host2chip_senders = None
+
     @property
     def inter_rate(self):
         return (1. / (self.dt * self.inter_n) if self._inter_rate is None else
diff --git a/nengo_loihi/loihi_cx.py b/nengo_loihi/loihi_cx.py
index 0f4dc0078..d3af8a430 100644
--- a/nengo_loihi/loihi_cx.py
+++ b/nengo_loihi/loihi_cx.py
@@ -5,6 +5,7 @@
 import warnings
 
 import numpy as np
+import nengo
 from nengo.exceptions import BuildError, SimulationError
 from nengo.utils.compat import is_iterable
 
@@ -479,13 +480,6 @@ def discretize(self):
         for group in self.cx_groups:
             group.discretize()
 
-    def get_loihi(self, seed=None):
-        from nengo_loihi.loihi_interface import LoihiSimulator
-        return LoihiSimulator(self, seed=seed)
-
-    def get_simulator(self, seed=None):
-        return CxSimulator(self, seed=seed)
-
     def validate(self):
         if len(self.cx_groups) == 0:
             raise BuildError("No neurons marked for execution on-chip. "
@@ -510,6 +504,7 @@ def __init__(self, model, seed=None):
 
         self.build(model, seed=seed)
 
+        self._chip2host_sent_steps = 0
         self._probe_filters = {}
         self._probe_filter_pos = {}
 
@@ -526,27 +521,6 @@ def error(cls, msg):
         else:
             warnings.warn(msg)
 
-    def clear(self):
-        """Clear all signals set in `build` (to free up memory)"""
-        self.q = None
-        self.u = None
-        self.v = None
-        self.s = None
-        self.c = None
-        self.w = None
-
-        self.vth = None
-        self.vmin = None
-        self.vmax = None
-
-        self.bias = None
-        self.ref = None
-        self.a_in = None
-        self.z = None
-
-        self.noiseGen = None
-        self.noiseTarget = None
-
     def build(self, model, seed=None):  # noqa: C901
         """Set up NumPy arrays to emulate chip memory and I/O."""
         model.validate()
@@ -684,6 +658,76 @@ def noiseGen(n=self.n_cx, rng=self.rng):
         self.noiseGen = noiseGen
         self.noiseTarget = noiseTarget
 
+    def clear(self):
+        """Clear all signals set in `build` (to free up memory)"""
+        self.q = None
+        self.u = None
+        self.v = None
+        self.s = None
+        self.c = None
+        self.w = None
+
+        self.vth = None
+        self.vmin = None
+        self.vmax = None
+
+        self.bias = None
+        self.ref = None
+        self.a_in = None
+        self.z = None
+
+        self.noiseGen = None
+        self.noiseTarget = None
+
+    def close(self):
+        self.closed = True
+        self.clear()
+
+    def chip2host(self):
+        # go through the list of chip2host connections
+        increment = None
+        for probe, receiver in self.model.chip2host_receivers.items():
+            # extract the probe data from the simulator
+            cx_probe = self.model.objs[probe]['out']
+            x = self.probe_outputs[cx_probe][self._chip2host_sent_steps:]
+            if len(x) > 0:
+                if increment is None:
+                    increment = len(x)
+                else:
+                    assert increment == len(x)
+                if cx_probe.weights is not None:
+                    x = np.dot(x, cx_probe.weights)
+                for j in range(len(x)):
+                    receiver.receive(
+                        self.model.dt * (self._chip2host_sent_steps + j + 2),
+                        x[j]
+                    )
+        if increment is not None:
+            self._chip2host_sent_steps += increment
+
+    def host2chip(self):
+        # go through the list of host2chip connections
+        for sender, receiver in self.model.host2chip_senders.items():
+            learning_rate = 50  # This is set to match hardware
+            if isinstance(receiver, PESModulatoryTarget):
+                for t, x in sender.queue:
+                    probe = receiver.target
+                    conn = self.model.probe_conns[probe]
+                    dec_syn = self.model.objs[conn]['decoders']
+                    assert dec_syn.tracing
+
+                    z = self.z[dec_syn]
+                    x = np.hstack([-x, x])
+
+                    delta_w = np.outer(z, x) * learning_rate
+
+                    for i, w in enumerate(dec_syn.weights):
+                        w += delta_w[i].astype('int32')
+            else:
+                for t, x in sender.queue:
+                    receiver.receive(t, x)
+            del sender.queue[:]
+
     def step(self):  # noqa: C901
         """Advance the simulation by 1 step (``dt`` seconds)."""
 
@@ -829,6 +873,68 @@ def get_probe_output(self, probe):
         x = x if cx_probe.weights is None else np.dot(x, cx_probe.weights)
         return self._filter_probe(cx_probe, x)
 
-    def close(self):
-        self.closed = True
-        self.clear()
+
+class PESModulatoryTarget(object):
+    def __init__(self, target):
+        self.target = target
+
+
+class HostSendNode(nengo.Node):
+    """For sending host->chip messages"""
+
+    def __init__(self, dimensions):
+        self.queue = []
+        super(HostSendNode, self).__init__(self.update,
+                                           size_in=dimensions, size_out=0)
+
+    def update(self, t, x):
+        assert len(self.queue) == 0 or t > self.queue[-1][0]
+        self.queue.append((t, x))
+
+
+class HostReceiveNode(nengo.Node):
+    """For receiving chip->host messages"""
+
+    def __init__(self, dimensions):
+        self.queue = [(0, np.zeros(dimensions))]
+        self.queue_index = 0
+        super(HostReceiveNode, self).__init__(self.update,
+                                              size_in=0, size_out=dimensions)
+
+    def update(self, t):
+        while (len(self.queue) > self.queue_index + 1
+               and self.queue[self.queue_index][0] < t):
+            self.queue_index += 1
+        return self.queue[self.queue_index][1]
+
+    def receive(self, t, x):
+        self.queue.append((t, x))
+
+
+class ChipReceiveNode(nengo.Node):
+    """For receiving host->chip messages"""
+
+    def __init__(self, dimensions, size_out):
+        self.raw_dimensions = dimensions
+        self.cx_spike_input = CxSpikeInput(
+            np.zeros((0, dimensions), dtype=bool))
+        self.last_time = None
+        super(ChipReceiveNode, self).__init__(self.update,
+                                              size_in=0, size_out=size_out)
+
+    def update(self, t):
+        raise SimulationError("ChipReceiveNodes should not be run")
+
+    def receive(self, t, x):
+        assert self.last_time is None or t > self.last_time
+        # TODO: make this stacking efficient
+        self.cx_spike_input.spikes = np.vstack([self.cx_spike_input.spikes,
+                                                [x > 0]])
+        self.last_time = t
+
+
+class ChipReceiveNeurons(ChipReceiveNode):
+    """Passes spikes directly (no on-off neuron encoding)"""
+    def __init__(self, dimensions, neuron_type=None):
+        self.neuron_type = neuron_type
+        super(ChipReceiveNeurons, self).__init__(dimensions, dimensions)
diff --git a/nengo_loihi/loihi_interface.py b/nengo_loihi/loihi_interface.py
index d50074e24..6dc1e1b49 100644
--- a/nengo_loihi/loihi_interface.py
+++ b/nengo_loihi/loihi_interface.py
@@ -10,6 +10,7 @@
 import jinja2
 import numpy as np
 
+import nengo
 from nengo.exceptions import SimulationError
 
 try:
@@ -30,6 +31,7 @@ def no_nxsdk(*args, **kwargs):
 from nengo_loihi.allocators import one_to_one_allocator
 from nengo_loihi.loihi_api import (
     CX_PROFILES_MAX, VTH_PROFILES_MAX, bias_to_manexp)
+from nengo_loihi.loihi_cx import CxGroup, PESModulatoryTarget
 
 logger = logging.getLogger(__name__)
 
@@ -385,14 +387,17 @@ class LoihiSimulator(object):
         the nengo_io_h2c channel on one timestep.
     """
 
-    def __init__(self, cx_model, seed=None, snip_max_spikes_per_step=50):
+    def __init__(self, cx_model,
+                 use_snips=True, seed=None, snip_max_spikes_per_step=50):
         self.closed = False
-
         self.check_nxsdk_version()
 
         self.n2board = None
         self._probe_filters = {}
         self._probe_filter_pos = {}
+        self._snip_probes = {}
+        self._cx_probe2probe = {}
+        self._chip2host_sent_steps = 0
         self.snip_max_spikes_per_step = snip_max_spikes_per_step
 
         nxsdk_dir = os.path.realpath(
@@ -409,7 +414,7 @@ def __init__(self, cx_model, seed=None, snip_max_spikes_per_step=50):
         # from previous simulators
         N2SpikeProbe.probeDict.clear()
 
-        self.build(cx_model, seed=seed)
+        self.build(cx_model, use_snips=use_snips, seed=seed)
 
     def __enter__(self):
         return self
@@ -435,8 +440,30 @@ def check_nxsdk_version():
                           "version (%s); latest fully supported version is "
                           "%s" % (version, max_tested))
 
-    def build(self, cx_model, seed=None):
+    def build(self, cx_model, use_snips=True, seed=None):
         cx_model.validate()
+
+        if use_snips:
+            # tag all probes as being snip-based,
+            # having normal probes at the same time as snips causes problems
+            for group in cx_model.cx_groups.keys():
+                for cx_probe in group.probes:
+                    cx_probe.use_snip = True
+            # create a place to store data from snip probes
+            for probe in cx_model.probes:
+                self._snip_probes[probe] = []
+
+            # map CxProbes to their nengo.Probes
+            for obj in cx_model.objs:
+                if isinstance(obj, nengo.Probe):
+                    # actual nengo.Probes on chip objects
+                    cx_probe = cx_model.objs[obj]['out']
+                    self._cx_probe2probe[cx_probe] = obj
+            for probe in cx_model.chip2host_receivers:
+                # probes used for chip->host communication
+                cx_probe = cx_model.objs[probe]['out']
+                self._cx_probe2probe[cx_probe] = probe
+
         self.model = cx_model
 
         # --- allocate --
@@ -458,6 +485,140 @@ def run_steps(self, steps, blocking=True):
         self.connect()
         self.n2board.run(steps, aSync=not blocking)
 
+    def chip2host(self):
+        count = self.nengo_io_c2h_count
+        data = self.nengo_io_c2h.read(count)
+        time_step, data = data[0], np.array(data[1:])
+        snip_range = self.nengo_io_snip_range
+        for cx_probe, probe in self._cx_probe2probe.items():
+            x = data[snip_range[cx_probe]]
+            if cx_probe.key == 's':
+                if isinstance(cx_probe.target, CxGroup):
+                    refract_delays = cx_probe.target.refractDelay
+                else:
+                    refract_delays = 1
+
+                # Loihi uses the voltage value to indicate where we
+                # are in the refractory period. We want to find neurons
+                # starting their refractory period.
+                x = (x == refract_delays * 128)
+            if cx_probe.weights is not None:
+                x = np.dot(x, cx_probe.weights)
+            receiver = self.model.chip2host_receivers.get(probe, None)
+            if receiver is not None:
+                # chip->host
+                receiver.receive(self.model.dt * time_step, x)
+            else:
+                # onchip probes
+                self._snip_probes[probe].append(x)
+
+    def chip2host_precomputed(self):
+        # TODO: this is almost identical to CxSimulator.chip2host
+        increment = None
+        for probe, receiver in self.model.chip2host_receivers.items():
+            # extract the probe data from the simulator
+            cx_probe = self.model.objs[probe]['out']
+            n2probe = self.board.probe_map[cx_probe]
+            x = np.column_stack([
+                p.timeSeries.data[self._chip2host_sent_steps:]
+                for p in n2probe])
+            if len(x) > 0:
+                if increment is None:
+                    increment = len(x)
+                else:
+                    assert increment == len(x)
+                if cx_probe.weights is not None:
+                    x = np.dot(x, cx_probe.weights)
+                for j in range(len(x)):
+                    receiver.receive(
+                        self.model.dt * (self._chip2host_sent_steps + j + 2),
+                        x[j])
+        if increment is not None:
+            self._chip2host_sent_steps += increment
+
+    def send_spikes(self):
+        # TODO: this is almost the same as _host2chip_spikes
+        items = []
+        for sender, receiver in self.model.host2chip_senders.items():
+            for t, x in sender.queue:
+                receiver.receive(t, x)
+            del sender.queue[:]
+            spike_input = receiver.cx_spike_input
+            sent_count = spike_input.sent_count
+            while sent_count < len(spike_input.spikes):
+                for j, s in enumerate(spike_input.spikes[sent_count]):
+                    if s:
+                        for output_axon in spike_input.axon_ids:
+                            items.append(
+                                (sent_count,) + output_axon[j])
+                sent_count += 1
+            spike_input.sent_count = sent_count
+        if len(items) > 0:
+            for info in sorted(items):
+                spike_input.spike_gen.addSpike(*info)
+
+    def host2chip(self):
+        to_send = self._host2chip_spikes()
+        errors = self._host2chip_errors()
+        max_spikes = self.snip_max_spikes_per_step
+        if len(to_send) > max_spikes:
+            warnings.warn("Too many spikes (%d) sent in one time "
+                          "step.  Increase the value of "
+                          "snip_max_spikes_per_step (currently "
+                          "set to %d)" % (len(to_send), max_spikes))
+            del to_send[max_spikes:]
+
+        msg = [len(to_send)]
+        for spike in to_send:
+            assert spike[0] == 0
+            msg.extend(spike[1:3])
+        for error in errors:
+            msg.extend(error)
+        self.nengo_io_h2c.write(len(msg), msg)
+
+    def _host2chip_spikes(self):
+        to_send = []
+        for sender, receiver in self.model.host2chip_senders.items():
+            if hasattr(receiver, "receive"):
+                for t, x in sender.queue:
+                    receiver.receive(t, x)
+                del sender.queue[:]
+                spike_input = receiver.cx_spike_input
+                sent_count = spike_input.sent_count
+                axon_ids = spike_input.axon_ids
+                spikes = spike_input.spikes
+                while sent_count < len(spikes):
+                    for j, s in enumerate(spikes[sent_count]):
+                        if s:
+                            for output_axon in axon_ids:
+                                to_send.append(output_axon[j])
+                    sent_count += 1
+                spike_input.sent_count = sent_count
+        return to_send
+
+    def _host2chip_errors(self):
+        errors = []
+        for sender, receiver in self.model.host2chip_senders.items():
+            if isinstance(receiver, PESModulatoryTarget):
+                for t, x in sender.queue:
+                    x = (100 * x).astype(int)
+                    x = np.clip(x, -100, 100, out=x)
+                    probe = receiver.target
+                    conn = self.model.probe_conns[probe]
+                    dec_cx = self.model.objs[conn]['decoded']
+                    for core in self.board.chips[0].cores:
+                        for group in core.groups:
+                            if group == dec_cx:
+                                # TODO: assumes one group per core
+                                coreid = core.learning_coreid
+                            break
+
+                    assert coreid is not None
+
+                    errors.append([coreid, len(x)] + x.tolist())
+                del sender.queue[:]
+        return errors
+
     def wait_for_completion(self):
         self.n2board.finishRun()
 
@@ -520,6 +681,12 @@ def _filter_probe(self, cx_probe, data):
 
     def get_probe_output(self, probe):
         cx_probe = self.model.objs[probe]['out']
+        if cx_probe.use_snip:
+            data = self._snip_probes[probe]
+            if probe.synapse is not None:
+                return probe.synapse.filt(data, dt=self.model.dt, y0=0)
+            else:
+                return data
         n2probe = self.board.probe_map[cx_probe]
         x = np.column_stack([p.timeSeries.data for p in n2probe])
         x = x if cx_probe.weights is None else np.dot(x, cx_probe.weights)
diff --git a/nengo_loihi/simulator.py b/nengo_loihi/simulator.py
index 89098d2c3..fa458eae3 100644
--- a/nengo_loihi/simulator.py
+++ b/nengo_loihi/simulator.py
@@ -1,3 +1,4 @@
+from collections import OrderedDict
 import logging
 import warnings
 
@@ -10,8 +11,9 @@
 from nengo.simulator import ProbeDict as NengoProbeDict
 
 from nengo_loihi.builder import Model
-from nengo_loihi.loihi_cx import CxGroup
-from nengo_loihi.splitter import PESModulatoryTarget, split
+from nengo_loihi.loihi_cx import CxSimulator
+from nengo_loihi.loihi_interface import LoihiSimulator
+from nengo_loihi.splitter import split
 import nengo_loihi.config as config
 
 logger = logging.getLogger(__name__)
@@ -158,34 +160,40 @@ def __init__(self, network, dt=0.001, seed=None, model=None,  # noqa: C901
                              % (1. / max_rate, self.dt))
         self.precompute = precompute
         self.networks = None
-        self.sims = {}
+        self.sims = OrderedDict()
+        self._run_steps = None
 
-        self.chip2host_sent_steps = 0  # how many timesteps have been sent
         if network is not None:
             nengo.rc.set("decoder_cache", "enabled", "False")
             config.add_params(network)
 
-            # split the host into two or three networks
+            # split the host into one, two or three networks
             self.networks = split(
                 network, precompute, max_rate, self.model.inter_tau)
             network = self.networks.chip
 
-            self.chip2host_receivers = self.networks.chip2host_receivers
-            self.host2chip_senders = self.networks.host2chip_senders
             self.model.chip2host_params = self.networks.chip2host_params
+            self.model.chip2host_receivers = self.networks.chip2host_receivers
+            self.model.host2chip_senders = self.networks.host2chip_senders
 
             self.chip = self.networks.chip
             self.host = self.networks.host
             self.host_pre = self.networks.host_pre
 
-            if precompute:
+            if len(self.host_pre.all_objects) > 0:
                 self.sims["host_pre"] = nengo.Simulator(
                     self.host_pre, dt=self.dt, progress_bar=False)
-                self.sims["host_post"] = nengo.Simulator(
-                    self.host, dt=self.dt, progress_bar=False)
-            else:
+
+            if len(self.host.all_objects) > 0:
                 self.sims["host"] = nengo.Simulator(
                     self.host, dt=self.dt, progress_bar=False)
+            elif not precompute:
+                # If there is no host and precompute=False, then all objects
+                # must be on the chip, which is precomputable in the sense that
+                # no communication has to happen with the host.
+                # We could warn about this, but we want to avoid people having
+                # to specify `precompute` unless they absolutely have to.
+                self.precompute = True
 
             # Build the network into the model
             self.model.build(network)
@@ -208,42 +216,17 @@ def __init__(self, network, dt=0.001, seed=None, model=None,  # noqa: C901
             except ImportError:
                 target = 'sim'
 
-        if target == 'simreal':
-            logger.info("Using real-valued simulator")
-            self.sims["emulator"] = self.model.get_simulator(seed=seed)
-        elif target == 'sim':
-            logger.info("Using discretized simulator")
-            self.model.discretize()  # Make parameters fixed bit widths
-            self.sims["emulator"] = self.model.get_simulator(seed=seed)
+        logger.info("Simulator target is %r", target)
+        logger.info("Simulator precompute is %r", self.precompute)
+
+        if target != "simreal":
+            self.model.discretize()
+
+        if target in ("simreal", "sim"):
+            self.sims["emulator"] = CxSimulator(self.model, seed=seed)
         elif target == 'loihi':
-            logger.info(
-                "Using Loihi hardware with precompute=%s", self.precompute)
-            self.model.discretize()  # Make parameters fixed bit widths
-            if not precompute:
-                # tag all probes as being snipbased
-                #  (having normal probes at the same time as snips
-                #   seems to cause problems)
-                for group in self.model.cx_groups.keys():
-                    for cx_probe in group.probes:
-                        cx_probe.use_snip = True
-                # create a place to store data from snip probes
-                self.snip_probes = {}
-                for probe in network.all_probes:
-                    self.snip_probes[probe] = []
-
-                # create a list of all the CxProbes and their nengo.Probes
-                self.cx_probe2probe = {}
-                for obj in self.model.objs.keys():
-                    if isinstance(obj, nengo.Probe):
-                        # actual nengo.Probes on chip objects
-                        cx_probe = self.model.objs[obj]['out']
-                        self.cx_probe2probe[cx_probe] = obj
-                for probe in self.chip2host_receivers.keys():
-                    # probes used for chip->host communication
-                    cx_probe = self.model.objs[probe]['out']
-                    self.cx_probe2probe[cx_probe] = probe
-
-            self.sims["loihi"] = self.model.get_loihi(seed=seed)
+            self.sims["loihi"] = LoihiSimulator(
+                self.model, use_snips=not self.precompute, seed=seed)
         else:
             raise ValidationError("Must be 'simreal', 'sim', or 'loihi'",
                                   attr="target")
@@ -314,19 +297,14 @@ def _probe(self):
             assert ("loihi" not in self.sims
                     or "emulator" not in self.sims)
             if "loihi" in self.sims:
-                cx_probe = self.sims["loihi"].model.objs[probe]['out']
-                if cx_probe.use_snip:
-                    data = self.snip_probes[probe]
-                    if probe.synapse is not None:
-                        data = probe.synapse.filt(data, dt=self.dt, y0=0)
-                else:
-                    data = self.sims["loihi"].get_probe_output(probe)
+                data = self.sims["loihi"].get_probe_output(probe)
             elif "emulator" in self.sims:
                 data = self.sims["emulator"].get_probe_output(probe)
             # TODO: stop recomputing this all the time
             del self._probe_outputs[probe][:]
             self._probe_outputs[probe].extend(data)
-            assert len(self._probe_outputs[probe]) == self.n_steps
+            assert len(self._probe_outputs[probe]) == self.n_steps, (
+                len(self._probe_outputs[probe]), self.n_steps)
 
     def _probe_step_time(self):
         self._time = self._n_steps * self.dt
@@ -388,9 +366,113 @@ def run(self, time_in_seconds):
 
     def step(self):
         """Advance the simulator by 1 step (``dt`` seconds)."""
-
         self.run_steps(1)
 
+    def _make_run_steps(self):
+        if self._run_steps is not None:
+            return
+        assert "emulator" not in self.sims or "loihi" not in self.sims
+        if "emulator" in self.sims:
+            self._make_emu_run_steps()
+        else:
+            self._make_loihi_run_steps()
+
+    def _make_emu_run_steps(self):
+        host_pre = self.sims.get("host_pre", None)
+        emulator = self.sims["emulator"]
+        host = self.sims.get("host", None)
+
+        if self.precompute:
+            if host_pre is not None and host is not None:
+
+                def emu_precomputed_host_pre_and_host(steps):
+                    host_pre.run_steps(steps)
+                    emulator.host2chip()
+                    emulator.run_steps(steps)
+                    emulator.chip2host()
+                    host.run_steps(steps)
+                self._run_steps = emu_precomputed_host_pre_and_host
+
+            elif host_pre is not None:
+
+                def emu_precomputed_host_pre_only(steps):
+                    host_pre.run_steps(steps)
+                    emulator.host2chip()
+                    emulator.run_steps(steps)
+                self._run_steps = emu_precomputed_host_pre_only
+
+            elif host is not None:
+
+                def emu_precomputed_host_only(steps):
+                    emulator.run_steps(steps)
+                    emulator.chip2host()
+                    host.run_steps(steps)
+                self._run_steps = emu_precomputed_host_only
+
+            else:
+                self._run_steps = emulator.run_steps
+
+        else:
+            assert host is not None, "Model is precomputable"
+
+            def emu_bidirectional_with_host(steps):
+                for _ in range(steps):
+                    host.step()
+                    emulator.host2chip()
+                    emulator.step()
+                    emulator.chip2host()
+            self._run_steps = emu_bidirectional_with_host
+
+    def _make_loihi_run_steps(self):
+        host_pre = self.sims.get("host_pre", None)
+        loihi = self.sims["loihi"]
+        host = self.sims.get("host", None)
+
+        if self.precompute:
+            if host_pre is not None and host is not None:
+
+                def loihi_precomputed_host_pre_and_host(steps):
+                    host_pre.run_steps(steps)
+                    loihi.send_spikes()
+                    loihi.run_steps(steps, blocking=True)
+                    loihi.chip2host_precomputed()
+                    host.run_steps(steps)
+                self._run_steps = loihi_precomputed_host_pre_and_host
+
+            elif host_pre is not None:
+
+                def loihi_precomputed_host_pre_only(steps):
+                    host_pre.run_steps(steps)
+                    loihi.send_spikes()
+                    loihi.run_steps(steps, blocking=True)
+                self._run_steps = loihi_precomputed_host_pre_only
+
+            elif host is not None:
+
+                def loihi_precomputed_host_only(steps):
+                    loihi.run_steps(steps)
+                    loihi.chip2host_precomputed()
+                    host.run_steps(steps)
+                self._run_steps = loihi_precomputed_host_only
+
+            else:
+                self._run_steps = loihi.run_steps
+
+        else:
+            assert host is not None, "Model is precomputable"
+
+            def loihi_bidirectional_with_host(steps):
+                loihi.create_io_snip()
+                loihi.run_steps(steps, blocking=False)
+                for _ in range(steps):
+                    host.step()
+                    loihi.host2chip()
+                    loihi.chip2host()
+                logger.info("Waiting for run_steps to complete...")
+                loihi.wait_for_completion()
+                logger.info("run_steps completed")
+            self._run_steps = loihi_bidirectional_with_host
+
     def run_steps(self, steps):
         """Simulate for the given number of ``dt`` steps.
 
@@ -401,231 +483,13 @@ def run_steps(self, steps):
         """
         if self.closed:
             raise SimulatorClosed("Simulator cannot run because it is closed.")
-
-        if "emulator" in self.sims:
-            if self.precompute:
-                self.sims["host_pre"].run_steps(steps)
-                self.handle_host2chip_communications()
-                self.sims["emulator"].run_steps(steps)
-                self.handle_chip2host_communications()
-                self.sims["host_post"].run_steps(steps)
-            elif "host" in self.sims:
-                for i in range(steps):
-                    self.sims["host"].step()
-                    self.handle_host2chip_communications()
-                    self.sims["emulator"].step()
-                    self.handle_chip2host_communications()
-            else:
-                raise Exception
-                self.sims["emulator"].run_steps(steps)
-        elif "loihi" in self.sims:
-            if self.precompute:
-                self.sims["host_pre"].run_steps(steps)
-                self.handle_host2chip_communications()
-                self.sims["loihi"].run_steps(steps, blocking=True)
-                self.handle_chip2host_communications()
-                self.sims["host_post"].run_steps(steps)
-            elif "host" in self.sims:
-                self.sims["loihi"].create_io_snip()
-                self.sims["loihi"].run_steps(steps, blocking=False)
-                for i in range(steps):
-                    self.sims["host"].run_steps(1)
-                    self.handle_host2chip_communications()
-                    self.handle_chip2host_communications()
-
-                logger.info("Waiting for completion")
-                self.sims["loihi"].wait_for_completion()
-                logger.info("done")
-            else:
-                raise Exception
-                self.sims["loihi"].run_steps(steps, blocking=True)
-
+        if self._run_steps is None:
+            self._make_run_steps()
+        self._run_steps(steps)
         self._n_steps += steps
         logger.info("Finished running for %d steps", steps)
         self._probe()
 
-    def handle_host2chip_communications(self):  # noqa: C901
-        if "emulator" in self.sims:
-            if self.precompute or "host" in self.sims:
-                # go through the list of host2chip connections
-                for sender, receiver in self.host2chip_senders.items():
-                    learning_rate = 50  # This is set to match hardware
-                    if isinstance(receiver, PESModulatoryTarget):
-                        for t, x in sender.queue:
-                            probe = receiver.target
-                            conn = self.model.probe_conns[probe]
-                            dec_syn = self.model.objs[conn]['decoders']
-                            assert dec_syn.tracing
-
-                            z = self.sims["emulator"].z[dec_syn]
-                            x = np.hstack([-x, x])
-
-                            delta_w = np.outer(z, x) * learning_rate
-
-                            for i, w in enumerate(dec_syn.weights):
-                                w += delta_w[i].astype('int32')
-                    else:
-                        for t, x in sender.queue:
-                            receiver.receive(t, x)
-                    del sender.queue[:]
-        elif "loihi" in self.sims:
-            if self.precompute:
-                # go through the list of host2chip connections
-                items = []
-                for sender, receiver in self.host2chip_senders.items():
-                    for t, x in sender.queue:
-                        receiver.receive(t, x)
-                    del sender.queue[:]
-                    spike_input = receiver.cx_spike_input
-                    sent_count = spike_input.sent_count
-                    while sent_count < len(spike_input.spikes):
-                        for j, s in enumerate(spike_input.spikes[sent_count]):
-                            if s:
-                                for output_axon in spike_input.axon_ids:
-                                    items.append(
-                                        (sent_count,) + output_axon[j])
-                        sent_count += 1
-                    spike_input.sent_count = sent_count
-                if len(items) > 0:
-                    for info in sorted(items):
-                        spike_input.spike_gen.addSpike(*info)
-            elif "host" in self.sims:
-                to_send = []
-                errors = []
-                # go through the list of host2chip connections
-                for sender, receiver in self.host2chip_senders.items():
-                    if isinstance(receiver, PESModulatoryTarget):
-                        for t, x in sender.queue:
-                            x = (100 * x).astype(int)
-                            x = np.clip(x, -100, 100, out=x)
-                            probe = receiver.target
-                            conn = self.model.probe_conns[probe]
-                            dec_cx = self.model.objs[conn]['decoded']
-                            for core in (
-                                    self.sims["loihi"].board.chips[0].cores):
-                                for group in core.groups:
-                                    if group == dec_cx:
-                                        # TODO: assumes one group per core
-                                        coreid = core.learning_coreid
-                                    break
-
-                            assert coreid is not None
-
-                            errors.append([coreid, len(x)] + x.tolist())
-                        del sender.queue[:]
-
-                    else:
-                        for t, x in sender.queue:
-                            receiver.receive(t, x)
-                        del sender.queue[:]
-                        spike_input = receiver.cx_spike_input
-                        sent_count = spike_input.sent_count
-                        axon_ids = spike_input.axon_ids
-                        spikes = spike_input.spikes
-                        while sent_count < len(spikes):
-                            for j, s in enumerate(spikes[sent_count]):
-                                if s:
-                                    for output_axon in axon_ids:
-                                        to_send.append(output_axon[j])
-                            sent_count += 1
-                        spike_input.sent_count = sent_count
-
-                max_spikes = self.sims["loihi"].snip_max_spikes_per_step
-                if len(to_send) > max_spikes:
-                    warnings.warn("Too many spikes (%d) sent in one time "
-                                  "step.  Increase the value of "
-                                  "snip_max_spikes_per_step (currently "
-                                  "set to %d)" % (len(to_send), max_spikes))
-                    del to_send[max_spikes:]
-
-                msg = [len(to_send)]
-                for spike in to_send:
-                    assert spike[0] == 0
-                    msg.extend(spike[1:3])
-                for error in errors:
-                    msg.extend(error)
-                self.sims["loihi"].nengo_io_h2c.write(len(msg), msg)
-            else:
-                raise NotImplementedError()
-
-    def handle_chip2host_communications(self):  # noqa: C901
-        if "emulator" in self.sims:
-            if self.precompute or "host" in self.sims:
-                # go through the list of chip2host connections
-                i = self.chip2host_sent_steps
-                increment = None
-                for probe, receiver in self.chip2host_receivers.items():
-                    # extract the probe data from the simulator
-                    cx_probe = self.sims["emulator"].model.objs[probe]['out']
-
-                    x = self.sims["emulator"].probe_outputs[cx_probe][i:]
-                    if len(x) > 0:
-                        if increment is None:
-                            increment = len(x)
-                        else:
-                            assert increment == len(x)
-                        if cx_probe.weights is not None:
-                            x = np.dot(x, cx_probe.weights)
-
-                        for j in range(len(x)):
-                            receiver.receive(self.dt * (i + j + 2), x[j])
-                if increment is not None:
-                    self.chip2host_sent_steps += increment
-            else:
-                raise NotImplementedError()
-        elif "loihi" in self.sims:
-            if self.precompute:
-                # go through the list of chip2host connections
-                increment = None
-                for probe, receiver in self.chip2host_receivers.items():
-                    # extract the probe data from the simulator
-                    cx_probe = self.sims["loihi"].model.objs[probe]['out']
-                    n2probe = self.sims["loihi"].board.probe_map[cx_probe]
-                    x = np.column_stack([
-                        p.timeSeries.data[self.chip2host_sent_steps:]
-                        for p in n2probe])
-                    if len(x) > 0:
-                        if increment is None:
-                            increment = len(x)
-                        else:
-                            assert increment == len(x)
-                        if cx_probe.weights is not None:
-                            x = np.dot(x, cx_probe.weights)
-                        for j in range(len(x)):
-                            receiver.receive(
-                                self.dt * (self.chip2host_sent_steps + j + 2),
-                                x[j])
-                if increment is not None:
-                    self.chip2host_sent_steps += increment
-            elif "host" in self.sims:
-                count = self.sims["loihi"].nengo_io_c2h_count
-                data = self.sims["loihi"].nengo_io_c2h.read(count)
-                time_step, data = data[0], np.array(data[1:])
-                snip_range = self.sims["loihi"].nengo_io_snip_range
-                for cx_probe, probe in self.cx_probe2probe.items():
-                    x = data[snip_range[cx_probe]]
-                    if cx_probe.key == 's':
-                        if isinstance(cx_probe.target, CxGroup):
-                            refract_delays = cx_probe.target.refractDelay
-                        else:
-                            refract_delays = 1
-
-                        # Loihi uses the voltage value to indicate where we
-                        # are in the refractory period. We want to find neurons
-                        # starting their refractory period.
-                        x = (x == refract_delays * 128)
-                    if cx_probe.weights is not None:
-                        x = np.dot(x, cx_probe.weights)
-                    receiver = self.chip2host_receivers.get(probe, None)
-                    if receiver is not None:
-                        # chip->host
-                        receiver.receive(self.dt*(time_step), x)
-                    else:
-                        # onchip probes
-                        self.snip_probes[probe].append(x)
-            else:
-                raise NotImplementedError()
-
     def trange(self, sample_every=None):
         """Create a vector of times matching probed data.
 
diff --git a/nengo_loihi/splitter.py b/nengo_loihi/splitter.py
index c7335f407..205ee6990 100644
--- a/nengo_loihi/splitter.py
+++ b/nengo_loihi/splitter.py
@@ -1,11 +1,14 @@
 from collections import defaultdict
 import logging
+import warnings
 
 import nengo
-from nengo.exceptions import BuildError, SimulationError
+from nengo.exceptions import BuildError
 import numpy as np
 
-from nengo_loihi import loihi_cx
+from nengo_loihi.loihi_cx import (
+    ChipReceiveNode, ChipReceiveNeurons, HostSendNode, HostReceiveNode,
+    PESModulatoryTarget)
 from nengo_loihi.neurons import NIF
 
 logger = logging.getLogger(__name__)
@@ -125,8 +128,9 @@ def split(net, precompute, max_rate, inter_tau):
     # Commit to the moves marked in the previous steps
     networks.finalize()
     if precompute:
-        assert len(networks.host_pre.all_objects) > 0, (
-            "No precomputable objects")
+        if len(networks.host_pre.all_objects) == 0:
+            warnings.warn("No precomputable objects. Setting precompute=True "
+                          "has no effect.")
     else:
         assert len(networks.host_pre.all_objects) == 0, (
             "Object erroneously added to host_pre")
@@ -419,69 +423,3 @@ def split_pre_from_host(networks):  # noqa: C901
                                      "as it is dependent on output")
                 networks.move(obj, "host_pre", force=True)
                 queue.append(obj)
-
-
-class PESModulatoryTarget(object):
-    def __init__(self, target):
-        self.target = target
-
-
-class HostSendNode(nengo.Node):
-    """For sending host->chip messages"""
-
-    def __init__(self, dimensions):
-        self.queue = []
-        super(HostSendNode, self).__init__(self.update,
-                                           size_in=dimensions, size_out=0)
-
-    def update(self, t, x):
-        assert len(self.queue) == 0 or t > self.queue[-1][0]
-        self.queue.append((t, x))
-
-
-class HostReceiveNode(nengo.Node):
-    """For receiving chip->host messages"""
-
-    def __init__(self, dimensions):
-        self.queue = [(0, np.zeros(dimensions))]
-        self.queue_index = 0
-        super(HostReceiveNode, self).__init__(self.update,
-                                              size_in=0, size_out=dimensions)
-
-    def update(self, t):
-        while (len(self.queue) > self.queue_index + 1
-               and self.queue[self.queue_index][0] < t):
-            self.queue_index += 1
-        return self.queue[self.queue_index][1]
-
-    def receive(self, t, x):
-        self.queue.append((t, x))
-
-
-class ChipReceiveNode(nengo.Node):
-    """For receiving host->chip messages"""
-
-    def __init__(self, dimensions, size_out):
-        self.raw_dimensions = dimensions
-        self.cx_spike_input = loihi_cx.CxSpikeInput(
-            np.zeros((0, dimensions), dtype=bool))
-        self.last_time = None
-        super(ChipReceiveNode, self).__init__(self.update,
-                                              size_in=0, size_out=size_out)
-
-    def update(self, t):
-        raise SimulationError("ChipReceiveNodes should not be run")
-
-    def receive(self, t, x):
-        assert self.last_time is None or t > self.last_time
-        # TODO: make this stacking efficient
-        self.cx_spike_input.spikes = np.vstack([self.cx_spike_input.spikes,
-                                                [x > 0]])
-        self.last_time = t
-
-
-class ChipReceiveNeurons(ChipReceiveNode):
-    """Passes spikes directly (no on-off neuron encoding)"""
-    def __init__(self, dimensions, neuron_type=None):
-        self.neuron_type = neuron_type
-        super(ChipReceiveNeurons, self).__init__(dimensions, dimensions)
diff --git a/nengo_loihi/tests/test_loihi_cx.py b/nengo_loihi/tests/test_loihi_cx.py
index 014e917fa..bd2e14451 100644
--- a/nengo_loihi/tests/test_loihi_cx.py
+++ b/nengo_loihi/tests/test_loihi_cx.py
@@ -6,6 +6,7 @@
 from nengo_loihi.loihi_api import VTH_MAX
 from nengo_loihi.loihi_cx import (
     CxAxons, CxGroup, CxModel, CxProbe, CxSimulator, CxSpikeInput, CxSynapses)
+from nengo_loihi.loihi_interface import LoihiSimulator
 
 
 def test_simulator_noise(request, plt, seed):
@@ -29,12 +30,12 @@ def test_simulator_noise(request, plt, seed):
     model.discretize()
 
     if target == 'loihi':
-        with model.get_loihi(seed=seed) as sim:
+        with LoihiSimulator(model, use_snips=False, seed=seed) as sim:
             sim.run_steps(1000)
             y = np.column_stack([
                 p.timeSeries.data for p in sim.board.probe_map[probe]])
     else:
-        with model.get_simulator(seed=seed) as sim:
+        with CxSimulator(model, seed=seed) as sim:
             sim.run_steps(1000)
         y = sim.probe_outputs[probe]
 
@@ -131,7 +132,7 @@ def test_uv_overflow(n_axons, Simulator, plt, allclose):
     assert CxSimulator.strict  # Tests should be run in strict mode
     CxSimulator.strict = False
     try:
-        emu = model.get_simulator()
+        emu = CxSimulator(model)
         with pytest.warns(UserWarning):
             emu.run_steps(nt)
     finally:
@@ -141,7 +142,7 @@ def test_uv_overflow(n_axons, Simulator, plt, allclose):
     emu_v = np.array(emu.probe_outputs[probe_v])
     emu_s = np.array(emu.probe_outputs[probe_s])
 
-    with model.get_loihi() as sim:
+    with LoihiSimulator(model, use_snips=False) as sim:
         sim.run_steps(nt)
         sim_u = np.column_stack([
             p.timeSeries.data for p in sim.board.probe_map[probe_u]])
diff --git a/nengo_loihi/tests/test_simulator.py b/nengo_loihi/tests/test_simulator.py
index 9502d53f2..f3fb519b9 100644
--- a/nengo_loihi/tests/test_simulator.py
+++ b/nengo_loihi/tests/test_simulator.py
@@ -1,3 +1,5 @@
+import inspect
+
 import nengo
 import numpy as np
 import pytest
@@ -138,3 +140,121 @@ def test_close(Simulator, precompute):
 
     assert sim.closed
     assert all(s.closed for s in sim.sims.values())
+
+
+def test_all_run_steps(Simulator):
+    # Checks which step function is cached as sim._run_steps in each case
+    # Case 1: No objects on host, so no host and no host_pre
+    with nengo.Network() as net:
+        pre = nengo.Ensemble(10, 1)
+        post = nengo.Ensemble(10, 1)
+        nengo.Connection(pre, post)
+
+    # 1a. precompute=False, no host
+    with Simulator(net) as sim:
+        sim.run(0.001)
+    # With nothing on the host, precompute is turned on even if not requested
+    assert sim.precompute
+    assert inspect.ismethod(sim._run_steps)
+    assert sim._run_steps.__name__ == "run_steps"
+
+    # 1b. precompute=True, no host, no host_pre
+    with pytest.warns(UserWarning) as record:
+        with Simulator(net, precompute=True) as sim:
+            sim.run(0.001)
+    assert any("No precomputable objects" in r.message.args[0] for r in record)
+    assert inspect.ismethod(sim._run_steps)
+    assert sim._run_steps.__name__ == "run_steps"
+
+    # Case 2: Add a precomputable off-chip object, so we have either host or
+    # host_pre but not both host and host_pre
+    with net:
+        stim = nengo.Node(1)
+        stim_conn = nengo.Connection(stim, pre)
+
+    # 2a. precompute=False, host
+    with Simulator(net) as sim:
+        sim.run(0.001)
+    assert sim._run_steps.__name__.endswith("_bidirectional_with_host")
+
+    # 2b. precompute=True, no host, host_pre
+    with Simulator(net, precompute=True) as sim:
+        sim.run(0.001)
+    assert sim._run_steps.__name__.endswith("_precomputed_host_pre_only")
+
+    # Case 3: Add a non-precomputable off-chip object so we have host
+    # and host_pre
+    with net:
+        out = nengo.Node(size_in=1)
+        nengo.Connection(post, out)
+
+    # 3a. precompute=False, host (same as 2a)
+    with Simulator(net) as sim:
+        sim.run(0.001)
+    assert sim._run_steps.__name__.endswith("_bidirectional_with_host")
+
+    # 3b. precompute=True, host, host_pre
+    with Simulator(net, precompute=True) as sim:
+        sim.run(0.001)
+    assert sim._run_steps.__name__.endswith("_precomputed_host_pre_and_host")
+
+    # Case 4: Delete the precomputable off-chip object, so we have host only
+    net.nodes.remove(stim)
+    net.connections.remove(stim_conn)
+
+    # 4a. precompute=False, host (same as 2a and 3a)
+    with Simulator(net) as sim:
+        sim.run(0.001)
+    assert sim._run_steps.__name__.endswith("_bidirectional_with_host")
+
+    # 4b. precompute=True, host, no host_pre
+    with pytest.warns(UserWarning) as record:
+        with Simulator(net, precompute=True) as sim:
+            sim.run(0.001)
+    assert any("No precomputable objects" in r.message.args[0] for r in record)
+    assert sim._run_steps.__name__.endswith("_precomputed_host_only")
+
+
+def test_no_precomputable(Simulator):
+    # precompute=True with nothing precomputable should warn but still run
+    with nengo.Network() as net:
+        active_ens = nengo.Ensemble(10, 1,
+                                    gain=np.ones(10) * 10,
+                                    bias=np.ones(10) * 10)
+        out = nengo.Node(size_in=10)
+        nengo.Connection(active_ens.neurons, out)
+        out_p = nengo.Probe(out)
+
+    with pytest.warns(UserWarning) as record:
+        with Simulator(net, precompute=True) as sim:
+            sim.run(0.01)
+
+    assert sim._run_steps.__name__.endswith("precomputed_host_only")
+    # Should warn that no objects are precomputable
+    assert any("No precomputable objects" in r.message.args[0] for r in record)
+    # precompute stays on for speed; no inputs depend on outputs here
+    assert sim.precompute
+    assert sim.data[out_p].shape[0] == sim.trange().shape[0]
+    assert np.all(sim.data[out_p][-1] > 100)
+
+
+def test_all_onchip(Simulator):
+    # An all-ensemble network should use a sim's bound run_steps directly
+    with nengo.Network() as net:
+        active_ens = nengo.Ensemble(10, 1,
+                                    gain=np.ones(10) * 10,
+                                    bias=np.ones(10) * 10)
+        out = nengo.Ensemble(10, 1, gain=np.ones(10), bias=np.ones(10))
+        nengo.Connection(active_ens.neurons, out.neurons,
+                         transform=np.eye(10) * 10)
+        out_p = nengo.Probe(out.neurons)
+
+    with Simulator(net) as sim:
+        sim.run(0.01)
+
+    # precompute was not requested, but is set since nothing is off-chip
+    assert sim.precompute
+    assert inspect.ismethod(sim._run_steps)
+    assert sim._run_steps.__name__ == "run_steps"
+    assert sim.data[out_p].shape[0] == sim.trange().shape[0]
+    assert np.all(sim.data[out_p][-1] > 100)