diff --git a/nengo_loihi/builder.py b/nengo_loihi/builder.py
index 504c88d27..dbe51b307 100644
--- a/nengo_loihi/builder.py
+++ b/nengo_loihi/builder.py
@@ -15,9 +15,16 @@
 import nengo.utils.numpy as npext
 
 from nengo_loihi.loihi_cx import (
-    CxModel, CxGroup, CxSynapses, CxAxons, CxProbe, CxSpikeInput)
+    ChipReceiveNeurons,
+    ChipReceiveNode,
+    CxAxons,
+    CxGroup,
+    CxModel,
+    CxProbe,
+    CxSpikeInput,
+    CxSynapses,
+)
 from nengo_loihi.neurons import loihi_rates
-from nengo_loihi.splitter import ChipReceiveNeurons, ChipReceiveNode
 
 logger = logging.getLogger(__name__)
 
@@ -74,7 +81,6 @@ def __init__(self, dt=0.001, label=None, builder=None):
         self.objs = collections.defaultdict(dict)
         self.params = {}  # Holds data generated when building objects
         self.probes = []
-        self.chip2host_params = None  # Will be provided by Simulator
         self.probe_conns = {}
 
         self.seeds = {}
@@ -105,6 +111,11 @@ def __init__(self, dt=0.001, label=None, builder=None):
         # limit for clipping intercepts, to avoid neurons with high gains
         self.intercept_limit = 0.95
 
+        # Will be provided by Simulator
+        self.chip2host_params = None
+        self.chip2host_receivers = None
+        self.host2chip_senders = None
+
     @property
     def inter_rate(self):
         return (1. / (self.dt * self.inter_n) if self._inter_rate is None else
diff --git a/nengo_loihi/loihi_cx.py b/nengo_loihi/loihi_cx.py
index 0f4dc0078..d3af8a430 100644
--- a/nengo_loihi/loihi_cx.py
+++ b/nengo_loihi/loihi_cx.py
@@ -5,6 +5,7 @@
 import warnings
 
 import numpy as np
+import nengo
 from nengo.exceptions import BuildError, SimulationError
 from nengo.utils.compat import is_iterable
 
@@ -479,13 +480,6 @@ def discretize(self):
         for group in self.cx_groups:
             group.discretize()
 
-    def get_loihi(self, seed=None):
-        from nengo_loihi.loihi_interface import LoihiSimulator
-        return LoihiSimulator(self, seed=seed)
-
-    def get_simulator(self, seed=None):
-        return CxSimulator(self, seed=seed)
-
     def validate(self):
         if len(self.cx_groups) == 0:
             raise BuildError("No neurons marked for execution on-chip. "
@@ -510,6 +504,7 @@ def __init__(self, model, seed=None):
 
         self.build(model, seed=seed)
 
+        self._chip2host_sent_steps = 0
         self._probe_filters = {}
         self._probe_filter_pos = {}
 
@@ -526,27 +521,6 @@ def error(cls, msg):
         else:
             warnings.warn(msg)
 
-    def clear(self):
-        """Clear all signals set in `build` (to free up memory)"""
-        self.q = None
-        self.u = None
-        self.v = None
-        self.s = None
-        self.c = None
-        self.w = None
-
-        self.vth = None
-        self.vmin = None
-        self.vmax = None
-
-        self.bias = None
-        self.ref = None
-        self.a_in = None
-        self.z = None
-
-        self.noiseGen = None
-        self.noiseTarget = None
-
     def build(self, model, seed=None):  # noqa: C901
         """Set up NumPy arrays to emulate chip memory and I/O."""
         model.validate()
@@ -684,6 +658,76 @@ def noiseGen(n=self.n_cx, rng=self.rng):
         self.noiseGen = noiseGen
         self.noiseTarget = noiseTarget
 
+    def clear(self):
+        """Set all signals assigned in `build` to None (to free up memory)."""
+        self.q = None
+        self.u = None
+        self.v = None
+        self.s = None
+        self.c = None
+        self.w = None
+
+        self.vth = None
+        self.vmin = None
+        self.vmax = None
+
+        self.bias = None
+        self.ref = None
+        self.a_in = None
+        self.z = None
+        # noise hooks: the last two attributes assigned by `build`
+        self.noiseGen = None
+        self.noiseTarget = None
+
+    def close(self):  # mark the simulator closed, then release build arrays
+        self.closed = True
+        self.clear()  # sets every signal assigned in `build` to None
+
+    def chip2host(self):
+        """Forward probe rows not yet sent to the chip->host receivers."""
+        sent = self._chip2host_sent_steps
+        n_new = None
+        for probe, receiver in self.model.chip2host_receivers.items():
+            cx_probe = self.model.objs[probe]['out']
+            fresh = self.probe_outputs[cx_probe][sent:]
+            if len(fresh) == 0:
+                continue
+            # every probe must have advanced by the same number of steps
+            if n_new is None:
+                n_new = len(fresh)
+            else:
+                assert n_new == len(fresh)
+            if cx_probe.weights is not None:
+                fresh = np.dot(fresh, cx_probe.weights)
+            # row k is stamped with step ``sent + k + 2``
+            for k, row in enumerate(fresh):
+                receiver.receive(self.model.dt * (sent + k + 2), row)
+        if n_new is not None:
+            self._chip2host_sent_steps = sent + n_new
+
+    def host2chip(self):
+        """Drain each host->chip sender queue into its chip-side receiver."""
+        learning_rate = 50  # This is set to match hardware
+        for sender, receiver in self.model.host2chip_senders.items():
+            if not isinstance(receiver, PESModulatoryTarget):
+                # plain input: pass every queued (t, x) straight through
+                for t, x in sender.queue:
+                    receiver.receive(t, x)
+                del sender.queue[:]
+                continue
+
+            # error signal: update the decoder weights once per queued value
+            for _, error in sender.queue:
+                conn = self.model.probe_conns[receiver.target]
+                dec_syn = self.model.objs[conn]['decoders']
+                assert dec_syn.tracing
+                trace = self.z[dec_syn]
+                signed = np.hstack([-error, error])
+                delta_w = np.outer(trace, signed) * learning_rate
+                for row, w in enumerate(dec_syn.weights):
+                    w += delta_w[row].astype('int32')
+            del sender.queue[:]
+
     def step(self):  # noqa: C901
         """Advance the simulation by 1 step (``dt`` seconds)."""
 
@@ -829,6 +873,68 @@ def get_probe_output(self, probe):
         x = x if cx_probe.weights is None else np.dot(x, cx_probe.weights)
         return self._filter_probe(cx_probe, x)
 
-    def close(self):
-        self.closed = True
-        self.clear()
+
+class PESModulatoryTarget(object):  # host2chip receiver wrapping a probe
+    def __init__(self, target):
+        self.target = target  # looked up in model.probe_conns by the sims
+
+
+class HostSendNode(nengo.Node):
+    """Node that queues its (t, x) inputs for host->chip delivery."""
+
+    def __init__(self, dimensions):
+        self.queue = []  # (t, x) pairs, strictly increasing in t
+        super(HostSendNode, self).__init__(
+            self.update, size_in=dimensions, size_out=0)
+
+    def update(self, t, x):
+        assert not self.queue or self.queue[-1][0] < t
+        self.queue.append((t, x))
+
+
+class HostReceiveNode(nengo.Node):
+    """Node whose output replays values received from the chip."""
+
+    def __init__(self, dimensions):
+        self.queue = [(0, np.zeros(dimensions))]  # zeros until data arrives
+        self.queue_index = 0
+        super(HostReceiveNode, self).__init__(
+            self.update, size_in=0, size_out=dimensions)
+
+    def update(self, t):
+        last = len(self.queue) - 1  # never advance past the newest entry
+        while self.queue_index < last and self.queue[self.queue_index][0] < t:
+            self.queue_index += 1
+        return self.queue[self.queue_index][1]
+
+    def receive(self, t, x):
+        self.queue.append((t, x))
+
+
+class ChipReceiveNode(nengo.Node):
+    """Node standing in for host->chip input; collects spike rows."""
+
+    def __init__(self, dimensions, size_out):
+        self.raw_dimensions = dimensions
+        empty_spikes = np.zeros((0, dimensions), dtype=bool)
+        self.cx_spike_input = CxSpikeInput(empty_spikes)
+        self.last_time = None  # time of the most recent `receive` call
+        super(ChipReceiveNode, self).__init__(
+            self.update, size_in=0, size_out=size_out)
+
+    def update(self, t):
+        raise SimulationError("ChipReceiveNodes should not be run")
+
+    def receive(self, t, x):
+        assert self.last_time is None or self.last_time < t
+        # TODO: make this stacking efficient
+        spike_input = self.cx_spike_input
+        spike_input.spikes = np.vstack([spike_input.spikes, [x > 0]])
+        self.last_time = t
+
+
+class ChipReceiveNeurons(ChipReceiveNode):
+    """Passes spikes directly (no on-off neuron encoding)."""
+    def __init__(self, dimensions, neuron_type=None):
+        self.neuron_type = neuron_type  # stored only; unused in this class
+        super(ChipReceiveNeurons, self).__init__(dimensions, dimensions)
diff --git a/nengo_loihi/loihi_interface.py b/nengo_loihi/loihi_interface.py
index d50074e24..6dc1e1b49 100644
--- a/nengo_loihi/loihi_interface.py
+++ b/nengo_loihi/loihi_interface.py
@@ -10,6 +10,7 @@
 import jinja2
 import numpy as np
 
+import nengo
 from nengo.exceptions import SimulationError
 
 try:
@@ -30,6 +31,7 @@ def no_nxsdk(*args, **kwargs):
 from nengo_loihi.allocators import one_to_one_allocator
 from nengo_loihi.loihi_api import (
     CX_PROFILES_MAX, VTH_PROFILES_MAX, bias_to_manexp)
+from nengo_loihi.loihi_cx import CxGroup, PESModulatoryTarget
 
 logger = logging.getLogger(__name__)
 
@@ -385,14 +387,17 @@ class LoihiSimulator(object):
         the nengo_io_h2c channel on one timestep.
     """
 
-    def __init__(self, cx_model, seed=None, snip_max_spikes_per_step=50):
+    def __init__(self, cx_model,
+                 use_snips=True, seed=None, snip_max_spikes_per_step=50):
         self.closed = False
-
         self.check_nxsdk_version()
 
         self.n2board = None
         self._probe_filters = {}
         self._probe_filter_pos = {}
+        self._snip_probes = {}
+        self._cx_probe2probe = {}
+        self._chip2host_sent_steps = 0
         self.snip_max_spikes_per_step = snip_max_spikes_per_step
 
         nxsdk_dir = os.path.realpath(
@@ -409,7 +414,7 @@ def __init__(self, cx_model, seed=None, snip_max_spikes_per_step=50):
         # from previous simulators
         N2SpikeProbe.probeDict.clear()
 
-        self.build(cx_model, seed=seed)
+        self.build(cx_model, use_snips=use_snips, seed=seed)
 
     def __enter__(self):
         return self
@@ -435,8 +440,30 @@ def check_nxsdk_version():
                           "version (%s); latest fully supported version is "
                           "%s" % (version, max_tested))
 
-    def build(self, cx_model, seed=None):
+    def build(self, cx_model, use_snips=True, seed=None):
         cx_model.validate()
+
+        if use_snips:
+            # tag all probes as being snip-based,
+            # having normal probes at the same time as snips causes problems
+            for group in cx_model.cx_groups.keys():
+                for cx_probe in group.probes:
+                    cx_probe.use_snip = True
+            # create a place to store data from snip probes
+            for probe in cx_model.probes:
+                self._snip_probes[probe] = []
+
+            # map CxProbes to their nengo.Probes
+            for obj in cx_model.objs:
+                if isinstance(obj, nengo.Probe):
+                    # actual nengo.Probes on chip objects
+                    cx_probe = cx_model.objs[obj]['out']
+                    self._cx_probe2probe[cx_probe] = obj
+            for probe in cx_model.chip2host_receivers:
+                # probes used for chip->host communication
+                cx_probe = cx_model.objs[probe]['out']
+                self._cx_probe2probe[cx_probe] = probe
+
         self.model = cx_model
 
         # --- allocate --
@@ -458,6 +485,140 @@ def run_steps(self, steps, blocking=True):
         self.connect()
         self.n2board.run(steps, aSync=not blocking)
 
+    def chip2host(self):
+        """Read one snip message and route each probe's slice of it."""
+        message = self.nengo_io_c2h.read(self.nengo_io_c2h_count)
+        # first entry is the time step, the remainder is probe data
+        time_step = message[0]
+        data = np.array(message[1:])
+        snip_range = self.nengo_io_snip_range
+        receivers = self.model.chip2host_receivers
+        for cx_probe, probe in self._cx_probe2probe.items():
+            x = data[snip_range[cx_probe]]
+            if cx_probe.key == 's':
+                # Loihi uses the voltage value to indicate where we
+                # are in the refractory period. We want to find neurons
+                # starting their refractory period.
+                is_group = isinstance(cx_probe.target, CxGroup)
+                delays = cx_probe.target.refractDelay if is_group else 1
+                x = (x == delays * 128)
+            if cx_probe.weights is not None:
+                x = np.dot(x, cx_probe.weights)
+            receiver = receivers.get(probe)
+            if receiver is None:
+                # onchip probes
+                self._snip_probes[probe].append(x)
+            else:
+                # chip->host
+                receiver.receive(self.model.dt * time_step, x)
+
+    def chip2host_precomputed(self):
+        """Send unsent board-probe rows to the chip->host receivers."""
+        # TODO: this is almost identical to CxSimulator.chip2host
+        sent = self._chip2host_sent_steps
+        n_new = None
+        for probe, receiver in self.model.chip2host_receivers.items():
+            cx_probe = self.model.objs[probe]['out']
+            # one column per underlying board probe, unsent rows only
+            columns = [p.timeSeries.data[sent:]
+                       for p in self.board.probe_map[cx_probe]]
+            fresh = np.column_stack(columns)
+            if len(fresh) == 0:
+                continue
+            if n_new is None:
+                n_new = len(fresh)
+            else:
+                assert n_new == len(fresh)
+            if cx_probe.weights is not None:
+                fresh = np.dot(fresh, cx_probe.weights)
+            for k, row in enumerate(fresh):
+                receiver.receive(self.model.dt * (sent + k + 2), row)
+        if n_new is not None:
+            self._chip2host_sent_steps = sent + n_new
+
+    def send_spikes(self):  # queue all pending host->chip spikes on spike_gen
+        # TODO: this is almost the same as _host2chip_spikes
+        items = []  # argument tuples for addSpike: (row index,) + axon entry
+        for sender, receiver in self.model.host2chip_senders.items():
+            for t, x in sender.queue:
+                receiver.receive(t, x)
+            del sender.queue[:]  # emptied in place once delivered
+            spike_input = receiver.cx_spike_input
+            sent_count = spike_input.sent_count  # rows handled by past calls
+            while sent_count < len(spike_input.spikes):
+                for j, s in enumerate(spike_input.spikes[sent_count]):
+                    if s:
+                        for output_axon in spike_input.axon_ids:
+                            items.append(
+                                (sent_count,) + output_axon[j])
+                sent_count += 1
+            spike_input.sent_count = sent_count
+        if len(items) > 0:  # NOTE(review): spike_input is the last receiver's
+            for info in sorted(items):
+                spike_input.spike_gen.addSpike(*info)
+
+    def host2chip(self):
+        """Pack pending spikes and errors into one message for the chip."""
+        spikes = self._host2chip_spikes()
+        errors = self._host2chip_errors()
+        limit = self.snip_max_spikes_per_step
+        if len(spikes) > limit:
+            warnings.warn("Too many spikes (%d) sent in one time step.  "
+                          "Increase the value of snip_max_spikes_per_step "
+                          "(currently set to %d)" % (len(spikes), limit))
+            del spikes[limit:]  # anything beyond the limit is dropped
+
+        msg = [len(spikes)]  # spike count, two fields per spike, then errors
+        for spike in spikes:
+            assert spike[0] == 0
+            msg.extend(spike[1:3])
+        for error in errors:
+            msg.extend(error)
+        self.nengo_io_h2c.write(len(msg), msg)
+
+    def _host2chip_spikes(self):
+        """Deliver queued host values and list the axon entries to spike."""
+        to_send = []
+        for sender, receiver in self.model.host2chip_senders.items():
+            if not hasattr(receiver, "receive"):
+                continue  # its queue is left untouched by this method
+            for t, x in sender.queue:
+                receiver.receive(t, x)
+            del sender.queue[:]
+            spike_input = receiver.cx_spike_input
+            axon_ids = spike_input.axon_ids
+            spikes = spike_input.spikes
+            first = spike_input.sent_count  # rows before this were sent
+            for step in range(first, len(spikes)):
+                for j, fired in enumerate(spikes[step]):
+                    if fired:
+                        to_send.extend(axons[j] for axons in axon_ids)
+            spike_input.sent_count = max(first, len(spikes))
+        return to_send
+
+    def _host2chip_errors(self):
+        """Drain PES error queues into ``[coreid, n, *error]`` messages."""
+        errors = []
+        for sender, receiver in self.model.host2chip_senders.items():
+            if not isinstance(receiver, PESModulatoryTarget):
+                continue
+            for _, x in sender.queue:
+                # scale to integers, then clip to [-100, 100]
+                x = np.clip((100 * x).astype(int), -100, 100)
+                conn = self.model.probe_conns[receiver.target]
+                dec_cx = self.model.objs[conn]['decoded']
+                # Reset per message so a failed lookup trips the assert below
+                # rather than reusing a stale id; all groups are checked.
+                coreid = None
+                for core in self.board.chips[0].cores:
+                    # TODO: assumes one group per core
+                    if any(group == dec_cx for group in core.groups):
+                        coreid = core.learning_coreid
+                assert coreid is not None, "no core holds the decoded group"
+                errors.append([coreid, len(x)] + x.tolist())
+            del sender.queue[:]
+        return errors
+
     def wait_for_completion(self):
         self.n2board.finishRun()
 
@@ -520,6 +681,12 @@ def _filter_probe(self, cx_probe, data):
 
     def get_probe_output(self, probe):
         cx_probe = self.model.objs[probe]['out']
+        if cx_probe.use_snip:
+            data = self._snip_probes[probe]
+            if probe.synapse is not None:
+                return probe.synapse.filt(data, dt=self.model.dt, y0=0)
+            else:
+                return data
         n2probe = self.board.probe_map[cx_probe]
         x = np.column_stack([p.timeSeries.data for p in n2probe])
         x = x if cx_probe.weights is None else np.dot(x, cx_probe.weights)
diff --git a/nengo_loihi/simulator.py b/nengo_loihi/simulator.py
index 89098d2c3..fa458eae3 100644
--- a/nengo_loihi/simulator.py
+++ b/nengo_loihi/simulator.py
@@ -1,3 +1,4 @@
+from collections import OrderedDict
 import logging
 import warnings
 
@@ -10,8 +11,9 @@
 from nengo.simulator import ProbeDict as NengoProbeDict
 
 from nengo_loihi.builder import Model
-from nengo_loihi.loihi_cx import CxGroup
-from nengo_loihi.splitter import PESModulatoryTarget, split
+from nengo_loihi.loihi_cx import CxSimulator
+from nengo_loihi.loihi_interface import LoihiSimulator
+from nengo_loihi.splitter import split
 import nengo_loihi.config as config
 
 logger = logging.getLogger(__name__)
@@ -158,34 +160,40 @@ def __init__(self, network, dt=0.001, seed=None, model=None,  # noqa: C901
                              % (1. / max_rate, self.dt))
         self.precompute = precompute
         self.networks = None
-        self.sims = {}
+        self.sims = OrderedDict()
+        self._run_steps = None
 
-        self.chip2host_sent_steps = 0  # how many timesteps have been sent
         if network is not None:
             nengo.rc.set("decoder_cache", "enabled", "False")
             config.add_params(network)
 
-            # split the host into two or three networks
+            # split the host into one, two or three networks
             self.networks = split(
                 network, precompute, max_rate, self.model.inter_tau)
             network = self.networks.chip
 
-            self.chip2host_receivers = self.networks.chip2host_receivers
-            self.host2chip_senders = self.networks.host2chip_senders
             self.model.chip2host_params = self.networks.chip2host_params
+            self.model.chip2host_receivers = self.networks.chip2host_receivers
+            self.model.host2chip_senders = self.networks.host2chip_senders
 
             self.chip = self.networks.chip
             self.host = self.networks.host
             self.host_pre = self.networks.host_pre
 
-            if precompute:
+            if len(self.host_pre.all_objects) > 0:
                 self.sims["host_pre"] = nengo.Simulator(
                     self.host_pre, dt=self.dt, progress_bar=False)
-                self.sims["host_post"] = nengo.Simulator(
-                    self.host, dt=self.dt, progress_bar=False)
-            else:
+
+            if len(self.host.all_objects) > 0:
                 self.sims["host"] = nengo.Simulator(
                     self.host, dt=self.dt, progress_bar=False)
+            elif not precompute:
+                # If there is no host and precompute=False, then all objects
+                # must be on the chip, which is precomputable in the sense that
+                # no communication has to happen with the host.
+                # We could warn about this, but we want to avoid people having
+                # to specify `precompute` unless they absolutely have to.
+                self.precompute = True
 
             # Build the network into the model
             self.model.build(network)
@@ -208,42 +216,17 @@ def __init__(self, network, dt=0.001, seed=None, model=None,  # noqa: C901
             except ImportError:
                 target = 'sim'
 
-        if target == 'simreal':
-            logger.info("Using real-valued simulator")
-            self.sims["emulator"] = self.model.get_simulator(seed=seed)
-        elif target == 'sim':
-            logger.info("Using discretized simulator")
-            self.model.discretize()  # Make parameters fixed bit widths
-            self.sims["emulator"] = self.model.get_simulator(seed=seed)
+        logger.info("Simulator target is %r", target)
+        logger.info("Simulator precompute is %r", self.precompute)
+
+        if target != "simreal":
+            self.model.discretize()
+
+        if target in ("simreal", "sim"):
+            self.sims["emulator"] = CxSimulator(self.model, seed=seed)
         elif target == 'loihi':
-            logger.info(
-                "Using Loihi hardware with precompute=%s", self.precompute)
-            self.model.discretize()  # Make parameters fixed bit widths
-            if not precompute:
-                # tag all probes as being snipbased
-                #  (having normal probes at the same time as snips
-                #   seems to cause problems)
-                for group in self.model.cx_groups.keys():
-                    for cx_probe in group.probes:
-                        cx_probe.use_snip = True
-                # create a place to store data from snip probes
-                self.snip_probes = {}
-                for probe in network.all_probes:
-                    self.snip_probes[probe] = []
-
-                # create a list of all the CxProbes and their nengo.Probes
-                self.cx_probe2probe = {}
-                for obj in self.model.objs.keys():
-                    if isinstance(obj, nengo.Probe):
-                        # actual nengo.Probes on chip objects
-                        cx_probe = self.model.objs[obj]['out']
-                        self.cx_probe2probe[cx_probe] = obj
-                for probe in self.chip2host_receivers.keys():
-                    # probes used for chip->host communication
-                    cx_probe = self.model.objs[probe]['out']
-                    self.cx_probe2probe[cx_probe] = probe
-
-            self.sims["loihi"] = self.model.get_loihi(seed=seed)
+            self.sims["loihi"] = LoihiSimulator(
+                self.model, use_snips=not self.precompute, seed=seed)
         else:
             raise ValidationError("Must be 'simreal', 'sim', or 'loihi'",
                                   attr="target")
@@ -314,19 +297,14 @@ def _probe(self):
             assert ("loihi" not in self.sims
                     or "emulator" not in self.sims)
             if "loihi" in self.sims:
-                cx_probe = self.sims["loihi"].model.objs[probe]['out']
-                if cx_probe.use_snip:
-                    data = self.snip_probes[probe]
-                    if probe.synapse is not None:
-                        data = probe.synapse.filt(data, dt=self.dt, y0=0)
-                else:
-                    data = self.sims["loihi"].get_probe_output(probe)
+                data = self.sims["loihi"].get_probe_output(probe)
             elif "emulator" in self.sims:
                 data = self.sims["emulator"].get_probe_output(probe)
             # TODO: stop recomputing this all the time
             del self._probe_outputs[probe][:]
             self._probe_outputs[probe].extend(data)
-            assert len(self._probe_outputs[probe]) == self.n_steps
+            assert len(self._probe_outputs[probe]) == self.n_steps, (
+                len(self._probe_outputs[probe]), self.n_steps)
 
     def _probe_step_time(self):
         self._time = self._n_steps * self.dt
@@ -388,9 +366,113 @@ def run(self, time_in_seconds):
 
     def step(self):
         """Advance the simulator by 1 step (``dt`` seconds)."""
-
         self.run_steps(1)
 
+    def _make_run_steps(self):
+        """Create ``self._run_steps`` once for the backend in ``self.sims``."""
+        if self._run_steps is not None:
+            return
+        emu = "emulator" in self.sims
+        assert not (emu and "loihi" in self.sims)
+        make = self._make_emu_run_steps if emu else self._make_loihi_run_steps
+        make()
+
+    def _make_emu_run_steps(self):  # pick the emulator run loop
+        host_pre = self.sims.get("host_pre", None)
+        emulator = self.sims["emulator"]
+        host = self.sims.get("host", None)
+        # host_pre / host are None when self.sims has no such simulator
+        if self.precompute:
+            if host_pre is not None and host is not None:
+                # each stage runs the whole batch: host_pre, chip, then host
+                def emu_precomputed_host_pre_and_host(steps):
+                    host_pre.run_steps(steps)
+                    emulator.host2chip()
+                    emulator.run_steps(steps)
+                    emulator.chip2host()
+                    host.run_steps(steps)
+                self._run_steps = emu_precomputed_host_pre_and_host
+
+            elif host_pre is not None:
+                # no host simulator here, so chip2host is not called
+                def emu_precomputed_host_pre_only(steps):
+                    host_pre.run_steps(steps)
+                    emulator.host2chip()
+                    emulator.run_steps(steps)
+                self._run_steps = emu_precomputed_host_pre_only
+
+            elif host is not None:
+                # no host_pre simulator here, so host2chip is not called
+                def emu_precomputed_host_only(steps):
+                    emulator.run_steps(steps)
+                    emulator.chip2host()
+                    host.run_steps(steps)
+                self._run_steps = emu_precomputed_host_only
+
+            else:
+                self._run_steps = emulator.run_steps  # chip only, no wrapper
+
+        else:  # not precomputed: host and emulator advance one step at a time
+            assert host is not None, "Model is precomputable"
+
+            def emu_bidirectional_with_host(steps):
+                for _ in range(steps):
+                    host.step()
+                    emulator.host2chip()
+                    emulator.step()
+                    emulator.chip2host()
+            self._run_steps = emu_bidirectional_with_host
+
+    def _make_loihi_run_steps(self):  # pick the hardware run loop
+        host_pre = self.sims.get("host_pre", None)
+        loihi = self.sims["loihi"]
+        host = self.sims.get("host", None)
+        # host_pre / host are None when self.sims has no such simulator
+        if self.precompute:
+            if host_pre is not None and host is not None:
+                # each stage runs the whole batch: host_pre, chip, then host
+                def loihi_precomputed_host_pre_and_host(steps):
+                    host_pre.run_steps(steps)
+                    loihi.send_spikes()
+                    loihi.run_steps(steps, blocking=True)
+                    loihi.chip2host_precomputed()
+                    host.run_steps(steps)
+                self._run_steps = loihi_precomputed_host_pre_and_host
+
+            elif host_pre is not None:
+                # no host simulator here, so chip output is not forwarded
+                def loihi_precomputed_host_pre_only(steps):
+                    host_pre.run_steps(steps)
+                    loihi.send_spikes()
+                    loihi.run_steps(steps, blocking=True)
+                self._run_steps = loihi_precomputed_host_pre_only
+
+            elif host is not None:
+                # no host_pre simulator here, so send_spikes is not called
+                def loihi_precomputed_host_only(steps):
+                    loihi.run_steps(steps)  # `blocking` defaults to True
+                    loihi.chip2host_precomputed()
+                    host.run_steps(steps)
+                self._run_steps = loihi_precomputed_host_only
+
+            else:
+                self._run_steps = loihi.run_steps  # chip only, no wrapper
+
+        else:  # not precomputed: the host exchanges data with a running chip
+            assert host is not None, "Model is precomputable"
+
+            def loihi_bidirectional_with_host(steps):
+                loihi.create_io_snip()
+                loihi.run_steps(steps, blocking=False)  # returns immediately
+                for _ in range(steps):
+                    host.step()
+                    loihi.host2chip()
+                    loihi.chip2host()
+                logger.info("Waiting for run_steps to complete...")
+                loihi.wait_for_completion()
+                logger.info("run_steps completed")
+            self._run_steps = loihi_bidirectional_with_host
+
     def run_steps(self, steps):
         """Simulate for the given number of ``dt`` steps.
 
@@ -401,231 +483,13 @@ def run_steps(self, steps):
         """
         if self.closed:
             raise SimulatorClosed("Simulator cannot run because it is closed.")
-
-        if "emulator" in self.sims:
-            if self.precompute:
-                self.sims["host_pre"].run_steps(steps)
-                self.handle_host2chip_communications()
-                self.sims["emulator"].run_steps(steps)
-                self.handle_chip2host_communications()
-                self.sims["host_post"].run_steps(steps)
-            elif "host" in self.sims:
-                for i in range(steps):
-                    self.sims["host"].step()
-                    self.handle_host2chip_communications()
-                    self.sims["emulator"].step()
-                    self.handle_chip2host_communications()
-            else:
-                raise Exception
-                self.sims["emulator"].run_steps(steps)
-        elif "loihi" in self.sims:
-            if self.precompute:
-                self.sims["host_pre"].run_steps(steps)
-                self.handle_host2chip_communications()
-                self.sims["loihi"].run_steps(steps, blocking=True)
-                self.handle_chip2host_communications()
-                self.sims["host_post"].run_steps(steps)
-            elif "host" in self.sims:
-                self.sims["loihi"].create_io_snip()
-                self.sims["loihi"].run_steps(steps, blocking=False)
-                for i in range(steps):
-                    self.sims["host"].run_steps(1)
-                    self.handle_host2chip_communications()
-                    self.handle_chip2host_communications()
-
-                logger.info("Waiting for completion")
-                self.sims["loihi"].wait_for_completion()
-                logger.info("done")
-            else:
-                raise Exception
-                self.sims["loihi"].run_steps(steps, blocking=True)
-
+        if self._run_steps is None:
+            self._make_run_steps()
+        self._run_steps(steps)
         self._n_steps += steps
         logger.info("Finished running for %d steps", steps)
         self._probe()
 
-    def handle_host2chip_communications(self):  # noqa: C901
-        if "emulator" in self.sims:
-            if self.precompute or "host" in self.sims:
-                # go through the list of host2chip connections
-                for sender, receiver in self.host2chip_senders.items():
-                    learning_rate = 50  # This is set to match hardware
-                    if isinstance(receiver, PESModulatoryTarget):
-                        for t, x in sender.queue:
-                            probe = receiver.target
-                            conn = self.model.probe_conns[probe]
-                            dec_syn = self.model.objs[conn]['decoders']
-                            assert dec_syn.tracing
-
-                            z = self.sims["emulator"].z[dec_syn]
-                            x = np.hstack([-x, x])
-
-                            delta_w = np.outer(z, x) * learning_rate
-
-                            for i, w in enumerate(dec_syn.weights):
-                                w += delta_w[i].astype('int32')
-                    else:
-                        for t, x in sender.queue:
-                            receiver.receive(t, x)
-                    del sender.queue[:]
-        elif "loihi" in self.sims:
-            if self.precompute:
-                # go through the list of host2chip connections
-                items = []
-                for sender, receiver in self.host2chip_senders.items():
-                    for t, x in sender.queue:
-                        receiver.receive(t, x)
-                    del sender.queue[:]
-                    spike_input = receiver.cx_spike_input
-                    sent_count = spike_input.sent_count
-                    while sent_count < len(spike_input.spikes):
-                        for j, s in enumerate(spike_input.spikes[sent_count]):
-                            if s:
-                                for output_axon in spike_input.axon_ids:
-                                    items.append(
-                                        (sent_count,) + output_axon[j])
-                        sent_count += 1
-                    spike_input.sent_count = sent_count
-                if len(items) > 0:
-                    for info in sorted(items):
-                        spike_input.spike_gen.addSpike(*info)
-            elif "host" in self.sims:
-                to_send = []
-                errors = []
-                # go through the list of host2chip connections
-                for sender, receiver in self.host2chip_senders.items():
-                    if isinstance(receiver, PESModulatoryTarget):
-                        for t, x in sender.queue:
-                            x = (100 * x).astype(int)
-                            x = np.clip(x, -100, 100, out=x)
-                            probe = receiver.target
-                            conn = self.model.probe_conns[probe]
-                            dec_cx = self.model.objs[conn]['decoded']
-                            for core in (
-                                    self.sims["loihi"].board.chips[0].cores):
-                                for group in core.groups:
-                                    if group == dec_cx:
-                                        # TODO: assumes one group per core
-                                        coreid = core.learning_coreid
-                                    break
-
-                            assert coreid is not None
-
-                            errors.append([coreid, len(x)] + x.tolist())
-                        del sender.queue[:]
-
-                    else:
-                        for t, x in sender.queue:
-                            receiver.receive(t, x)
-                        del sender.queue[:]
-                        spike_input = receiver.cx_spike_input
-                        sent_count = spike_input.sent_count
-                        axon_ids = spike_input.axon_ids
-                        spikes = spike_input.spikes
-                        while sent_count < len(spikes):
-                            for j, s in enumerate(spikes[sent_count]):
-                                if s:
-                                    for output_axon in axon_ids:
-                                        to_send.append(output_axon[j])
-                            sent_count += 1
-                        spike_input.sent_count = sent_count
-
-                max_spikes = self.sims["loihi"].snip_max_spikes_per_step
-                if len(to_send) > max_spikes:
-                    warnings.warn("Too many spikes (%d) sent in one time "
-                                  "step.  Increase the value of "
-                                  "snip_max_spikes_per_step (currently "
-                                  "set to %d)" % (len(to_send), max_spikes))
-                    del to_send[max_spikes:]
-
-                msg = [len(to_send)]
-                for spike in to_send:
-                    assert spike[0] == 0
-                    msg.extend(spike[1:3])
-                for error in errors:
-                    msg.extend(error)
-                self.sims["loihi"].nengo_io_h2c.write(len(msg), msg)
-            else:
-                raise NotImplementedError()
-
-    def handle_chip2host_communications(self):  # noqa: C901
-        if "emulator" in self.sims:
-            if self.precompute or "host" in self.sims:
-                # go through the list of chip2host connections
-                i = self.chip2host_sent_steps
-                increment = None
-                for probe, receiver in self.chip2host_receivers.items():
-                    # extract the probe data from the simulator
-                    cx_probe = self.sims["emulator"].model.objs[probe]['out']
-
-                    x = self.sims["emulator"].probe_outputs[cx_probe][i:]
-                    if len(x) > 0:
-                        if increment is None:
-                            increment = len(x)
-                        else:
-                            assert increment == len(x)
-                        if cx_probe.weights is not None:
-                            x = np.dot(x, cx_probe.weights)
-
-                        for j in range(len(x)):
-                            receiver.receive(self.dt * (i + j + 2), x[j])
-                if increment is not None:
-                    self.chip2host_sent_steps += increment
-            else:
-                raise NotImplementedError()
-        elif "loihi" in self.sims:
-            if self.precompute:
-                # go through the list of chip2host connections
-                increment = None
-                for probe, receiver in self.chip2host_receivers.items():
-                    # extract the probe data from the simulator
-                    cx_probe = self.sims["loihi"].model.objs[probe]['out']
-                    n2probe = self.sims["loihi"].board.probe_map[cx_probe]
-                    x = np.column_stack([
-                        p.timeSeries.data[self.chip2host_sent_steps:]
-                        for p in n2probe])
-                    if len(x) > 0:
-                        if increment is None:
-                            increment = len(x)
-                        else:
-                            assert increment == len(x)
-                        if cx_probe.weights is not None:
-                            x = np.dot(x, cx_probe.weights)
-                        for j in range(len(x)):
-                            receiver.receive(
-                                self.dt * (self.chip2host_sent_steps + j + 2),
-                                x[j])
-                if increment is not None:
-                    self.chip2host_sent_steps += increment
-            elif "host" in self.sims:
-                count = self.sims["loihi"].nengo_io_c2h_count
-                data = self.sims["loihi"].nengo_io_c2h.read(count)
-                time_step, data = data[0], np.array(data[1:])
-                snip_range = self.sims["loihi"].nengo_io_snip_range
-                for cx_probe, probe in self.cx_probe2probe.items():
-                    x = data[snip_range[cx_probe]]
-                    if cx_probe.key == 's':
-                        if isinstance(cx_probe.target, CxGroup):
-                            refract_delays = cx_probe.target.refractDelay
-                        else:
-                            refract_delays = 1
-
-                        # Loihi uses the voltage value to indicate where we
-                        # are in the refractory period. We want to find neurons
-                        # starting their refractory period.
-                        x = (x == refract_delays * 128)
-                    if cx_probe.weights is not None:
-                        x = np.dot(x, cx_probe.weights)
-                    receiver = self.chip2host_receivers.get(probe, None)
-                    if receiver is not None:
-                        # chip->host
-                        receiver.receive(self.dt*(time_step), x)
-                    else:
-                        # onchip probes
-                        self.snip_probes[probe].append(x)
-            else:
-                raise NotImplementedError()
-
     def trange(self, sample_every=None):
         """Create a vector of times matching probed data.
 
diff --git a/nengo_loihi/splitter.py b/nengo_loihi/splitter.py
index c7335f407..205ee6990 100644
--- a/nengo_loihi/splitter.py
+++ b/nengo_loihi/splitter.py
@@ -1,11 +1,14 @@
 from collections import defaultdict
 import logging
+import warnings
 
 import nengo
-from nengo.exceptions import BuildError, SimulationError
+from nengo.exceptions import BuildError
 import numpy as np
 
-from nengo_loihi import loihi_cx
+from nengo_loihi.loihi_cx import (
+    ChipReceiveNode, ChipReceiveNeurons, HostSendNode, HostReceiveNode,
+    PESModulatoryTarget)
 from nengo_loihi.neurons import NIF
 
 logger = logging.getLogger(__name__)
@@ -125,8 +128,9 @@ def split(net, precompute, max_rate, inter_tau):
     # Commit to the moves marked in the previous steps
     networks.finalize()
     if precompute:
-        assert len(networks.host_pre.all_objects) > 0, (
-            "No precomputable objects")
+        if len(networks.host_pre.all_objects) == 0:
+            warnings.warn("No precomputable objects. Setting precompute=True "
+                          "has no effect.")
     else:
         assert len(networks.host_pre.all_objects) == 0, (
             "Object erroneously added to host_pre")
@@ -419,69 +423,3 @@ def split_pre_from_host(networks):  # noqa: C901
                                      "as it is dependent on output")
                 networks.move(obj, "host_pre", force=True)
                 queue.append(obj)
-
-
-class PESModulatoryTarget(object):
-    def __init__(self, target):
-        self.target = target
-
-
-class HostSendNode(nengo.Node):
-    """For sending host->chip messages"""
-
-    def __init__(self, dimensions):
-        self.queue = []
-        super(HostSendNode, self).__init__(self.update,
-                                           size_in=dimensions, size_out=0)
-
-    def update(self, t, x):
-        assert len(self.queue) == 0 or t > self.queue[-1][0]
-        self.queue.append((t, x))
-
-
-class HostReceiveNode(nengo.Node):
-    """For receiving chip->host messages"""
-
-    def __init__(self, dimensions):
-        self.queue = [(0, np.zeros(dimensions))]
-        self.queue_index = 0
-        super(HostReceiveNode, self).__init__(self.update,
-                                              size_in=0, size_out=dimensions)
-
-    def update(self, t):
-        while (len(self.queue) > self.queue_index + 1
-               and self.queue[self.queue_index][0] < t):
-            self.queue_index += 1
-        return self.queue[self.queue_index][1]
-
-    def receive(self, t, x):
-        self.queue.append((t, x))
-
-
-class ChipReceiveNode(nengo.Node):
-    """For receiving host->chip messages"""
-
-    def __init__(self, dimensions, size_out):
-        self.raw_dimensions = dimensions
-        self.cx_spike_input = loihi_cx.CxSpikeInput(
-            np.zeros((0, dimensions), dtype=bool))
-        self.last_time = None
-        super(ChipReceiveNode, self).__init__(self.update,
-                                              size_in=0, size_out=size_out)
-
-    def update(self, t):
-        raise SimulationError("ChipReceiveNodes should not be run")
-
-    def receive(self, t, x):
-        assert self.last_time is None or t > self.last_time
-        # TODO: make this stacking efficient
-        self.cx_spike_input.spikes = np.vstack([self.cx_spike_input.spikes,
-                                                [x > 0]])
-        self.last_time = t
-
-
-class ChipReceiveNeurons(ChipReceiveNode):
-    """Passes spikes directly (no on-off neuron encoding)"""
-    def __init__(self, dimensions, neuron_type=None):
-        self.neuron_type = neuron_type
-        super(ChipReceiveNeurons, self).__init__(dimensions, dimensions)
diff --git a/nengo_loihi/tests/test_loihi_cx.py b/nengo_loihi/tests/test_loihi_cx.py
index 014e917fa..bd2e14451 100644
--- a/nengo_loihi/tests/test_loihi_cx.py
+++ b/nengo_loihi/tests/test_loihi_cx.py
@@ -6,6 +6,7 @@
 from nengo_loihi.loihi_api import VTH_MAX
 from nengo_loihi.loihi_cx import (
     CxAxons, CxGroup, CxModel, CxProbe, CxSimulator, CxSpikeInput, CxSynapses)
+from nengo_loihi.loihi_interface import LoihiSimulator
 
 
 def test_simulator_noise(request, plt, seed):
@@ -29,12 +30,12 @@ def test_simulator_noise(request, plt, seed):
     model.discretize()
 
     if target == 'loihi':
-        with model.get_loihi(seed=seed) as sim:
+        with LoihiSimulator(model, use_snips=False, seed=seed) as sim:
             sim.run_steps(1000)
             y = np.column_stack([
                 p.timeSeries.data for p in sim.board.probe_map[probe]])
     else:
-        with model.get_simulator(seed=seed) as sim:
+        with CxSimulator(model, seed=seed) as sim:
             sim.run_steps(1000)
         y = sim.probe_outputs[probe]
 
@@ -131,7 +132,7 @@ def test_uv_overflow(n_axons, Simulator, plt, allclose):
     assert CxSimulator.strict  # Tests should be run in strict mode
     CxSimulator.strict = False
     try:
-        emu = model.get_simulator()
+        emu = CxSimulator(model)
         with pytest.warns(UserWarning):
             emu.run_steps(nt)
     finally:
@@ -141,7 +142,7 @@ def test_uv_overflow(n_axons, Simulator, plt, allclose):
     emu_v = np.array(emu.probe_outputs[probe_v])
     emu_s = np.array(emu.probe_outputs[probe_s])
 
-    with model.get_loihi() as sim:
+    with LoihiSimulator(model, use_snips=False) as sim:
         sim.run_steps(nt)
         sim_u = np.column_stack([
             p.timeSeries.data for p in sim.board.probe_map[probe_u]])
diff --git a/nengo_loihi/tests/test_simulator.py b/nengo_loihi/tests/test_simulator.py
index 9502d53f2..f3fb519b9 100644
--- a/nengo_loihi/tests/test_simulator.py
+++ b/nengo_loihi/tests/test_simulator.py
@@ -1,3 +1,5 @@
+import inspect
+
 import nengo
 import numpy as np
 import pytest
@@ -138,3 +140,121 @@ def test_close(Simulator, precompute):
 
     assert sim.closed
     assert all(s.closed for s in sim.sims.values())
+
+
+def test_all_run_steps(Simulator):
+    # Case 1: No objects on host, so no host and no host_pre
+    with nengo.Network() as net:
+        pre = nengo.Ensemble(10, 1)
+        post = nengo.Ensemble(10, 1)
+        nengo.Connection(pre, post)
+
+    # 1a. precompute=False, no host
+    with Simulator(net) as sim:
+        sim.run(0.001)
+    # Since no objects on host, we should be precomputing even if we did not
+    # explicitly request precomputing
+    assert sim.precompute
+    assert inspect.ismethod(sim._run_steps)
+    assert sim._run_steps.__name__ == "run_steps"
+
+    # 1b. precompute=True, no host, no host_pre
+    with pytest.warns(UserWarning) as record:
+        with Simulator(net, precompute=True) as sim:
+            sim.run(0.001)
+    assert any("No precomputable objects" in str(r.message) for r in record)
+    assert inspect.ismethod(sim._run_steps)
+    assert sim._run_steps.__name__ == "run_steps"
+
+    # Case 2: Add a precomputable off-chip object, so we have either host or
+    # host_pre but not both host and host_pre
+    with net:
+        stim = nengo.Node(1)
+        stim_conn = nengo.Connection(stim, pre)
+
+    # 2a. precompute=False, host
+    with Simulator(net) as sim:
+        sim.run(0.001)
+    assert sim._run_steps.__name__.endswith("_bidirectional_with_host")
+
+    # 2b. precompute=True, no host, host_pre
+    with Simulator(net, precompute=True) as sim:
+        sim.run(0.001)
+    assert sim._run_steps.__name__.endswith("_precomputed_host_pre_only")
+
+    # Case 3: Add a non-precomputable off-chip object so we have host
+    # and host_pre
+    with net:
+        out = nengo.Node(size_in=1)
+        nengo.Connection(post, out)
+
+    # 3a. precompute=False, host (same as 2a)
+    with Simulator(net) as sim:
+        sim.run(0.001)
+    assert sim._run_steps.__name__.endswith("_bidirectional_with_host")
+
+    # 3b. precompute=True, host, host_pre
+    with Simulator(net, precompute=True) as sim:
+        sim.run(0.001)
+    assert sim._run_steps.__name__.endswith("_precomputed_host_pre_and_host")
+
+    # Case 4: Delete the precomputable off-chip object, so we have host only
+    net.nodes.remove(stim)
+    net.connections.remove(stim_conn)
+
+    # 4a. precompute=False, host (same as 2a and 3a)
+    with Simulator(net) as sim:
+        sim.run(0.001)
+    assert sim._run_steps.__name__.endswith("_bidirectional_with_host")
+
+    # 4b. precompute=True, host, no host_pre
+    with pytest.warns(UserWarning) as record:
+        with Simulator(net, precompute=True) as sim:
+            sim.run(0.001)
+    assert any("No precomputable objects" in str(r.message) for r in record)
+    assert sim._run_steps.__name__.endswith("_precomputed_host_only")
+
+
+def test_no_precomputable(Simulator):
+    with nengo.Network() as net:
+        active_ens = nengo.Ensemble(10, 1,
+                                    gain=np.ones(10) * 10,
+                                    bias=np.ones(10) * 10)
+        out = nengo.Node(size_in=10)
+        nengo.Connection(active_ens.neurons, out)
+        out_p = nengo.Probe(out)
+
+    with pytest.warns(UserWarning) as record:
+        with Simulator(net, precompute=True) as sim:
+            sim.run(0.01)
+
+    assert sim._run_steps.__name__.endswith("_precomputed_host_only")
+    # Should warn that no objects are precomputable
+    assert any("No precomputable objects" in str(r.message) for r in record)
+    # But still mark the sim as precomputable for speed reasons, because
+    # there are no inputs that depend on outputs in this case
+    assert sim.precompute
+    assert sim.data[out_p].shape[0] == sim.trange().shape[0]
+    assert np.all(sim.data[out_p][-1] > 100)
+
+
+def test_all_onchip(Simulator):
+    with nengo.Network() as net:
+        active_ens = nengo.Ensemble(10, 1,
+                                    gain=np.ones(10) * 10,
+                                    bias=np.ones(10) * 10)
+        out = nengo.Ensemble(10, 1, gain=np.ones(10), bias=np.ones(10))
+        nengo.Connection(active_ens.neurons, out.neurons,
+                         transform=np.eye(10) * 10)
+        out_p = nengo.Probe(out.neurons)
+
+    with Simulator(net) as sim:
+        sim.run(0.01)
+
+    # precompute was not passed to Simulator, but the model should still be
+    # marked as precomputable because there are no off-chip objects
+    assert sim.precompute
+    assert inspect.ismethod(sim._run_steps)
+    assert sim._run_steps.__name__ == "run_steps"
+    assert sim.data[out_p].shape[0] == sim.trange().shape[0]
+    assert np.all(sim.data[out_p][-1] > 100)