Commit

got various GPU bugs sorted -- only 1 real bug, in NewStateNeuron -- filed issue #234
rcoreilly committed Jun 2, 2023
1 parent ed3de63 commit 1a14a31
Showing 14 changed files with 157 additions and 31 deletions.
62 changes: 54 additions & 8 deletions axon/gpu.go
@@ -203,7 +203,8 @@ func (gp *GPU) Config(ctx *Context, net *Network) {
gp.Sys.NewComputePipelineEmbed("SynCa", content, "shaders/gpu_synca.spv")
gp.Sys.NewComputePipelineEmbed("CyclePost", content, "shaders/gpu_cyclepost.spv")

gp.Sys.NewComputePipelineEmbed("NewState", content, "shaders/gpu_newstate.spv")
gp.Sys.NewComputePipelineEmbed("NewStatePool", content, "shaders/gpu_newstate_pool.spv")
gp.Sys.NewComputePipelineEmbed("NewStateNeuron", content, "shaders/gpu_newstate_neuron.spv")
gp.Sys.NewComputePipelineEmbed("MinusPool", content, "shaders/gpu_minuspool.spv")
gp.Sys.NewComputePipelineEmbed("MinusNeuron", content, "shaders/gpu_minusneuron.spv")
gp.Sys.NewComputePipelineEmbed("PlusStart", content, "shaders/gpu_plusstart.spv")
@@ -216,6 +217,7 @@ func (gp *GPU) Config(ctx *Context, net *Network) {
gp.Sys.NewComputePipelineEmbed("ApplyExts", content, "shaders/gpu_applyext.spv")

gp.Sys.NewEvent("MemCopyTo")
gp.Sys.NewEvent("MemCopyTo2")
gp.Sys.NewEvent("MemCopyFm")
gp.Sys.NewEvent("CycleEnd")
gp.Sys.NewEvent("CycleInc")
@@ -460,6 +462,19 @@ func (gp *GPU) SyncStateToGPU() {
gp.SyncMemToGPU()
}

// SyncStateGBufToGPU copies LayVals, Pools, Neurons, GBuf state to GPU
// this is typically sufficient for most syncing --
// only missing the Synapses which must be copied separately.
// Calls SyncMemToGPU -- use when this is the only copy taking place.
func (gp *GPU) SyncStateGBufToGPU() {
if !gp.On {
return
}
gp.CopyStateToStaging()
gp.CopyGBufToStaging()
gp.SyncMemToGPU()
}

// SyncAllToGPU copies LayerVals, Pools, Neurons, Synapses to GPU.
// Calls SyncMemToGPU -- use when this is the only copy taking place.
func (gp *GPU) SyncAllToGPU() {
@@ -497,17 +512,23 @@ func (gp *GPU) SyncSynapsesToGPU() {
gp.SyncMemToGPU()
}

// SyncGBufToGPU copies the GBuf and GSyns memory to the GPU.
// This is a temporary measure to be replaced with a simple kernel to init gbuf,
// needed for InitActs.
func (gp *GPU) SyncGBufToGPU() {
// CopyGBufToStaging copies the GBuf and GSyns memory to staging.
func (gp *GPU) CopyGBufToStaging() {
if !gp.On {
return
}
_, gbv, _ := gp.Syns.ValByIdxTry("GBuf", 0)
gbv.CopyFromBytes(unsafe.Pointer(&gp.Net.PrjnGBuf[0]))
_, gsv, _ := gp.Syns.ValByIdxTry("GSyns", 0)
gsv.CopyFromBytes(unsafe.Pointer(&gp.Net.PrjnGSyns[0]))
}

// SyncGBufToGPU copies the GBuf and GSyns memory to the GPU.
func (gp *GPU) SyncGBufToGPU() {
if !gp.On {
return
}
gp.CopyGBufToStaging()
gp.SyncMemToGPU()
}
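
Note on the refactor above: splitting CopyGBufToStaging out of SyncGBufToGPU lets callers stage several regions and then push them in a single SyncMemToGPU transfer, which is what the new SyncStateGBufToGPU does. An illustrative comparison, using only methods visible in this file (editorial sketch, not part of the commit):

// Separate syncs -- each call presumably issues its own SyncMemToGPU transfer:
gp.SyncStateToGPU()
gp.SyncGBufToGPU()

// Staged together (the pattern SyncStateGBufToGPU uses) -- one transfer:
gp.CopyStateToStaging()
gp.CopyGBufToStaging()
gp.SyncMemToGPU()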

@@ -816,8 +837,8 @@ func (gp *GPU) RunApplyExtsCmd() vk.CommandBuffer {
glr := gp.SyncRegionStruct("Globals")
gp.StartRunCmd(cmd)
gp.Sys.ComputeCmdCopyToGPUCmd(cmd, exr, cxr, glr)
gp.Sys.ComputeSetEventCmd(cmd, "MemCopyTo")
gp.RunPipelineCmd(cmd, "ApplyExts", neurDataN, "MemCopyTo", "")
gp.Sys.ComputeSetEventCmd(cmd, "MemCopyTo2")
gp.RunPipelineCmd(cmd, "ApplyExts", neurDataN, "MemCopyTo2", "")
gp.Sys.ComputeCmdEndCmd(cmd)
return cmd
}
@@ -1005,8 +1026,33 @@ func (gp *GPU) RunCycleSeparateFuns() {
// ThetaCycle trial.
// The caller must check the On flag before running this, to use CPU vs. GPU
func (gp *GPU) RunNewState() {
// todo: we're not actually calling this now, due to bug in NewStateNeuron
cmd := gp.RunNewStateCmd()
gnm := "GPU:NewState"
gp.Net.FunTimerStart(gnm)
gp.Sys.ComputeSubmitWaitCmd(cmd)
gp.Net.FunTimerStop(gnm)
}

// RunNewStateCmd returns the commands to
// run the NewState shader to update variables
// at the start of a new trial.
func (gp *GPU) RunNewStateCmd() vk.CommandBuffer {
cnm := "RunNewState"
cmd, err := gp.Sys.CmdBuffByNameTry(cnm)
if err == nil {
return cmd
}
cmd = gp.Sys.NewCmdBuff(cnm)

neurDataN := int(gp.Net.NNeurons) * int(gp.Net.MaxData)
poolDataN := len(gp.Net.Pools)
gp.RunPipelineWait("NewState", poolDataN)

gp.StartRunCmd(cmd)
gp.RunPipelineCmd(cmd, "NewStatePool", poolDataN, "", "PoolGi")
gp.RunPipelineCmd(cmd, "NewStateNeuron", neurDataN, "PoolGi", "") // todo: this has NrnV read = 0 bug
gp.Sys.ComputeCmdEndCmd(cmd)
return cmd
}
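
Note on the event chaining in RunNewStateCmd: the "PoolGi" event orders the two dispatches -- NewStatePool signals it and NewStateNeuron waits on it, presumably because the neuron-level pass reads layer/pool values that the pool-level pass updates. The same two calls again, with the wait/signal arguments annotated (comments are editorial, not part of the commit):

gp.RunPipelineCmd(cmd, "NewStatePool", poolDataN, "", "PoolGi")   // waits on nothing, signals PoolGi
gp.RunPipelineCmd(cmd, "NewStateNeuron", neurDataN, "PoolGi", "") // waits on PoolGi, signals nothing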

// RunMinusPhase runs the MinusPhase shader to update snapshot variables
2 changes: 1 addition & 1 deletion axon/gpu_hlsl/gpu_cycleinc.hlsl
@@ -27,7 +27,7 @@
// Set 2: main network structs and vals -- all are writable
[[vk::binding(0, 2)]] RWStructuredBuffer<Context> Ctx; // [0]

[numthreads(1, 1, 1)]
[numthreads(64, 1, 1)]
void main(uint3 idx : SV_DispatchThreadID) { // over Context
if(idx.x == 0) {
Ctx[0].CycleInc();
4 changes: 2 additions & 2 deletions axon/gpu_hlsl/gpu_cyclepost.hlsl
@@ -98,8 +98,8 @@ void CyclePost(inout Context ctx, in LayerParams ly, int li, uint di) {
CyclePost2(ctx, ly, uint(li), di, LayVals[ly.Idxs.ValsIdx(di)], Pools[ly.Idxs.PoolIdx(0, di)]);
}

[numthreads(1, 1, 1)]
void main(uint3 idx : SV_DispatchThreadID) { // todo: iterate over global Data parallel
[numthreads(64, 1, 1)]
void main(uint3 idx : SV_DispatchThreadID) {
if (idx.x >= Ctx[0].NetIdxs.NData) {
return;
}
57 changes: 57 additions & 0 deletions axon/gpu_hlsl/gpu_newstate_neuron.hlsl
@@ -0,0 +1,57 @@
// Copyright (c) 2022, The Emergent Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// does NewState Update on each Neuron
// note: anything *reading* from neuron level must be called at neuron level!

#include "synmem.hlsl"

// note: all must be visible always because accessor methods refer to them
[[vk::binding(0, 1)]] StructuredBuffer<uint> NeuronIxs; // [Neurons][Idxs]
[[vk::binding(1, 1)]] StructuredBuffer<uint> SynapseIxs; // [Layer][SendPrjns][SendNeurons][Syns]
[[vk::binding(1, 2)]] RWStructuredBuffer<float> Neurons; // [Neurons][Vars][Data]
[[vk::binding(2, 2)]] RWStructuredBuffer<float> NeuronAvgs; // [Neurons][Vars]
[[vk::binding(5, 2)]] RWStructuredBuffer<float> Globals; // [NGlobals]
[[vk::binding(0, 3)]] RWStructuredBuffer<SynMemBlock> Synapses; // [Layer][SendPrjns][SendNeurons][Syns]
[[vk::binding(1, 3)]] RWStructuredBuffer<SynMemBlock> SynapseCas; // [Layer][SendPrjns][SendNeurons][Syns][Data]

#include "context.hlsl"
#include "layerparams.hlsl"

// note: binding is var, set

// Set 0: uniform layer params -- could not have prjns also be uniform..
[[vk::binding(0, 0)]] StructuredBuffer<LayerParams> Layers; // [Layer]

// Set 1: effectively uniform prjn params as structured buffers in storage

// Set 2: main network structs and vals -- all are writable
[[vk::binding(0, 2)]] StructuredBuffer<Context> Ctx; // [0]
[[vk::binding(3, 2)]] RWStructuredBuffer<Pool> Pools; // [Layer][Pools][Data]
[[vk::binding(4, 2)]] RWStructuredBuffer<LayerVals> LayVals; // [Layer][Data]


void NewStateNeuron2(in Context ctx, in LayerParams ly, uint ni, uint di) {
ly.NewStateNeuron(ctx, ni, di, LayVals[ly.Idxs.ValsIdx(di)]);
}

void NewStateNeuron(in Context ctx, uint ni, uint di) {
uint li = NrnI(ctx, ni, NrnLayIdx);
NewStateNeuron2(ctx, Layers[li], ni, di);
}

[numthreads(64, 1, 1)]
void main(uint3 idx : SV_DispatchThreadID) { // over Neurons * Data
uint ni = Ctx[0].NetIdxs.ItemIdx(idx.x);
if (!Ctx[0].NetIdxs.NeurIdxIsValid(ni)) {
return;
}
uint di = Ctx[0].NetIdxs.DataIdx(idx.x);
if (!Ctx[0].NetIdxs.DataIdxIsValid(di)) {
return;
}
NewStateNeuron(Ctx[0], ni, di);
}


@@ -49,24 +49,15 @@ void InitPrjnGBuffs(in Context ctx, in PrjnParams pj) {
}
}

void NewStateNeuron(in Context ctx, in LayerParams ly, uint ni, uint di, in LayerVals vals) {
ly.NewStateNeuron(ctx, ni, di, vals);
}

void NewState2(in Context ctx, in LayerParams ly, uint di, inout Pool pl, inout LayerVals vals) {
ly.NewStatePool(ctx, pl);
if (pl.IsLayPool == 0) {
return;
}
ly.NewStateLayer(ctx, pl, vals);
for (uint lni = pl.StIdx; lni < pl.EdIdx; lni++) {
NewStateNeuron(ctx, ly, lni + ly.Idxs.NeurSt, di, vals);
}
// if (ly.Act.Decay.Glong != 0) { // clear pipeline of incoming spikes, assuming time has passed
for (uint pi = 0; pi < ly.Idxs.RecvN; pi++) {
InitPrjnGBuffs(ctx, Prjns[ly.Idxs.RecvSt + pi]);
}
// }
}

void NewState(in Context ctx, uint di, inout Pool pl) {
13 changes: 13 additions & 0 deletions axon/layer_compute.go
@@ -318,6 +318,19 @@ func (ly *Layer) NewState(ctx *Context) {
ly.InitPrjnGBuffs(ctx)
}

// NewStateNeurons only calls the neurons part of new state -- for misbehaving GPU
func (ly *Layer) NewStateNeurons(ctx *Context) {
nn := ly.NNeurons
for di := uint32(0); di < ctx.NetIdxs.NData; di++ {
vals := ly.LayerVals(di)
for lni := uint32(0); lni < nn; lni++ {
ni := ly.NeurStIdx + lni
// note: this calls the basic neuron-level DecayState
ly.Params.NewStateNeuron(ctx, ni, di, vals)
}
}
}

// DecayState decays activation state by given proportion
// (default decay values are ly.Params.Acts.Decay.Act, Glong)
func (ly *Layer) DecayState(ctx *Context, di uint32, decay, glong, ahp float32) {
11 changes: 7 additions & 4 deletions axon/network.go
@@ -86,16 +86,19 @@ func (nt *Network) UpdateParams() {
// properly prior to calling this and subsequent Cycle methods.
func (nt *Network) NewState(ctx *Context) {
nt.NData = ctx.NetIdxs.NData
if nt.GPU.On {
nt.GPU.RunNewState()
return
}
// if nt.GPU.On { // todo: this has a bug in neuron-level access in updating SpkPrv
// nt.GPU.RunNewState()
// return
// }
for _, ly := range nt.Layers {
if ly.IsOff() {
continue
}
ly.NewState(ctx)
}
if nt.GPU.On {
nt.GPU.SyncStateGBufToGPU()
}
}
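
With the GPU NewState kernel disabled pending issue #234, a GPU run now computes NewState on the CPU and pushes the result with SyncStateGBufToGPU before cycling resumes on the GPU. A minimal sketch of the resulting per-trial calling pattern (editorial illustration only -- the bare 200-cycle loop stands in for the looper machinery that actually drives these calls):

nt.NewState(ctx) // CPU NewState for all layers; syncs state + GBuf to GPU when GPU is on
for cyc := 0; cyc < 200; cyc++ {
    nt.Cycle(ctx) // cycle updates still run on the GPU
}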

// Cycle runs one cycle of activation updating using threading methods.
Binary file modified axon/shaders/gpu_cycleinc.spv
Binary file modified axon/shaders/gpu_cyclepost.spv
Binary file removed axon/shaders/gpu_newstate.spv
Binary file added axon/shaders/gpu_newstate_neuron.spv
Binary file added axon/shaders/gpu_newstate_pool.spv
2 changes: 1 addition & 1 deletion examples/boa/approach_env.go
@@ -81,7 +81,7 @@ func (ev *Approach) Defaults() {

// Config configures the world
func (ev *Approach) Config() {
// ev.Rand.NewRand(ev.RndSeed)
ev.Rand.NewRand(ev.RndSeed)
ev.CSTot = ev.NDrives * ev.CSPerDrive
ev.ActMap = make(map[string]int)
for i, act := range ev.Acts {
28 changes: 22 additions & 6 deletions examples/boa/boa.go
@@ -74,9 +74,9 @@ type SimParams struct {
// Defaults sets default params
func (ss *SimParams) Defaults() {
ss.NData = 1
ss.EnvSameSeed = false
ss.EnvSameSeed = false // set to true to test ndata
ss.PctCortexMax = 1.0
ss.PctCortexStEpc = 5
ss.PctCortexStEpc = 10
ss.PctCortexNEpc = 5
ss.PctCortexInterval = 1
ss.PCAInterval = 10
@@ -329,6 +329,7 @@ func (ss *Sim) ConfigNet(net *axon.Network) {
return
}
net.Defaults()
net.SetNThreads(4)
ss.Params.SetObject("Network")
ss.InitWts(net)
}
@@ -480,6 +481,14 @@ func (ss *Sim) ConfigLoops() {
} else {
axon.LooperUpdtNetView(man, &ss.ViewUpdt, ss.Net)
axon.LooperUpdtPlots(man, &ss.GUI)
for _, m := range man.Stacks {
m.Loops[etime.Cycle].OnEnd.Prepend("GUI:CounterUpdt", func() {
ss.NetViewCounters()
})
m.Loops[etime.Trial].OnEnd.Prepend("GUI:CounterUpdt", func() {
ss.NetViewCounters()
})
}
}

if Debug {
@@ -693,10 +702,16 @@ func (ss *Sim) StatCounters(di int) {
ss.Stats.SetFloat32("CS", float32(ev.CS))
ss.Stats.SetFloat32("US", float32(ev.US))
ss.Stats.SetFloat32("HasRew", axon.GlbV(ctx, uint32(di), axon.GvHasRew))
ss.Stats.SetString("TrialName", "trl")
if di == 0 {
ss.ViewUpdt.Text = ss.Stats.Print([]string{"Run", "Epoch", "Trial", "Cycle", "NetAction", "Instinct", "ActAction", "ActMatch", "JustGated", "Should", "Rew"})
ss.Stats.SetString("TrialName", "trl") // todo: could have dist, US etc
}

func (ss *Sim) NetViewCounters() {
if ss.GUI.ViewUpdt.View == nil {
return
}
di := ss.GUI.ViewUpdt.View.Di
ss.StatCounters(di)
ss.ViewUpdt.Text = ss.Stats.Print([]string{"Run", "Epoch", "Trial", "Cycle", "NetAction", "Instinct", "ActAction", "ActMatch", "JustGated", "Should", "Rew"})
}

// TrialStats computes the trial-level statistics.
@@ -997,7 +1012,8 @@ func (ss *Sim) Log(mode etime.Modes, time etime.Times) {

switch {
case time == etime.Cycle:
row = ss.Stats.Int("Cycle")
return // not doing cycle-level logging -- too slow for GPU in general
// row = ss.Stats.Int("Cycle")
case time == etime.Trial:
if mode == etime.Train {
trl := ss.Loops.GetLoop(mode, etime.Trial).Counter.Cur
