-
Notifications
You must be signed in to change notification settings - Fork 23
/
Copy pathCUDAAgent.h
411 lines (404 loc) · 22.7 KB
/
CUDAAgent.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
#ifndef INCLUDE_FLAMEGPU_SIMULATION_DETAIL_CUDAAGENT_H_
#define INCLUDE_FLAMEGPU_SIMULATION_DETAIL_CUDAAGENT_H_
#include <memory>
#include <map>
#include <utility>
#include <string>
#include <mutex>
#include <unordered_map>
#include <list>
// include sub classes
#include "flamegpu/detail/JitifyCache.h"
#include "flamegpu/simulation/detail/CUDAAgentStateList.h"
#include "flamegpu/model/AgentFunctionData.cuh"
#include "flamegpu/model/SubAgentData.h"
#include "flamegpu/runtime/detail/curve/curve_rtc.cuh"
#include "flamegpu/simulation/detail/AgentInterface.h"
#include "flamegpu/simulation/detail/EnvironmentManager.cuh"
namespace flamegpu {
class HostAPI;
struct VarOffsetStruct;
namespace detail {
class CUDAEnvironmentDirectedGraphBuffers;
class CUDAMacroEnvironment;
class CUDAScatter;
class CUDAFatAgent;
/**
* This is the regular CUDAAgent
* It provides access to the device buffers representing the states of a particular agent
* However it does not own these buffers, they are owned by it's parent CUDAFatAgent, as buffers are shared with all mapped agents too.
*/
class CUDAAgent : public AgentInterface {
#ifdef FLAMEGPU_VISUALISATION
friend struct visualiser::AgentVisData;
#endif // FLAMEGPU_VISUALISATION
// Requires access to internal maps holding curve
friend class CUDAEnvironmentDirectedGraphBuffers;
public:
/**
* map of agent function name to RTC function instance
*/
typedef std::map<const std::string, std::unique_ptr<jitify::experimental::KernelInstantiation>> CUDARTCFuncMap;
typedef std::map<const std::string, std::shared_ptr<detail::curve::CurveRTCHost>> CUDARTCHeaderMap;
/**
* Element type of CUDARTCFuncMap
*/
typedef std::pair<const std::string, std::unique_ptr<jitify::experimental::KernelInstantiation>> CUDARTCFuncMapPair;
/**
* Normal constructor
* @param description Agent description of the agent
* @param _cudaSimulation Parent CUDASimulation of the agent
*/
CUDAAgent(const AgentData& description, const CUDASimulation &_cudaSimulation);
/**
* Subagent form constructor, used when creating a CUDAAgent for a mapped agent within a submodel
* @param description Agent description of the agent
* @param _cudaSimulation Parent CUDASimulation of the agent
* @param master_agent The (parent) agent which this is agent is mapped to
* @param mapping Mapping definition for how this agent is connected with its master agent.
*/
CUDAAgent(
const AgentData &description,
const CUDASimulation &_cudaSimulation,
const std::unique_ptr<CUDAAgent> &master_agent,
const std::shared_ptr<SubAgentData> &mapping);
/**
* Uses the cuRVE runtime to map the variables used by the agent function to the cuRVE
* library so that can be accessed by name within a n agent function
* @param func The function.
* @param instance_id The CUDASimulation instance_id of the parent instance. This is added to the hash, to differentiate instances
* @note TODO: This could be improved by iterating the variable list within the state_list, rather than individually looking up vars (the two lists should have all the same vars)
* @note This should probably be addressed when curve is updated to not use individual memcpys
*/
void mapRuntimeVariables(const AgentFunctionData& func, unsigned int instance_id) const;
/**
* Copies population data from the provided host object
* To the device buffers held by this object (overwriting any existing agent data)
* Also updates population size, clears disabled agents
* @param population An AgentVector object with the same internal AgentData description, to provide the input data
* @param state_name The agent state to add the agents to
* @param scatter Scatter instance and scan arrays to be used (CUDASimulation::singletons->scatter)
* @param streamId The stream index to use for accessing stream specific resources such as scan compaction arrays and buffers
* @param stream CUDA stream to be used for async CUDA operations
* @note Scatter is required for initialising submodel vars
*/
void setPopulationData(const AgentVector& population, const std::string &state_name, detail::CUDAScatter &scatter, unsigned int streamId, cudaStream_t stream);
/**
* Copies population data the device buffers held by this object
* To the hosts object (overwriting any existing agent data)
* @param population An AgentVector object with the same internal AgentData description, to receive the output data
* @param state_name The agent state to get the agents from
*/
void getPopulationData(AgentVector& population, const std::string& state_name) const;
/**
* Returns the number of alive and active agents in the named state
* @param state The state to return information about
*/
unsigned int getStateSize(const std::string &state) const override;
/**
* Returns the number of alive and active agents in the named state
* @param state The state to return information about
*/
unsigned int getStateAllocatedSize(const std::string &state) const;
/**
* Returns the Agent description which this CUDAAgent represents.
*/
CAgentDescription getAgentDescription() const override;
/**
* Returns the device pointer to the buffer for the associated state and variable
* @note This returns data_condition, such that the buffer does not include disabled agents
*/
void *getStateVariablePtr(const std::string &state_name, const std::string &variable_name) override;
/**
* Processes agent death, this call is forwarded to the fat agent
* All disabled agents are scattered to swap
* Only alive agents with deathflag are scattered
* @param func The agent function condition being processed
* @param scatter Scatter instance and scan arrays to be used (CUDASimulation::singletons->scatter)
* @param streamId The stream index to use for accessing stream specific resources such as scan compaction arrays and buffers
* @param stream CUDA stream to be used for async CUDA operations
* @see CUDAFatAgent::processDeath(unsigned int, const std::string &, unsigned int)
*/
void processDeath(const AgentFunctionData& func, detail::CUDAScatter &scatter, unsigned int streamId, cudaStream_t stream);
/**
* Transitions all active agents from the source state to the destination state
* @param _src The source state
* @param _dest The destination state
* @param scatter Scatter instance and scan arrays to be used (CUDASimulation::singletons->scatter)
* @param streamId The stream index to use for accessing stream specific resources such as scan compaction arrays and buffers
* @param stream CUDA stream to be used for async CUDA operations
* @see CUDAFatAgent::transitionState(unsigned int, const std::string &, const std::string &, unsigned int)
*/
void transitionState(const std::string &_src, const std::string &_dest, detail::CUDAScatter &scatter, unsigned int streamId, cudaStream_t stream);
/**
* Scatters agents based on their output of the agent function condition
* Agents which failed the condition are scattered to the front and marked as disabled
* Agents which pass the condition are scattered to after the disabled agents
* @param func The agent function being processed
* @param scatter Scatter instance and scan arrays to be used (CUDASimulation::singletons->scatter)
* @param streamId The stream index to use for accessing stream specific resources such as scan compaction arrays and buffers
* @param stream CUDA stream to be used for async CUDA operations
* @see CUDAFatAgent::processFunctionCondition(unsigned int, unsigned int)
* @note Named state must not already contain disabled agents
* @note The disabled agents are re-enabled using clearFunctionCondition(const std::string &)
*/
void processFunctionCondition(const AgentFunctionData& func, detail::CUDAScatter &scatter, unsigned int streamId, cudaStream_t stream);
/**
* Scatters agents from the provided device buffer, this is used for host agent creation
* The device buffer must be packed according to the param offsets
* @param state_name The state agents are scattered into
* @param newSize The number of new agents
* @param d_inBuff The device buffer containing the new agents
* @param offsets This defines how the memory is laid out within d_inBuff
* @param scatter Scatter instance and scan arrays to be used (CUDASimulation::singletons->scatter)
* @param streamId The stream index to use for accessing stream specific resources such as scan compaction arrays and buffers
* @param stream CUDA stream to be used for async CUDA operations
*/
void scatterHostCreation(const std::string &state_name, unsigned int newSize, char *const d_inBuff, const VarOffsetStruct &offsets, detail::CUDAScatter &scatter, unsigned int streamId, cudaStream_t stream);
/**
* Sorts all agent variables according to the positions stored inside Message Output scan buffer
* @param state_name The state agents are scattered into
* @param scatter Scatter instance and scan arrays to be used (CUDASimulation::singletons->scatter)
* @param streamId The stream index to use for accessing stream specific resources such as scan compaction arrays and buffers
* @param stream CUDA stream to be used for async CUDA operations
* @see HostAgentAPI::sort(const std::string &, HostAgentAPI::Order, int, int)
*/
void scatterSort_async(const std::string &state_name, detail::CUDAScatter &scatter, unsigned int streamId, cudaStream_t stream);
/**
* Allocates a buffer for storing new agents into and
* uses the cuRVE runtime to map variables for use with an agent function that has device agent birth
* @param func_agent The Cuda agent which the agent function belongs to (required so that RTC function instances can be obtained)
* @param func The agent function being processed
* @param maxLen The maximum number of new agents (this will be the size of the agent state executing func)
* @param scatter Scatter instance and scan arrays to be used (CUDASimulation::singletons->scatter)
* @param instance_id The CUDASimulation instance_id of the parent instance. This is added to the hash, to differentiate instances
* @param stream The CUDA stream used to execute the initialisation of the new agent databuffers
* @param streamId The stream index to use for accessing stream specific resources such as scan compaction arrays and buffers
* @note This method is async, the stream used it not synchronised
*/
void mapNewRuntimeVariables_async(const CUDAAgent& func_agent, const AgentFunctionData& func, unsigned int maxLen, detail::CUDAScatter &scatter, unsigned int instance_id, cudaStream_t stream, unsigned int streamId);
/**
* Releases the buffer that was storing new age data
* @param func The function.
*/
void releaseNewBuffer(const AgentFunctionData& func);
/**
* Scatters agents from the currently assigned device agent birth buffer (see member variable newBuffs)
* The device buffer must be packed in the same format as mapNewRuntimeVariables(const AgentFunctionData&, unsigned int, unsigned int)
* @param func The agent function being processed
* @param newSize The maximum number of new agents (this will be the size of the agent state executing func)
* @param scatter Scatter instance and scan arrays to be used (CUDASimulation::singletons->scatter)
* @param streamId The stream index to use for accessing stream specific resources such as scan compaction arrays and buffers
* @param stream CUDA stream to be used for async CUDA operations
*/
void scatterNew(const AgentFunctionData& func, unsigned int newSize, detail::CUDAScatter &scatter, unsigned int streamId, cudaStream_t stream);
/**
* Reenables all disabled agents within the named state
* @param state The named state to enable all agents within
*/
void clearFunctionCondition(const std::string &state);
/**
* Instantiates a RTC Agent function (or agent function condition) from agent function data description containing the source.
*
* Uses Jitify to create an instantiation of the program. Any compilation errors in the user provided agent function will be reported here.
* @param func The Agent function data structure containing the src for the function
* @param env Object containing environment properties for the simulation instance
* @param macro_env Object containing environment macro properties for the simulation instance
* @param directed_graphs Map of directed graphs for the simulation instance
* @param function_condition If true then this function will instantiate a function condition rather than an agent function
* @throw exception::InvalidAgentFunc thrown if the user supplied agent function has compilation errors
*/
void addInstantitateRTCFunction(const AgentFunctionData& func, const std::shared_ptr<EnvironmentManager>& env, std::shared_ptr<const detail::CUDAMacroEnvironment> macro_env,
const std::unordered_map<std::string, std::shared_ptr<CUDAEnvironmentDirectedGraphBuffers>>& directed_graphs, bool function_condition = false);
/**
* Instantiates the curve instance for an (non-RTC) Agent function (or agent function condition) from agent function data description containing the source.
*
* This performs CUDA allocations, so must be performed after CUDA device selection/context creation
* @param func The Agent function data structure containing the details of the function
* @param env Object containing environment properties for the simulation instance
* @param macro_env Object containing environment macro properties for the simulation instance
* @param directed_graphs Map of directed graphs for the simulation instance
* @param function_condition If true then this function will instantiate a function condition rather than an agent function
*/
void addInstantitateFunction(const AgentFunctionData& func, const std::shared_ptr<EnvironmentManager>& env, std::shared_ptr<const detail::CUDAMacroEnvironment> macro_env,
const std::unordered_map<std::string, std::shared_ptr<CUDAEnvironmentDirectedGraphBuffers>>& directed_graphs, bool function_condition = false);
/**
* Returns the jitify kernel instantiation of the agent function.
* Will throw an exception::InvalidAgentFunc excpetion if the function name does not have a valid instantiation
* @param function_name the name of the RTC agent function or the agent function name suffixed with condition (if it is a function condition)
*/
const jitify::experimental::KernelInstantiation& getRTCInstantiation(const std::string &function_name) const;
detail::curve::CurveRTCHost &getRTCHeader(const std::string &function_name) const;
/**
* Returns the host interface for managing the curve instance for the named agent function
* @param function_name The name of the agent's agent function, to return the curve instance for
*/
detail::curve::HostCurve &getCurve(const std::string& function_name) const;
/**
* Returns the CUDARTCFuncMap
*/
const CUDARTCFuncMap& getRTCFunctions() const;
/**
* Resets the number of agents in any unmapped statelists to 0
* They count as unmapped if they are not mapped to a master state, sub mappings will be reset
* @param scatter Scatter instance and scan arrays to be used (CUDASimulation::singletons->scatter)
* @param streamId The stream index to use for accessing stream specific resources such as scan compaction arrays and buffers
* @param stream CUDA stream to be used for async CUDA operations
*/
void initUnmappedVars(CUDAScatter& scatter, unsigned int streamId, cudaStream_t stream);
/**
* Initialises any agent variables within the CUDAFatAgentStateList of state which are not present in the agent-state's CUDAAgentStateList
* @param state Affected state
* @param count Number of variables to init
* @param offset Offset into the buffer of agents to init
* @param scatter Scatter instance and scan arrays to be used
* @param streamId The stream index to use for accessing stream specific resources such as scan compaction arrays and buffers
* @param stream CUDA stream to be used for async CUDA operations
*/
void initExcludedVars(const std::string& state, unsigned int count, unsigned int offset, CUDAScatter& scatter, unsigned int streamId, cudaStream_t stream);
/**
* Resets the number of agents in every statelist to 0
*/
void cullAllStates();
/**
* Resets the number of agents in any unmapped statelists to 0
* They count as unmapped if they are not mapped to a master state, sub mappings will be reset
*/
void cullUnmappedStates();
/**
* Resize the state underlying buffers
* This is triggered on the CUDAFatAgentStateList, so unmapped agent variables are also resized
* @param state The minimum number of agents that must be representable
* @param minSize The minimum number of agents that must be representable
* @param retainData If true existing buffer data is retained
* @param stream The stream to be used for memcpys if data is retained
*/
void resizeState(const std::string &state, unsigned int minSize, bool retainData, cudaStream_t stream);
/**
* Updates the number of alive agents, does not affect disabled agents or change agent data
* @param state The state to affect
* @param newSize Number of active agents
* @throw exception::InvalidMemoryCapacity If the new number of disabled + active agents would exceed currently allocated buffer capacity
*/
void setStateAgentCount(const std::string& state, unsigned int newSize);
/**
* Returns a list of variable buffers attached to bound agents, not available in this agent
* @param state The state affected state
* @note This access is only intended for DeviceAgentVector's correctly handling of subagents
*/
std::list<std::shared_ptr<VariableBuffer>> getUnboundVariableBuffers(const std::string& state);
/**
* Returns the next free agent id, and increments the ID tracker by the specified count
* @param count Number that will be added to the return value on next call to this function
* @return An ID that can be assigned to an agent that wil be stored within this CUDAAgent's CUDAFatAgent
*/
id_t nextID(unsigned int count = 1) override;
/**
* Returns a device pointer to the value returns by nextID(0)
* If the device value is changed, then the internal ID counter must be updated via CUDAAgent::scatterNew()
*/
id_t* getDeviceNextID();
/**
* Assigns IDs to any agents who's ID has the value ID_NOT_SET
* @param hostapi HostAPI object, this is used to provide cub temp storage
* @param scatter Scatter instance and scan arrays to be used
* @param stream The CUDAStream to use for CUDA operations
* @param streamId The stream index to use for accessing stream specific resources such as scan compaction arrays and buffers
*/
void assignIDs(HostAPI &hostapi, CUDAScatter& scatter, cudaStream_t stream, unsigned int streamId);
/**
* Used to allow HostAgentAPI to store a persistent DeviceAgentVector
* @param state_name Agent state to affect
* @param d_vec The DeviceAgentVector to be stored
*/
void setPopulationVec(const std::string& state_name, const std::shared_ptr<DeviceAgentVector_impl>& d_vec) override;
/**
* Used to allow HostAgentAPI to retrieve a stored DeviceAgentVector
* @param state_name Agent state to affect
*/
std::shared_ptr<DeviceAgentVector_impl> getPopulationVec(const std::string& state_name) override;
/**
* Used to allow HostAgentAPI to clear the stored DeviceAgentVector
* Any changes will be synchronised first
*/
void resetPopulationVecs() override;
private:
/**
* Validates all IDs for contained agents, if any share an ID (which is not ID_NOT_SET) an exception is thrown
* @throws exception::AgentIDCollision If the contained agent populations contain multiple agents with the same ID
*/
void validateIDCollisions(cudaStream_t stream) const;
/**
* Sums the size required for all variables
*/
static size_t calcTotalVarSize(const AgentData &agent) {
size_t rtn = 0;
for (const auto &v : agent.variables) {
rtn += v.second.type_size * v.second.elements;
}
return rtn;
}
/**
* The fat index of this agent within its CUDAFatAgent
*/
unsigned int getFatIndex() const { return fat_index; }
/**
* Returns this CUDAAgent's fat agent
*/
std::shared_ptr<CUDAFatAgent> getFatAgent() { return fat_agent; }
/**
* The agent description of this agent
*/
const AgentData &agent_description;
/**
* Map of all states held by this agent
*/
std::unordered_map<std::string, std::shared_ptr<CUDAAgentStateList>> state_map;
/**
* The 'fat agent' representation of this agent
* This acts as a CUDAAgent with access to all variables for all mapped agent-states
*/
std::shared_ptr<CUDAFatAgent> fat_agent;
/**
* Multiple mapped agents may share names/definitions, so instead they are assigned indices
*/
const unsigned int fat_index;
/**
* The parent model
*/
const CUDASimulation &cudaSimulation;
/**
* map between function_name (or function_name_condition) and the jitify instance
*/
CUDARTCFuncMap rtc_func_map;
/**
* map between function name (or function_name_condition) and the rtc header
* This allows access to the header data cache, for updating curve
*/
CUDARTCHeaderMap rtc_header_map;
/**
* map between function name (or function_name_condition) and the curve interface
* This allows access for updating curve
*/
std::unordered_map<std::string, std::shared_ptr<detail::curve::HostCurve>> curve_map;
/**
* Used when allocated new buffers
*/
const size_t TOTAL_AGENT_VARIABLE_SIZE;
/**
* Holds currently held new buffs set by mapNewRuntimeVariables, cleared by unmapNewRuntimeVariables
* key: initial state name, val: allocated buffer
*/
std::unordered_map<std::string, void*> newBuffs;
/**
* Mutex for writing to newBuffs
*/
std::mutex newBuffsMutex;
/**
* Nullptr until getPopulationData() is called, after which it holds the return value
*/
std::map<std::string, std::shared_ptr<DeviceAgentVector_impl>> population_dvec;
};
} // namespace detail
} // namespace flamegpu
#endif // INCLUDE_FLAMEGPU_SIMULATION_DETAIL_CUDAAGENT_H_