This repository was archived by the owner on Jan 24, 2024. It is now read-only.

fix memory share from system in graph stream analysis. #289

Open

Wants to merge 17 commits into base: developing
Changes from 5 commits
framework/core/net/net.cpp: 3 changes (1 addition, 2 deletions)
@@ -660,8 +660,7 @@ template<typename Ttype, DataType Dtype, Precision Ptype, OpRunType RunType>
 Status Net<Ttype, Dtype, Ptype, RunType>::init_env(graph::Graph<Ttype, Dtype, Ptype>& graph) {
     LOG(WARNING) << "Detect and initial " << graph.get_ins().size() << " lanes.";
     // fixme, multi_stream error
-    //Env<Ttype>::env_init(graph.get_ins().size());
-    Env<Ttype>::env_init(1);
+    Env<Ttype>::env_init(graph.get_ins().size());
     LOG(WARNING) << "Current used device id : " << TargetWrapper<Ttype>::get_device_id();
     return Status::OK();
 }
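
The effect of the change above is that the runtime sets up one lane (stream pair) per graph input instead of the single hard-coded lane. A minimal stand-alone sketch of that idea follows; DemoEnv and Lane are illustrative stand-ins, not Anakin's real Env or stream types.

// Illustrative only: one execution lane per graph input, so a multi-input
// graph no longer collapses onto a single stream.
#include <cstddef>
#include <vector>

struct Lane { int id; };   // stand-in for a data/compute stream pair

struct DemoEnv {
    static std::vector<Lane>& lanes() {
        static std::vector<Lane> pool;
        return pool;
    }
    // Mirrors the intent of Env<Ttype>::env_init(graph.get_ins().size()):
    // grow the lane pool so every graph input gets its own lane.
    static void env_init(std::size_t num_lanes) {
        for (std::size_t i = lanes().size(); i < num_lanes; ++i) {
            lanes().push_back(Lane{static_cast<int>(i)});
        }
    }
};

int main() {
    DemoEnv::env_init(3);   // a graph with 3 inputs gets 3 lanes, not 1
    return static_cast<int>(DemoEnv::lanes().size()) == 3 ? 0 : 1;
}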
framework/graph/graph.cpp: 20 changes (12 additions, 8 deletions)
@@ -138,19 +138,23 @@ Status Graph<Ttype, Dtype, Ptype>::Optimize() EXCLUSIVE_LOCKS_REQUIRED(_mut) {
     _nodes_exec_order = scheduler.get_exec_node_in_order();
 #else // enable conv+eltwise fusion
     // optimization
-    ConvElsFusionScheduler conv_eltwise_fusion_scheduler;
-    conv_eltwise_fusion_scheduler.RegIOResource(_vgraph);
-    conv_eltwise_fusion_scheduler.Run();
-    // get node exec in order
-    //_nodes_exec_order = conv_eltwise_fusion_scheduler.get_exec_node_in_order();
+    ConvElsFusionScheduler conv_eltwise_fusion_scheduler;
+    conv_eltwise_fusion_scheduler.RegIOResource(_vgraph);
+    conv_eltwise_fusion_scheduler.Run();
+    // get node exec in order
+    //_nodes_exec_order = conv_eltwise_fusion_scheduler.get_exec_node_in_order();
 #endif
-    // optimization again
+    // optimization again
+    ParallScheduler para_scheduler;
+    para_scheduler.RegIOResource(_vgraph);
+    para_scheduler.Run();
+
     MemoryScheduler mem_scheduler;
     mem_scheduler.RegIOResource(_vgraph);
     mem_scheduler.Run();
-    ParallScheduler para_scheduler;
+    /*ParallScheduler para_scheduler;
     para_scheduler.RegIOResource(_vgraph);
-    para_scheduler.Run();
+    para_scheduler.Run();*/
 
     // set info for graph
     statistics.set_info<IS_OPTIMIZED>(true);
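
The reordering above runs ParallScheduler before MemoryScheduler, so every edge's lane (stream) id is already assigned when memory-sharing decisions are made, and the second ParallScheduler pass is commented out. A simplified sketch of why the order matters, using placeholder types rather than the real scheduler classes:

#include <cstddef>
#include <vector>

struct VEdge { int lane = 0; bool shared = false; };

// Stand-in for ParallScheduler: assigns a lane to every edge.
struct ParallSchedulerSketch {
    void run(std::vector<VEdge>& edges) {
        for (std::size_t i = 0; i < edges.size(); ++i) {
            edges[i].lane = static_cast<int>(i % 2);   // pretend there are two lanes
        }
    }
};

// Stand-in for MemoryScheduler: only lets neighbours share a buffer when their
// lanes match, which is meaningless if lanes are still at their default value.
struct MemorySchedulerSketch {
    void run(std::vector<VEdge>& edges) {
        for (std::size_t i = 1; i < edges.size(); ++i) {
            edges[i].shared = (edges[i].lane == edges[i - 1].lane);
        }
    }
};

int main() {
    std::vector<VEdge> edges(4);
    ParallSchedulerSketch{}.run(edges);   // must run first, as in the diff above
    MemorySchedulerSketch{}.run(edges);
    return 0;
}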
framework/graph/graph_global_mem.h: 20 changes (10 additions, 10 deletions)
@@ -47,7 +47,7 @@ class GraphGlobalMemBase {
     }
 
     /// get sum size in m-btyes
-    size_t get_sum_mbyte() EXCLUSIVE_LOCKS_REQUIRED(_mut) {
+    float get_sum_mbyte() EXCLUSIVE_LOCKS_REQUIRED(_mut) {
         std::unique_lock<std::mutex> lock(this->_mut);
         size_t sum = 0;
         for (auto block_p : _int8_mem_pool) {
@@ -139,7 +139,7 @@ enum INFO{
 
 template<INFO INFO_T>
 struct Decide{
-    typedef int type;
+    typedef float type;
 };
 
 template<>
@@ -164,16 +164,16 @@ struct Statistics {
     template<INFO INFO_T>
     struct Info_to_type {};
 
-    inline void _set_info(int mem_in_mbytes, Info_to_type<TEMP_MEM>) {
+    inline void _set_info(float mem_in_mbytes, Info_to_type<TEMP_MEM>) {
         temp_mem_used = mem_in_mbytes;
     }
-    inline void _set_info(int mem_in_mbytes, Info_to_type<ORI_TEMP_MEM>) {
+    inline void _set_info(float mem_in_mbytes, Info_to_type<ORI_TEMP_MEM>) {
         original_temp_mem_used = mem_in_mbytes;
     }
-    inline void _set_info(int mem_in_mbytes, Info_to_type<MODEL_MEM>) {
+    inline void _set_info(float mem_in_mbytes, Info_to_type<MODEL_MEM>) {
         model_mem_used = mem_in_mbytes;
     }
-    inline void _set_info(int mem_in_mbytes, Info_to_type<SYSTEM_MEM>) {
+    inline void _set_info(float mem_in_mbytes, Info_to_type<SYSTEM_MEM>) {
         system_mem_used = mem_in_mbytes;
     }
     inline void _set_info(bool whether_optimized, Info_to_type<IS_OPTIMIZED>) {
@@ -198,13 +198,13 @@ struct Statistics {
 
 private:
     ///< temp_mem_used : temp memory used by anakin edge [MB].default 0
-    int temp_mem_used{0};
+    float temp_mem_used{0.f};
     ///< original_temp_mem_used : temp memory used by old version [MB].default 0
-    int original_temp_mem_used{0};
+    float original_temp_mem_used{0.f};
     ///< system_mem_used : system mem used by nvidia / amd GPU system resource [MB].default 0
-    int system_mem_used{0};
+    float system_mem_used{0.f};
    ///< model_mem_used : mem used by model.default 0
-    int model_mem_used{0};
+    float model_mem_used{0.f};
 
     ///< is_optimized stand for whether optimized flag.default false
     bool is_optimized{false};
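
Switching the statistics from int to float matters because these values are stored in megabytes; with integer fields anything below 1 MB truncates to zero. A small stand-alone illustration (plain C++, not the GraphGlobalMemBase API):

#include <cstdio>

int main() {
    const double block_bytes = 512.0 * 1024.0;                               // a 0.5 MB block
    int   mb_as_int   = static_cast<int>(block_bytes / (1024.0 * 1024.0));   // truncates to 0
    float mb_as_float = static_cast<float>(block_bytes / (1024.0 * 1024.0)); // keeps 0.5
    std::printf("int: %d MB, float: %.2f MB\n", mb_as_int, mb_as_float);
    return 0;
}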
framework/graph/llvm/optimizer/memory_scheduler.cpp: 5 changes (2 additions, 3 deletions)
@@ -123,8 +123,8 @@ void IOBlockResource::free(std::vector<io>& io_vec, VGraph* vgraph_p) {
 
 void IOBlockResource::lock(std::vector<io>& io_vec) {
     for (auto& io_res : io_vec) {
-        if (has_free()) {
-            auto& tmp_io = _free.front(); // get active resouce
+        if (has_free(io_res)) {
+            auto tmp_io = get_free(io_res);//_free.front(); // get active resouce
             io_res.shared = true;
 
             if (tmp_io.shared) {
@@ -134,7 +134,6 @@ void IOBlockResource::lock(std::vector<io>& io_vec) {
             }
 
             _lock.push_back(io_res);
-            _free.pop_front();
         } else { // alloc new io block
             io_res.shared = false;
             _lock.push_back(io_res);
framework/graph/llvm/optimizer/memory_scheduler.h: 23 changes (22 additions, 1 deletion)
@@ -83,7 +83,28 @@ class IOBlockResource {
     ~IOBlockResource() {}
 
     void free(std::vector<io>&, VGraph*);
-    inline bool has_free() { return !(_free.empty()); }
+    inline bool has_free(io& target) {
+        for (auto it = _free.begin(); it != _free.end();) {
+            auto& io_tmp = *it;
+            if(target.lane == io_tmp.lane) {
+                return true;
+            }
+            ++it;
+        }
+        return false;
+    }
+    inline io get_free(io& target) {
+        for (auto it = _free.begin(); it != _free.end();) {
+            auto& io_tmp = *it;
+            if(target.lane == io_tmp.lane) {
+                it = _free.erase(it);
+                return io_tmp;
+            } else {
+                ++it;
+            }
+        }
+        return io();
+    }
     bool is_same_target(io&, io&, VGraph*);
     void push_free(io&, VGraph*);
     void lock(std::vector<io>&);
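
The new has_free()/get_free() overloads only hand back a free block whose lane matches the requesting io, which is what stops buffers from being shared across streams in lock() above. Below is a self-contained sketch of the same pattern with simplified types (IoSketch and FreeListSketch are illustrative, not the real llvm::io or IOBlockResource); the sketch copies the element out before erasing the node, so nothing is read through a reference into freed list storage.

#include <list>

struct IoSketch { int lane = 0; bool shared = false; };

struct FreeListSketch {
    std::list<IoSketch> free_;

    // A block is only reusable when its lane matches the requester's lane.
    bool has_free(const IoSketch& target) const {
        for (const auto& io_tmp : free_) {
            if (io_tmp.lane == target.lane) { return true; }
        }
        return false;
    }

    // Take the first lane-matched block out of the pool, or a default one.
    IoSketch get_free(const IoSketch& target) {
        for (auto it = free_.begin(); it != free_.end(); ++it) {
            if (it->lane == target.lane) {
                IoSketch found = *it;   // copy out before erasing the node
                free_.erase(it);
                return found;
            }
        }
        return IoSketch{};
    }
};

int main() {
    FreeListSketch pool;
    pool.free_.push_back(IoSketch{1, true});   // a free block on lane 1
    IoSketch req{0, false};                    // a request on lane 0
    // Different lane: no reuse, so the caller would allocate a new block.
    return pool.has_free(req) ? 1 : 0;
}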
saber/core/context.h: 8 changes (6 additions, 2 deletions)
@@ -43,14 +43,18 @@ class Context final{
            _device_id = device_id;
        }
        if (data_stream_id >= devs[_device_id]._max_stream) {
-           LOG(WARNING) << "data stream index exceeds the maximum stream number, set to default stream(0)!";
+           LOG(WARNING) << "data stream index("<< data_stream_id
+               << ") exceeds the maximum stream number("<< devs[_device_id]._max_stream
+               << "), set to default stream(0)!";
            data_stream_id = 0;
        }
        _stream_data = devs[_device_id]._data_stream[data_stream_id];
        _data_stream_id = data_stream_id;
 
        if (compute_stream_id >= devs[_device_id]._max_stream) {
-           LOG(WARNING) << "compute stream index exceeds the maximum stream number, set to default stream(0)!";
+           LOG(WARNING) << "compute stream index(" << compute_stream_id
+               << ") exceeds the maximum stream number("<< devs[_device_id]._max_stream
+               << "), set to default stream(0)!";
            compute_stream_id = 0;
        }
        _stream_compute = devs[_device_id]._compute_stream[compute_stream_id];
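
The reworked warnings above now report both the requested stream index and the device's stream limit before falling back to stream 0. A tiny stand-alone version of that clamping logic (a hypothetical helper, not the saber Context API):

#include <cstdio>

int clamp_stream_id(int stream_id, int max_stream) {
    if (stream_id >= max_stream) {
        std::fprintf(stderr,
                     "stream index(%d) exceeds the maximum stream number(%d), "
                     "set to default stream(0)!\n",
                     stream_id, max_stream);
        return 0;   // fall back to the default stream
    }
    return stream_id;
}

int main() {
    return clamp_stream_id(8, 4);   // out of range: warns and returns 0
}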
test/framework/net/net_exec_test.cpp: 1 change (0 additions, 1 deletion)
@@ -265,7 +265,6 @@ TEST(NetTest, net_execute_reconstruction_test) {
 
 int main(int argc, const char** argv){
 
-    Env<Target>::env_init();
     // initial logger
     logger::init(argv[0]);
     InitTest();