Support tensor stride
Summary:
X-link: facebookresearch/param#126

Support tensor strides in the execution trace.

Test Plan: buck2 test mode/opt caffe2/test:test_profiler_cuda profiler.test_execution_trace.TestExecutionTrace

Differential Revision: D58900476
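
For orientation, here is a hypothetical node as the observer would emit it under the bumped 1.1.1-chakra.0.0.4 schema. All ids and values below are invented for illustration and the attrs list is abbreviated; the point is the new "strides" entries alongside "shapes" and "types" in both "inputs" and "outputs". Each tensor value is encoded as [tensor_id, storage_id, offset, numel, itemsize, "device"], so a contiguous 2x3 float32 CUDA tensor has shape [2, 3] and strides [3, 1]:

{
"id": 42, "name": "aten::add", "ctrl_deps": 41,
"inputs": {"values": [[3, 2, 0, 6, 4, "cuda:0"]], "shapes": [[2, 3]], "types": ["Tensor(float)"], "strides": [[3, 1]]},
"outputs": {"values": [[5, 4, 0, 6, 4, "cuda:0"]], "shapes": [[2, 3]], "types": ["Tensor(float)"], "strides": [[3, 1]]},
"attrs": [{"name": "rf_id", "type": "uint64", "value": 7}]
}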
shengfukevin authored and facebook-github-bot committed Jun 22, 2024
1 parent 5b14943 commit 7d121ef
Showing 1 changed file with 94 additions and 95 deletions.

torch/csrc/profiler/standalone/execution_trace_observer.cpp
@@ -64,71 +64,6 @@ std::string json_str_escape(const std::string& str);
constexpr size_t maxNumElements = 4096;
constexpr size_t maxStrLength = 8192;

inline std::string getValueType(
const c10::IValue& val,
const bool baseType = true,
const size_t maxArrayLen = maxNumElements) {
std::string type = val.tagKind();

if (val.isTensor()) {
// Add tensor element data type.
type += fmt::format("({})", std::string(val.toTensor().dtype().name()));
} else if (val.isTuple()) {
const auto& val_container = val.toTupleRef().elements();
std::vector<std::string> str_array;
for (const auto& t : val_container) {
str_array.emplace_back(getValueType(t, false));
}
type += vectorToString(str_array);
} else if (val.isList()) {
const auto& val_list = val.toList();
std::vector<std::string> str_array;
str_array.reserve(val_list.size());
for (const auto j : c10::irange(val_list.size())) {
str_array.push_back(getValueType(val_list.get(j), false));
if (j >= maxArrayLen) {
LOG(WARNING) << "list size=" << val_list.size()
<< " exceeded maxArrayLen=" << maxArrayLen;
break;
}
}
type += vectorToString(str_array);
}
return baseType ? fmt::format("\"{}\"", type) : type;
}

inline std::string getValueShape(
const c10::IValue& val,
const size_t maxArrayLen = maxNumElements) {
if (val.isTensor()) {
auto& tensor = val.toTensor();
if (tensor.defined() &&
!tensor.unsafeGetTensorImpl()->has_symbolic_sizes_strides()) {
return vectorToString(tensor.sizes().vec());
}
} else if (val.isTuple()) {
const auto& val_container = val.toTupleRef().elements();
std::vector<std::string> str_array;
for (const auto& t : val_container) {
str_array.push_back(getValueShape(t));
}
return vectorToString(str_array);
} else if (val.isList()) {
const auto& val_list = val.toList();
std::vector<std::string> str_array;
str_array.reserve(val_list.size());
for (const auto j : c10::irange(val_list.size())) {
str_array.push_back(getValueShape(val_list.get(j)));
if (j >= maxArrayLen) {
LOG(WARNING) << "list size=" << val_list.size()
<< " exceeded maxArrayLen=" << maxArrayLen;
break;
}
}
return vectorToString(str_array);
}
return "[]";
}

inline std::string getScalarValue(const c10::IValue& val) {
if (val.isDouble()) {
@@ -259,6 +194,7 @@ struct FunctionCallContext : public ObserverContext {
ExecutionTraceObserver::ID parent_id{uninitialized_id};
ExecutionTraceObserver::ID fw_parent_id{uninitialized_id};
std::vector<std::string> input_types;
std::vector<std::string> input_strides;
std::vector<std::string> input_shapes;
std::vector<std::string> input_values;
};
@@ -303,9 +239,11 @@ static void writeJsonNode(
const uint64_t fw_tid,
const std::string& inputs = "[]",
const std::string& input_shapes = "[]",
const std::string& input_strides = "[]",
const std::string& input_types = "[]",
const std::string& outputs = "[]",
const std::string& output_shapes = "[]",
const std::string& output_strides = "[]",
const std::string& output_types = "[]",
const std::string& operator_schema = "",
const std::string& kernel_backend = "",
@@ -315,8 +253,8 @@
R"JSON(
{{
"id": {}, "name": "{}", "ctrl_deps": {},
"inputs": {{"values": {}, "shapes": {}, "types": {}}},
"outputs": {{"values": {}, "shapes": {}, "types": {}}},
"inputs": {{"values": {}, "shapes": {}, "types": {}, "strides": {}}},
"outputs": {{"values": {}, "shapes": {}, "types": {}, "strides": {}}},
"attrs": [{{"name": "rf_id", "type": "uint64", "value": {}}},{{"name": "fw_parent", "type": "uint64", "value": {}}},{{"name": "seq_id", "type": "int64", "value": {}}},{{"name": "scope", "type": "uint64", "value": {}}},{{"name": "tid", "type": "uint64", "value": {}}},{{"name": "fw_tid", "type": "uint64", "value": {}}},{{"name": "op_schema", "type": "string", "value": "{}"}},{{"name": "kernel_backend", "type": "string", "value": "{}"}},{{"name": "kernel_file", "type": "string", "value": "{}"}}{}]
}})JSON",
id,
@@ -325,9 +263,11 @@
inputs,
input_shapes,
input_types,
input_strides,
outputs,
output_shapes,
output_types,
output_strides,
rf_id,
fw_parent,
seq_id,
@@ -366,7 +306,7 @@ static bool initExecutionTraceStart(ExecutionTraceObserver& ob) {

ob.out << fmt::format(
R"JSON({{
"schema": "1.1.0-chakra.0.0.4", "pid": {}, "time": "{}", "start_ts": {},
"schema": "1.1.1-chakra.0.0.4", "pid": {}, "time": "{}", "start_ts": {},
"nodes": [)JSON",
ob.pid,
ob.record_time,
@@ -417,69 +357,125 @@ inline ExecutionTraceObserver::ID getObjectID(
return iter->second;
}

inline std::string convertIValue(

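// Returns JSON fragments for val as a (shape, stride, type, value) tuple.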
inline std::tuple<std::string, std::string, std::string, std::string> convertIValue(
ExecutionTraceObserver& ob,
const c10::IValue& val,
const bool baseType = true,
const size_t maxArrayLen = maxNumElements) {
std::string type = val.tagKind();
if (val.isTensor()) {
const auto t = val.toTensor().unsafeGetTensorImpl();
ExecutionTraceObserver::ID tensor_id = getObjectID(ob, t);
std::string tensor_shape, tensor_stride, tensor_type, tensor_value;

const auto tensor = val.toTensor();
const auto tensor_impl = tensor.unsafeGetTensorImpl();
if (tensor.defined() && !tensor_impl->has_symbolic_sizes_strides()) {
// tensor shape
tensor_shape = vectorToString(tensor.sizes().vec());
// tensor strides
tensor_stride = vectorToString(tensor.strides().vec());
} else {
tensor_shape = "[]";
tensor_stride = "[]";
}
// tensor dtype
type += fmt::format("({})", std::string(tensor.dtype().name()));
tensor_type = baseType ? fmt::format("\"{}\"", type) : type;

ExecutionTraceObserver::ID tensor_id = getObjectID(ob, tensor_impl);
ExecutionTraceObserver::ID storage_id = 0;
size_t offset = 0;
size_t numel = 0;
size_t itemsize = 0;
std::string device_str = "";
// symbolic sizes/strides implies storage_offset() will fail
if (t->has_storage() && !t->has_symbolic_sizes_strides()) {
auto& t_storage = t->storage();
if (tensor_impl->has_storage() && !tensor_impl->has_symbolic_sizes_strides()) {
auto& t_storage = tensor_impl->storage();
storage_id = getObjectID(ob, t_storage.data());
offset = t->storage_offset();
numel = t->numel();
itemsize = t->itemsize();
device_str = t->device().str();
offset = tensor_impl->storage_offset();
numel = tensor_impl->numel();
itemsize = tensor_impl->itemsize();
device_str = tensor_impl->device().str();
}
return fmt::format(
tensor_value = fmt::format(
"[{},{},{},{},{},\"{}\"]",
tensor_id,
storage_id,
offset,
numel,
itemsize,
device_str);
return std::make_tuple(tensor_shape, tensor_stride, tensor_type, tensor_value);
} else if (val.isTuple()) {
std::vector<std::string> str_array;
const auto& val_tuple = val.toTupleRef().elements();
for (const auto j : c10::irange(val_tuple.size())) {
str_array.push_back(convertIValue(ob, val_tuple[j]));
size_t tuple_size = val_tuple.size();
std::vector<std::string> shape_array;
std::vector<std::string> stride_array;
std::vector<std::string> type_array;
std::vector<std::string> value_array;
for (const auto j : c10::irange(tuple_size)) {
auto tuple = convertIValue(ob, val_tuple[j], false);
shape_array.push_back(std::get<0>(tuple));
stride_array.push_back(std::get<1>(tuple));
type_array.push_back(std::get<2>(tuple));
value_array.push_back(std::get<3>(tuple));
}
return vectorToString(str_array);
type += vectorToString(type_array);
std::string tensor_type = baseType ? fmt::format("\"{}\"", type) : type;
return std::make_tuple(vectorToString(shape_array),
vectorToString(stride_array),
tensor_type,
vectorToString(value_array));
} else if (val.isList()) {
const auto& val_list = val.toList();
std::vector<std::string> str_array;
str_array.reserve(val_list.size());
for (const auto j : c10::irange(val_list.size())) {
str_array.push_back(convertIValue(ob, val_list.get(j)));
size_t list_size = val_list.size();
std::vector<std::string> shape_array;
std::vector<std::string> stride_array;
std::vector<std::string> type_array;
std::vector<std::string> value_array;
for (const auto j : c10::irange(list_size)) {
auto tuple = convertIValue(ob, val_list.get(j), false);
shape_array.push_back(std::get<0>(tuple));
stride_array.push_back(std::get<1>(tuple));
type_array.push_back(std::get<2>(tuple));
value_array.push_back(std::get<3>(tuple));
if (j >= maxArrayLen) {
LOG(WARNING) << "list size=" << val_list.size()
<< " exceeded maxArrayLen=" << maxArrayLen;
break;
}
}
return vectorToString(str_array);
type += vectorToString(type_array);
std::string tensor_type = baseType ? fmt::format("\"{}\"", type) : type;
return std::make_tuple(vectorToString(shape_array),
vectorToString(stride_array),
tensor_type,
vectorToString(value_array));
} else {
return getScalarValue(val);
std::string tensor_shape = "[]";
std::string tensor_stride = "[]";
std::string tensor_type = baseType ? fmt::format("\"{}\"", type) : type;
std::string tensor_value = getScalarValue(val);

return std::make_tuple(tensor_shape, tensor_stride, tensor_type, tensor_value);
}
}

inline void appendValueInfo(
ExecutionTraceObserver& ob,
const c10::IValue& val,
std::vector<std::string>& values,
std::vector<std::string>& shapes,
std::vector<std::string>& strides,
std::vector<std::string>& types,
std::vector<std::string>& shapes) {
values.push_back(convertIValue(ob, val));
types.push_back(getValueType(val));
shapes.push_back(getValueShape(val));
std::vector<std::string>& values) {
auto tuple = convertIValue(ob, val, true);

shapes.push_back(std::get<0>(tuple));
strides.push_back(std::get<1>(tuple));
types.push_back(std::get<2>(tuple));
values.push_back(std::get<3>(tuple));
}

inline void handleKernelBackendInfo(
@@ -617,7 +613,7 @@ static void recordOperatorStart(

for (const auto i : c10::irange(input_start, inputs.size())) {
appendValueInfo(
ob, inputs[i], fc.input_values, fc.input_types, fc.input_shapes);
ob, inputs[i], fc.input_shapes, fc.input_strides, fc.input_types, fc.input_values);
}

handleKernelBackendInfo(fc, fn);
@@ -711,6 +707,7 @@ static void onFunctionExit(const RecordFunction& fn, ObserverContext* ctx_ptr) {
size_t output_start = outputs.size() - num_outputs;

std::vector<std::string> output_types;
std::vector<std::string> output_strides;
std::vector<std::string> output_shapes;
std::vector<std::string> output_values;
try {
@@ -720,7 +717,7 @@ static void onFunctionExit(const RecordFunction& fn, ObserverContext* ctx_ptr) {
ob->op_stack[fn.threadId()].pop();
for (const auto i : c10::irange(output_start, outputs.size())) {
appendValueInfo(
*ob, outputs[i], output_values, output_types, output_shapes);
*ob, outputs[i], output_shapes, output_strides, output_types, output_values);
}

std::string op_schema_str{};
@@ -745,9 +742,11 @@ static void onFunctionExit(const RecordFunction& fn, ObserverContext* ctx_ptr) {
fn.forwardThreadId(),
vectorToString(fc.input_values),
vectorToString(fc.input_shapes),
vectorToString(fc.input_strides),
vectorToString(fc.input_types),
vectorToString(output_values),
vectorToString(output_shapes),
vectorToString(output_strides),
vectorToString(output_types),
op_schema_str,
fc.kernel_backend,
