Skip to content
This repository has been archived by the owner on Jan 3, 2023. It is now read-only.

Sarkars/flib ngenc compute #529

Open
wants to merge 20 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/ngraph_cluster_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ int NGraphClusterManager::NewCluster() {

GraphDef* NGraphClusterManager::GetClusterGraph(int idx) {
std::lock_guard<std::mutex> guard(s_cluster_graphs_mutex);
return s_cluster_graphs[idx];
return idx < s_cluster_graphs.size() ? s_cluster_graphs[idx] : nullptr;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need this change since now we might have nothing stored in NGraphClusterManager, so need to safely avoid accessing out-of-bounds requests

}

} // namespace ngraph_bridge
Expand Down
28 changes: 25 additions & 3 deletions src/ngraph_encapsulate_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <utility>

#include "tensorflow/core/common_runtime/dma_helper.h"
#include "tensorflow/core/common_runtime/function.h"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Included for FunctionBody

#include "tensorflow/core/common_runtime/optimization_registry.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/node_def_util.h"
Expand Down Expand Up @@ -93,9 +94,30 @@ class NGraphEncapsulateOp : public OpKernel {
OP_REQUIRES_OK(ctx, ctx->GetAttr<int>("ngraph_cluster", &m_ngraph_cluster));
graph_def = NGraphClusterManager::GetClusterGraph(m_ngraph_cluster);

GraphConstructorOptions opts;
opts.allow_internal_ops = true;
OP_REQUIRES_OK(ctx, ConvertGraphDefToGraph(opts, *graph_def, &m_graph));
if (graph_def == nullptr) {
sayantan-nervana marked this conversation as resolved.
Show resolved Hide resolved
// Read graphdef from function library
const FunctionLibraryDefinition flib =
*ctx->function_library()->GetFunctionLibraryDefinition();
const FunctionDef* fdef =
flib.Find("Enc_" + to_string(m_ngraph_cluster) + "_native_segment");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use the node name (eg. ngraph_cluster_251) instead of this "Enc_" + to_string(m_ngraph_cluster)" ?

if (fdef == nullptr) {
OP_REQUIRES_OK(
ctx, errors::Internal(
"Did not find graphdef for encapsulate ", m_ngraph_cluster,
" in NGraphClusterManager or function library"));
}
// TODO: how to convert from functiondef to graphdef. Anything easier?
FunctionBody* fnbody;
const auto get_func_sig = [&flib](const string& op, const OpDef** sig) {
return flib.LookUpOpDef(op, sig);
};
FunctionDefToBodyHelper(*fdef, {}, &flib, get_func_sig, &fnbody);
CopyGraph(*fnbody->graph, &m_graph);
} else {
GraphConstructorOptions opts;
opts.allow_internal_ops = true;
OP_REQUIRES_OK(ctx, ConvertGraphDefToGraph(opts, *graph_def, &m_graph));
}
OP_REQUIRES_OK(ctx, ctx->GetAttr("ngraph_graph_id", &m_graph_id));
//
// Initialize the "m_input_is_static" vector as follows:
Expand Down
8 changes: 4 additions & 4 deletions test/python/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

class NgraphTest(object):

def with_ngraph(self, l, config=tf.ConfigProto()):
def with_ngraph(self, l, config=tf.ConfigProto(), graph=None):
if ngraph_bridge.is_grappler_enabled():
rewrite_options = rewriter_config_pb2.RewriterConfig(
meta_optimizer_iterations=rewriter_config_pb2.RewriterConfig.
Expand All @@ -50,7 +50,7 @@ def with_ngraph(self, l, config=tf.ConfigProto()):

os.environ['NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS'] = '1'
ngraph_bridge.enable()
with tf.Session(config=config) as sess:
with tf.Session(graph=graph, config=config) as sess:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need this change? Just Curious.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

when we write python code like a = tf.constant(...); b = tf.constant(...); c = a+b, this underlying graph is added to the default graph. When we read it from a pbtxt (like in this case), the graph is not added to default graph. So was passing the graph along to common.py to use during session construction.

But found a way to set the graph read from pbtxt as default graph (as_default)

retval = l(sess)

os.environ.pop('NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS', None)
Expand All @@ -61,12 +61,12 @@ def with_ngraph(self, l, config=tf.ConfigProto()):

return retval

def without_ngraph(self, l, config=tf.ConfigProto()):
def without_ngraph(self, l, config=tf.ConfigProto(), graph=None):
ngraph_tf_disable_deassign_clusters = os.environ.pop(
'NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS', None)

ngraph_bridge.disable()
with tf.Session(config=config) as sess:
with tf.Session(graph=graph, config=config) as sess:
retval = l(sess)

if ngraph_tf_disable_deassign_clusters is not None:
Expand Down
284 changes: 284 additions & 0 deletions test/python/flib_graph_1.pbtxt
Original file line number Diff line number Diff line change
@@ -0,0 +1,284 @@
node {
name: "Placeholder_2"
op: "Placeholder"
device: "/job:localhost/replica:0/task:0/device:CPU:0"
attr {
key: "dtype"
value {
type: DT_FLOAT
}
}
attr {
key: "shape"
value {
shape {
dim {
size: 2
}
dim {
size: 3
}
}
}
}
}
node {
name: "Placeholder_1"
op: "Placeholder"
device: "/job:localhost/replica:0/task:0/device:CPU:0"
attr {
key: "dtype"
value {
type: DT_FLOAT
}
}
attr {
key: "shape"
value {
shape {
dim {
size: 2
}
dim {
size: 3
}
}
}
}
}
node {
name: "Placeholder"
op: "Placeholder"
device: "/job:localhost/replica:0/task:0/device:CPU:0"
attr {
key: "dtype"
value {
type: DT_FLOAT
}
}
attr {
key: "shape"
value {
shape {
dim {
size: 2
}
dim {
size: 3
}
}
}
}
}
node {
name: "ngraph_cluster_0"
op: "NGraphEncapsulate"
input: "Placeholder"
input: "Placeholder_1"
input: "Placeholder_2"
device: "/job:localhost/replica:0/task:0/device:CPU:0"
attr {
key: "Targuments"
value {
list {
type: DT_FLOAT
type: DT_FLOAT
type: DT_FLOAT
}
}
}
attr {
key: "Tresults"
value {
list {
type: DT_FLOAT
type: DT_FLOAT
}
}
}
attr {
key: "_ngraph_backend"
value {
s: "CPU"
}
}
attr {
key: "ngraph_cluster"
value {
i: 0
}
}
attr {
key: "ngraph_graph_id"
value {
i: 0
}
}
}
node {
name: "add_1"
op: "IdentityN"
input: "ngraph_cluster_0"
device: "/job:localhost/replica:0/task:0/device:CPU:0"
attr {
key: "T"
value {
list {
type: DT_FLOAT
}
}
}
}
node {
name: "Sigmoid"
op: "IdentityN"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IdentityN due to Kanvi's change

input: "ngraph_cluster_0:1"
device: "/job:localhost/replica:0/task:0/device:CPU:0"
attr {
key: "T"
value {
list {
type: DT_FLOAT
}
}
}
}
library {
function {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The library of graphdefs

signature {
name: "Enc_0_native_segment"
input_arg {
name: "ngraph_input_0"
type: DT_FLOAT
}
input_arg {
name: "ngraph_input_1"
type: DT_FLOAT
}
input_arg {
name: "ngraph_input_2"
type: DT_FLOAT
}
output_arg {
name: "ngraph_output_0"
type: DT_FLOAT
}
output_arg {
name: "ngraph_output_1"
type: DT_FLOAT
}
}
node_def {
name: "add"
op: "Add"
input: "ngraph_input_0"
input: "ngraph_input_1"
device: "/job:localhost/replica:0/task:0/device:CPU:0"
attr {
key: "T"
value {
type: DT_FLOAT
}
}
attr {
key: "_ngraph_backend"
value {
s: "CPU"
}
}
attr {
key: "_ngraph_cluster"
value {
i: 0
}
}
attr {
key: "_ngraph_marked_for_clustering"
value {
b: true
}
}
experimental_debug_info {
original_node_names: "add"
}
}
node_def {
name: "add_1_ngraph/_0"
op: "Add"
input: "add:z:0"
input: "ngraph_input_2"
device: "/job:localhost/replica:0/task:0/device:CPU:0"
attr {
key: "T"
value {
type: DT_FLOAT
}
}
attr {
key: "_ngraph_backend"
value {
s: "CPU"
}
}
attr {
key: "_ngraph_cluster"
value {
i: 0
}
}
attr {
key: "_ngraph_marked_for_clustering"
value {
b: true
}
}
experimental_debug_info {
original_node_names: "add_1_ngraph/_0"
}
}
node_def {
name: "Sigmoid_ngraph/_1"
op: "Sigmoid"
input: "add_1_ngraph/_0:z:0"
device: "/job:localhost/replica:0/task:0/device:CPU:0"
attr {
key: "T"
value {
type: DT_FLOAT
}
}
attr {
key: "_ngraph_backend"
value {
s: "CPU"
}
}
attr {
key: "_ngraph_cluster"
value {
i: 0
}
}
attr {
key: "_ngraph_marked_for_clustering"
value {
b: true
}
}
experimental_debug_info {
original_node_names: "Sigmoid_ngraph/_1"
}
}
ret {
key: "ngraph_output_0"
value: "add_1_ngraph/_0:z:0"
}
ret {
key: "ngraph_output_1"
value: "Sigmoid_ngraph/_1:y:0"
}
}
}
versions {
producer: 27
}
Loading