From 2c2fbc696c8d8ebd155221b3a951775431d9afed Mon Sep 17 00:00:00 2001 From: prometheus <57171759+NLGithubWP@users.noreply.github.com> Date: Sat, 27 Apr 2024 19:22:49 +0800 Subject: [PATCH 1/8] Add benchmark implementations for the cnn ms example --- examples/cnn_ms/benchmark.py | 121 +++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 examples/cnn_ms/benchmark.py diff --git a/examples/cnn_ms/benchmark.py b/examples/cnn_ms/benchmark.py new file mode 100644 index 000000000..9f69feee0 --- /dev/null +++ b/examples/cnn_ms/benchmark.py @@ -0,0 +1,121 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# the code is modified from +# https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py + +from singa import opt +from singa import device +from singa import tensor + +import argparse +import time +import numpy as np +from tqdm import trange + + +def train_resnet(DIST=True, graph=True, sequential=False, verbosity=0): + + # Define the hypermeters for the train_resnet + niters = 100 + batch_size = 32 + sgd = opt.SGD(lr=0.1, momentum=0.9, weight_decay=1e-5) + + IMG_SIZE = 224 + + # For distributed training, sequential has better throughput in the current version + if DIST == True: + sgd = opt.DistOpt(sgd) + world_size = sgd.world_size + local_rank = sgd.local_rank + global_rank = sgd.global_rank + sequential = True + else: + local_rank = 0 + world_size = 1 + global_rank = 0 + sequential = False + + dev = device.create_cuda_gpu_on(local_rank) + + tx = tensor.Tensor((batch_size, 3, IMG_SIZE, IMG_SIZE), dev) + ty = tensor.Tensor((batch_size,), dev, tensor.int32) + x = np.random.randn(batch_size, 3, IMG_SIZE, IMG_SIZE).astype(np.float32) + y = np.random.randint(0, 1000, batch_size, dtype=np.int32) + tx.copy_from_numpy(x) + ty.copy_from_numpy(y) + + dev.SetVerbosity(verbosity) + dev.SetSkipIteration(5) + + # Construct the model + from model import resnet + model = resnet.resnet50(num_channels=3, num_classes=1000) + + model.train() + model.set_optimizer(sgd) + model.compile([tx], is_train=True, use_graph=graph, sequential=sequential) + + # Train model + dev.Sync() + start = time.time() + with trange(niters) as t: + for _ in t: + model(tx, ty, dist_option='fp32', spars=None) + + dev.Sync() + end = time.time() + titer = (end - start) / float(niters) + throughput = float(niters * batch_size * world_size) / (end - start) + if global_rank == 0: + print("\nThroughput = {} per second".format(throughput), flush=True) + print("TotalTime={}".format(end - start), flush=True) + print("Total={}".format(titer), flush=True) + dev.PrintTimeProfiling() + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser( + description='Throughput test using Resnet 50') + parser.add_argument('--dist', + '--enable-dist', + default='False', + action='store_true', + help='enable distributed training', + dest='DIST') + parser.add_argument('--no-graph', + '--disable-graph', + default='True', + action='store_false', + help='disable graph', + dest='graph') + parser.add_argument('--verbosity', + '--log-verbosity', + default=0, + type=int, + help='logging verbosity', + dest='verbosity') + + args = parser.parse_args() + + train_resnet(DIST=args.DIST, + graph=args.graph, + sequential=False, + verbosity=args.verbosity) From d3cccfec28077d6dd9438d16b2090da171dd81bd Mon Sep 17 00:00:00 2001 From: Zhu Lei Date: Sat, 27 Apr 2024 21:18:14 +0800 Subject: [PATCH 2/8] Add the implementation for the model class --- examples/cnn_ms/pkg_model_code/model.py | 357 ++++++++++++++++++++++++ 1 file changed, 357 insertions(+) create mode 100644 examples/cnn_ms/pkg_model_code/model.py diff --git a/examples/cnn_ms/pkg_model_code/model.py b/examples/cnn_ms/pkg_model_code/model.py new file mode 100644 index 000000000..3fea9143f --- /dev/null +++ b/examples/cnn_ms/pkg_model_code/model.py @@ -0,0 +1,357 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ============================================================================= +''' +This script includes Model class for python users +to use Computational Graph in their model. +''' + +import os +import gc +import time +import json +import zipfile +import numpy as np +from functools import wraps +from collections import Iterable + +from singa import tensor +from singa import autograd +from singa import layer +from .tensor import Tensor +from . import singa_wrap as singa + + +class ModelMeta(layer.LayerMeta): + + def buffer_operation(func): + + def remove_creator(tensors): + if not tensors: + return + + if isinstance(tensors, Iterable): + if isinstance(tensors, str): + return + else: + for item in tensors: + if isinstance(item, Iterable): + remove_creator(item) + elif isinstance(item, tensor.Tensor): + item.creator = None + elif isinstance(tensors, tensor.Tensor): + tensors.creator = None + + @wraps(func) + def wrapper(self, *args, **kwargs): + if self.graph_mode and self.training: + if len(args) == 0: + raise ValueError('expect at least one input tensor') + + if isinstance(args[0], list): + assert isinstance( + args[0][0], + Tensor), ('function expects PlaceHolders or Tensors') + dev = args[0][0].device + else: + assert isinstance( + args[0], + Tensor), ('function expects PlaceHolders or Tensors') + dev = args[0].device + + if not self._buffered: + # buffer operations + dev.EnableGraph(True) + self._results = func(self, *args, **kwargs) + dev.Sync() + dev.EnableGraph(False) + self._buffered = True + + # deconstruct Operations before running the entire graph + remove_creator(self._results) + + # make sure all Operations are deallocated + gc.collect() + + # run graph + dev.RunGraph(self.sequential) + return self._results + else: + return func(self, *args, **kwargs) + + return wrapper + + def __new__(cls, name, bases, attr): + if 'train_one_batch' in attr: + attr['train_one_batch'] = ModelMeta.buffer_operation( + attr['train_one_batch']) + + return super(ModelMeta, cls).__new__(cls, name, bases, attr) + + +class Model(layer.Layer, metaclass=ModelMeta): + """ Base class for your neural network models. + + Example usage:: + + import numpy as np + from singa import opt + from singa import tensor + from singa import device + from singa import autograd + from singa import layer + from singa import model + + class MyModel(model.Model): + def __init__(self): + super(MyModel, self).__init__() + + self.softmax_cross_entropy = layer.SoftMaxCrossEntropy() + self.conv1 = layer.Conv2d(1, 20, 5, padding=0) + self.conv2 = layer.Conv2d(20, 50, 5, padding=0) + self.sgd = opt.SGD(lr=0.01) + + def forward(self, x): + y = self.conv1(x) + y = self.conv2(y) + return y + + def train_one_batch(self, x, y): + out = self.forward(x) + loss = self.softmax_cross_entropy(out, y) + self.sgd(loss) + return out, loss + + """ + + # save load states constant + TENSOR_DICT_FILENAME = '/tensor_dict.npz' + STATES_ATTR_FILENAME = '/states_attr.json' + MODEL_STATE_TYPE = 0 + AUX_STATE_TYPE = 1 + + def __init__(self): + """ + Initializes internal Model state + """ + super(Model, self).__init__() + + self.training = True + self.graph_mode = True + self.sequential = False + self._buffered = False + self._results = None + + def compile(self, inputs, is_train=True, use_graph=False, sequential=False): + """ Compile and initialize the model + + This function will automatically derive the shape of parameters + in each sublayer based on the shape of input placeholders. It will + also do some settings. + + Args: + inputs(list): the list of input tensors(placeholders) + is_train(bool): when is_trainis True, this model will enter + training mode, otherwise it will enter the evaluation mode + use_graph(bool): when use_graph is True, computational graph + will be used to train this model + sequential(bool): when sequential is True, model will execute ops + in the graph follow the order of joining the graph + """ + assert len(inputs) > 0 and isinstance(inputs[0], Tensor), ( + 'compile function expects PlaceHolders or Tensors') + + dev = inputs[0].device + dev.EnableGraph(True) + self.forward(*inputs) + dev.EnableGraph(False) + dev.ResetGraph() + + autograd.training = is_train + self.training = is_train + self.graph_mode = use_graph + self.sequential = sequential + + def forward(self, *input): + """Defines the computation performed in every forward propagation. + + Should be overridden by all subclasses. + + Args: + *input: the input training data for the model + + Returns: + out: the outputs of the forward propagation. + """ + raise NotImplementedError + + def train_one_batch(self, *input, **kwargs): + """Defines the computation performed in every training iteration + + Should be overridden by all subclasses. + + Args: + *input: the arguments of train_one_batch + **kwargs: the keyword arguments of train_one_batch + """ + raise NotImplementedError + + def train(self, mode=True): + """Set the model in evaluation mode. + + Args: + mode(bool): when mode is True, this model will enter training mode + """ + self.training = mode + autograd.training = mode + + def eval(self): + """Sets the model in evaluation mode. + """ + self.train(mode=False) + + def graph(self, mode=True, sequential=False): + """ Turn on the computational graph. Specify execution mode. + + Args: + mode(bool): when mode is True, model will use computational graph + sequential(bool): when sequential is True, model will execute ops + in the graph follow the order of joining the graph + """ + self.graph_mode = mode + self.sequential = sequential + + def __get_name__(self): + return self.__class__.__name__ + + def __call__(self, *input, **kwargs): + if self.training: + return self.train_one_batch(*input, **kwargs) + else: + return self.forward(*input, **kwargs) + + def save_states(self, fpath, aux_states={}): + """Save states. + + Args: + fpath: output file path (without the extension) + aux_states(dict): values are standard data types or Tensor, + e.g., epoch ID, learning rate, optimizer states + """ + assert not os.path.isfile(fpath), ( + "Failed to save states, %s is already existed." % fpath) + + states = self.get_states() + + # save states data and attr + tensor_dict = {} + states_attr = {} + for k, v in states.items(): + assert isinstance(v, tensor.Tensor), "Only tensor state is allowed" + tensor_dict[k] = tensor.to_numpy(v) + states_attr[k] = { + 'state_type': self.MODEL_STATE_TYPE, + 'shape': v.shape, + 'dtype': v.dtype + } + + for k, v in aux_states.items(): + assert isinstance(v, + tensor.Tensor), "Only tensor aux state is allowed" + tensor_dict[k] = tensor.to_numpy(v) + states_attr[k] = { + 'state_type': self.AUX_STATE_TYPE, + 'shape': v.shape, + 'dtype': v.dtype + } + + # save to files + timestamp = time.time() + tmp_dir = '/tmp/singa_save_states_%s' % timestamp + os.mkdir(tmp_dir) + tensor_dict_fp = tmp_dir + self.TENSOR_DICT_FILENAME + states_attr_fp = tmp_dir + self.STATES_ATTR_FILENAME + + np.savez(tensor_dict_fp, **tensor_dict) + + with open(states_attr_fp, 'w') as fp: + json.dump(states_attr, fp) + + compression = zipfile.ZIP_DEFLATED + with zipfile.ZipFile(fpath, mode="w") as zf: + zf.write(tensor_dict_fp, + os.path.basename(tensor_dict_fp), + compress_type=compression) + zf.write(states_attr_fp, + os.path.basename(states_attr_fp), + compress_type=compression) + + # clean up tmp files + os.remove(tensor_dict_fp) + os.remove(states_attr_fp) + os.rmdir(tmp_dir) + + def load_states(self, fpath): + """Load the model states and auxiliary states from disk. + + Usage: + m = MyModel() + m.compile(...) + aux_states = m.load_states('mymodel.zip') + + Args: + path: input file path (without the extension) + Returns: + dict + """ + + assert os.path.isfile(fpath), ( + "Failed to load states, %s is not exist." % fpath) + + timestamp = time.time() + tmp_dir = '/tmp/singa_load_states_%s' % timestamp + os.mkdir(tmp_dir) + + with zipfile.ZipFile(fpath, 'r') as zf: + zf.extractall(tmp_dir) + + tensor_dict_fp = tmp_dir + self.TENSOR_DICT_FILENAME + states_attr_fp = tmp_dir + self.STATES_ATTR_FILENAME + + with open(states_attr_fp) as f: + states_attr = json.load(f) + + tensor_dict = np.load(tensor_dict_fp) + + # restore singa tensor from numpy + model_states = dict() + aux_states = dict() + + for k in tensor_dict.files: + if states_attr[k]['state_type'] == self.MODEL_STATE_TYPE: + model_states[k] = tensor.from_numpy(tensor_dict[k]) + elif states_attr[k]['state_type'] == self.AUX_STATE_TYPE: + aux_states[k] = tensor.from_numpy(tensor_dict[k]) + + # restore model_states + self.set_states(model_states) + + # clean up tmp files + os.remove(tensor_dict_fp) + os.remove(states_attr_fp) + os.rmdir(tmp_dir) + return aux_states \ No newline at end of file From 50792d7b58ed6e01f822a608aa3595d0caf5f165 Mon Sep 17 00:00:00 2001 From: GY-GitCode Date: Sat, 4 May 2024 05:34:24 -0700 Subject: [PATCH 3/8] Add implementations for the new loss function --- examples/ms_model_mlp/model.py | 216 +++++++++++++++++++++++++++++++++ 1 file changed, 216 insertions(+) create mode 100644 examples/ms_model_mlp/model.py diff --git a/examples/ms_model_mlp/model.py b/examples/ms_model_mlp/model.py new file mode 100644 index 000000000..b3fe116f7 --- /dev/null +++ b/examples/ms_model_mlp/model.py @@ -0,0 +1,216 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +from singa import layer +from singa import model +from singa import tensor +from singa import opt +from singa import device +from singa.autograd import Operator +from singa.layer import Layer +from singa import singa_wrap as singa +import argparse +import numpy as np + +np_dtype = {"float16": np.float16, "float32": np.float32} + +singa_dtype = {"float16": tensor.float16, "float32": tensor.float32} + +#### self-defined loss begin + +### from autograd.py +class SumError(Operator): + + def __init__(self): + super(SumError, self).__init__() + # self.t = t.data + + def forward(self, x): + # self.err = singa.__sub__(x, self.t) + self.data_x = x + # sqr = singa.Square(self.err) + # loss = singa.SumAll(sqr) + loss = singa.SumAll(x) + # self.n = 1 + # for s in x.shape(): + # self.n *= s + # loss /= self.n + return loss + + def backward(self, dy=1.0): + # dx = self.err + dev = device.get_default_device() + dx = tensor.Tensor(self.data_x.shape, dev, singa_dtype['float32']) + dx.copy_from_numpy(np.ones(self.data_x.shape)) + # dx *= float(2 / self.n) + dx *= dy + return dx + +def se_loss(x): + # assert x.shape == t.shape, "input and target shape different: %s, %s" % ( + # x.shape, t.shape) + return SumError()(x)[0] + +### from layer.py +class SumErrorLayer(Layer): + """ + Generate a MeanSquareError operator + """ + + def __init__(self): + super(SumErrorLayer, self).__init__() + + def forward(self, x): + return se_loss(x) + +#### self-defined loss end + +class MSMLP(model.Model): + + def __init__(self, data_size=10, perceptron_size=100, num_classes=10): + super(MSMLP, self).__init__() + self.num_classes = num_classes + self.dimension = 2 + + self.relu = layer.ReLU() + self.linear1 = layer.Linear(perceptron_size) + self.linear2 = layer.Linear(num_classes) + self.softmax_cross_entropy = layer.SoftMaxCrossEntropy() + self.sum_error = SumErrorLayer() + + def forward(self, inputs): + y = self.linear1(inputs) + y = self.relu(y) + y = self.linear2(y) + return y + + def train_one_batch(self, x, y, dist_option, spars, synflow_flag): + # print ("in train_one_batch") + out = self.forward(x) + # print ("train_one_batch x.data: \n", x.data) + # print ("train_one_batch y.data: \n", y.data) + # print ("train_one_batch out.data: \n", out.data) + if synflow_flag: + # print ("sum_error") + loss = self.sum_error(out) + else: # normal training + # print ("softmax_cross_entropy") + loss = self.softmax_cross_entropy(out, y) + # print ("train_one_batch loss.data: \n", loss.data) + + if dist_option == 'plain': + # print ("before pn_p_g_list = self.optimizer(loss)") + pn_p_g_list = self.optimizer(loss) + # print ("after pn_p_g_list = self.optimizer(loss)") + elif dist_option == 'half': + self.optimizer.backward_and_update_half(loss) + elif dist_option == 'partialUpdate': + self.optimizer.backward_and_partial_update(loss) + elif dist_option == 'sparseTopK': + self.optimizer.backward_and_sparse_update(loss, + topK=True, + spars=spars) + elif dist_option == 'sparseThreshold': + self.optimizer.backward_and_sparse_update(loss, + topK=False, + spars=spars) + # print ("len(pn_p_g_list): \n", len(pn_p_g_list)) + # print ("len(pn_p_g_list[0]): \n", len(pn_p_g_list[0])) + # print ("pn_p_g_list[0][0]: \n", pn_p_g_list[0][0]) + # print ("pn_p_g_list[0][1].data: \n", pn_p_g_list[0][1].data) + # print ("pn_p_g_list[0][2].data: \n", pn_p_g_list[0][2].data) + return pn_p_g_list, out, loss + # return pn_p_g_list[0], pn_p_g_list[1], pn_p_g_list[2], out, loss + + def set_optimizer(self, optimizer): + self.optimizer = optimizer + + +def create_model(pretrained=False, **kwargs): + """Constructs a CNN model. + Args: + pretrained (bool): If True, returns a pre-trained model. + + Returns: + The created CNN model. + """ + model = MSMLP(**kwargs) + + return model + + +__all__ = ['MLP', 'create_model'] + +if __name__ == "__main__": + np.random.seed(0) + + parser = argparse.ArgumentParser() + parser.add_argument('-p', + choices=['float32', 'float16'], + default='float32', + dest='precision') + parser.add_argument('-g', + '--disable-graph', + default='True', + action='store_false', + help='disable graph', + dest='graph') + parser.add_argument('-m', + '--max-epoch', + default=1001, + type=int, + help='maximum epochs', + dest='max_epoch') + args = parser.parse_args() + + # generate the boundary + f = lambda x: (5 * x + 1) + bd_x = np.linspace(-1.0, 1, 200) + bd_y = f(bd_x) + + # generate the training data + x = np.random.uniform(-1, 1, 400) + y = f(x) + 2 * np.random.randn(len(x)) + + # choose one precision + precision = singa_dtype[args.precision] + np_precision = np_dtype[args.precision] + + # convert training data to 2d space + label = np.asarray([5 * a + 1 > b for (a, b) in zip(x, y)]).astype(np.int32) + data = np.array([[a, b] for (a, b) in zip(x, y)], dtype=np_precision) + + dev = device.create_cuda_gpu_on(0) + sgd = opt.SGD(0.1, 0.9, 1e-5, dtype=singa_dtype[args.precision]) + tx = tensor.Tensor((400, 2), dev, precision) + ty = tensor.Tensor((400,), dev, tensor.int32) + model = MLP(data_size=2, perceptron_size=3, num_classes=2) + + # attach model to graph + model.set_optimizer(sgd) + model.compile([tx], is_train=True, use_graph=args.graph, sequential=True) + model.train() + + for i in range(args.max_epoch): + tx.copy_from_numpy(data) + ty.copy_from_numpy(label) + out, loss = model(tx, ty, 'fp32', spars=None) + + if i % 100 == 0: + print("training loss = ", tensor.to_numpy(loss)[0]) From 0089348808c38e6443dad952ac78a879b0d82694 Mon Sep 17 00:00:00 2001 From: Cai Shaofeng Date: Mon, 6 May 2024 11:43:31 +0800 Subject: [PATCH 4/8] Create native.py Add implementations of the native model for the ms model example --- examples/ms_model_mlp/native.py | 137 ++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 examples/ms_model_mlp/native.py diff --git a/examples/ms_model_mlp/native.py b/examples/ms_model_mlp/native.py new file mode 100644 index 000000000..a82ec3b24 --- /dev/null +++ b/examples/ms_model_mlp/native.py @@ -0,0 +1,137 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +from singa import tensor +from singa.tensor import Tensor +from singa import autograd +from singa import opt +import numpy as np +from singa import device +import argparse + +np_dtype = {"float16": np.float16, "float32": np.float32} + +singa_dtype = {"float16": tensor.float16, "float32": tensor.float32} + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('-p', + choices=['float32', 'float16'], + default='float32', + dest='precision') + parser.add_argument('-m', + '--max-epoch', + default=1001, + type=int, + help='maximum epochs', + dest='max_epoch') + args = parser.parse_args() + + np.random.seed(0) + + autograd.training = True + + # prepare training data in numpy array + + # generate the boundary + f = lambda x: (5 * x + 1) + bd_x = np.linspace(-1.0, 1, 200) + bd_y = f(bd_x) + + # generate the training data + x = np.random.uniform(-1, 1, 400) + y = f(x) + 2 * np.random.randn(len(x)) + + # convert training data to 2d space + label = np.asarray([5 * a + 1 > b for (a, b) in zip(x, y)]) + data = np.array([[a, b] for (a, b) in zip(x, y)], dtype=np.float32) + + def to_categorical(y, num_classes): + """ + Converts a class vector (integers) to binary class matrix. + + Args: + y: class vector to be converted into a matrix + (integers from 0 to num_classes). + num_classes: total number of classes. + + Returns: + A binary matrix representation of the input. + """ + y = np.array(y, dtype="int") + n = y.shape[0] + categorical = np.zeros((n, num_classes)) + categorical[np.arange(n), y] = 1 + return categorical + + label = to_categorical(label, 2).astype(np.float32) + print("train_data_shape:", data.shape) + print("train_label_shape:", label.shape) + + precision = singa_dtype[args.precision] + np_precision = np_dtype[args.precision] + + dev = device.create_cuda_gpu() + + inputs = Tensor(data=data, device=dev) + target = Tensor(data=label, device=dev) + + inputs = inputs.as_type(precision) + target = target.as_type(tensor.int32) + + w0_np = np.random.normal(0, 0.1, (2, 3)).astype(np_precision) + w0 = Tensor(data=w0_np, + device=dev, + dtype=precision, + requires_grad=True, + stores_grad=True) + b0 = Tensor(shape=(3,), + device=dev, + dtype=precision, + requires_grad=True, + stores_grad=True) + b0.set_value(0.0) + + w1_np = np.random.normal(0, 0.1, (3, 2)).astype(np_precision) + w1 = Tensor(data=w1_np, + device=dev, + dtype=precision, + requires_grad=True, + stores_grad=True) + b1 = Tensor(shape=(2,), + device=dev, + dtype=precision, + requires_grad=True, + stores_grad=True) + b1.set_value(0.0) + + sgd = opt.SGD(0.05, 0.8) + + # training process + for i in range(args.max_epoch): + x = autograd.matmul(inputs, w0) + x = autograd.add_bias(x, b0) + x = autograd.relu(x) + x = autograd.matmul(x, w1) + x = autograd.add_bias(x, b1) + loss = autograd.softmax_cross_entropy(x, target) + sgd(loss) + + if i % 100 == 0: + print("%d, training loss = " % i, tensor.to_numpy(loss)[0]) From 94a11c218498a19cb6b985e20a7cefafa5c8b29e Mon Sep 17 00:00:00 2001 From: WANG Sheng Date: Mon, 6 May 2024 23:32:08 +0800 Subject: [PATCH 5/8] Add README.md for the cnn ms example --- examples/cnn_ms/README.md | 44 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 examples/cnn_ms/README.md diff --git a/examples/cnn_ms/README.md b/examples/cnn_ms/README.md new file mode 100644 index 000000000..177ae4d32 --- /dev/null +++ b/examples/cnn_ms/README.md @@ -0,0 +1,44 @@ + + +# Image Classification using Convolutional Neural Networks + +Examples inside this folder show how to train CNN models using +SINGA for image classification. + +* `data` includes the scripts for preprocessing image datasets. + Currently, MNIST, CIFAR10 and CIFAR100 are included. + +* `model` includes the CNN model construction codes by creating + a subclass of `Module` to wrap the neural network operations + of each model. Then computational graph is enabled to optimized + the memory and efficiency. + +* `autograd` includes the codes to train CNN models by calling the + [neural network operations](../../python/singa/autograd.py) imperatively. + The computational graph is not created. + +* `train_cnn.py` is the training script, which controls the training flow by + doing BackPropagation and SGD update. + +* `train_multiprocess.py` is the script for distributed training on a single + node with multiple GPUs; it uses Python's multiprocessing module and NCCL. + +* `train_mpi.py` is the script for distributed training (among multiple nodes) + using MPI and NCCL for communication. + +* `benchmark.py` tests the training throughput using `ResNet50` as the workload. \ No newline at end of file From da18ab9948d5efe77b3dcbe8ef531c8728194aaa Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 7 May 2024 19:34:08 +0800 Subject: [PATCH 6/8] update msmlp --- examples/ms_model_mlp/model.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/examples/ms_model_mlp/model.py b/examples/ms_model_mlp/model.py index b3fe116f7..454b382d5 100644 --- a/examples/ms_model_mlp/model.py +++ b/examples/ms_model_mlp/model.py @@ -83,21 +83,30 @@ def forward(self, x): class MSMLP(model.Model): - def __init__(self, data_size=10, perceptron_size=100, num_classes=10): + def __init__(self, data_size=10, perceptron_size=100, num_classes=10, layer_hidden_list=[10,10,10,10]): super(MSMLP, self).__init__() self.num_classes = num_classes self.dimension = 2 self.relu = layer.ReLU() - self.linear1 = layer.Linear(perceptron_size) - self.linear2 = layer.Linear(num_classes) + self.linear1 = layer.Linear(layer_hidden_list[0]) + self.linear2 = layer.Linear(layer_hidden_list[1]) + self.linear3 = layer.Linear(layer_hidden_list[2]) + self.linear4 = layer.Linear(layer_hidden_list[3]) + self.linear5 = layer.Linear(num_classes) self.softmax_cross_entropy = layer.SoftMaxCrossEntropy() self.sum_error = SumErrorLayer() - + def forward(self, inputs): y = self.linear1(inputs) y = self.relu(y) y = self.linear2(y) + y = self.relu(y) + y = self.linear3(y) + y = self.relu(y) + y = self.linear4(y) + y = self.relu(y) + y = self.linear5(y) return y def train_one_batch(self, x, y, dist_option, spars, synflow_flag): @@ -144,6 +153,7 @@ def set_optimizer(self, optimizer): def create_model(pretrained=False, **kwargs): """Constructs a CNN model. + Args: pretrained (bool): If True, returns a pre-trained model. From 901fed189d93da3b56c52c8bff6a97bb2ff74b6b Mon Sep 17 00:00:00 2001 From: GY-GitCode Date: Sun, 19 May 2024 03:35:26 -0700 Subject: [PATCH 7/8] Add the autograd file for multiprocessing --- .../cnn_ms/autograd/cifar10_multiprocess.py | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 examples/cnn_ms/autograd/cifar10_multiprocess.py diff --git a/examples/cnn_ms/autograd/cifar10_multiprocess.py b/examples/cnn_ms/autograd/cifar10_multiprocess.py new file mode 100644 index 000000000..815d0119e --- /dev/null +++ b/examples/cnn_ms/autograd/cifar10_multiprocess.py @@ -0,0 +1,43 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +from resnet_cifar10 import * +import multiprocessing +import sys + +if __name__ == '__main__': + + # Generate a NCCL ID to be used for collective communication + nccl_id = singa.NcclIdHolder() + + # Configure the number of GPUs to be used + world_size = int(sys.argv[1]) + + # Testing the experimental partial-parameter update asynchronous training + partial_update = True + + process = [] + for local_rank in range(0, world_size): + process.append( + multiprocessing.Process(target=train_cifar10, + args=(True, local_rank, world_size, nccl_id, + partial_update))) + + for p in process: + p.start() From fc127e14f84619d6625ffd1f1e9412cf566ddf1e Mon Sep 17 00:00:00 2001 From: GY-GitCode <165798068+GY-GitCode@users.noreply.github.com> Date: Tue, 21 May 2024 20:39:03 +0800 Subject: [PATCH 8/8] Add the autograd implementation of xceptionnet Add the autograd implementation of xceptionnet --- examples/cnn_ms/autograd/xceptionnet.py | 303 ++++++++++++++++++++++++ 1 file changed, 303 insertions(+) create mode 100644 examples/cnn_ms/autograd/xceptionnet.py diff --git a/examples/cnn_ms/autograd/xceptionnet.py b/examples/cnn_ms/autograd/xceptionnet.py new file mode 100644 index 000000000..8fb23d8cb --- /dev/null +++ b/examples/cnn_ms/autograd/xceptionnet.py @@ -0,0 +1,303 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +from singa import autograd +from singa import tensor +from singa import device +from singa import layer +from singa import opt + +import numpy as np +from tqdm import trange + +# the code is modified from +# https://github.com/Cadene/pretrained-models.pytorch/blob/master/pretrainedmodels/models/xception.py + + +class Block(layer.Layer): + + def __init__(self, + in_filters, + out_filters, + reps, + strides=1, + padding=0, + start_with_relu=True, + grow_first=True): + super(Block, self).__init__() + + if out_filters != in_filters or strides != 1: + self.skip = layer.Conv2d(in_filters, + out_filters, + 1, + stride=strides, + padding=padding, + bias=False) + self.skipbn = layer.BatchNorm2d(out_filters) + else: + self.skip = None + + self.layers = [] + + filters = in_filters + if grow_first: + self.layers.append(layer.ReLU()) + self.layers.append( + layer.SeparableConv2d(in_filters, + out_filters, + 3, + stride=1, + padding=1, + bias=False)) + self.layers.append(layer.BatchNorm2d(out_filters)) + filters = out_filters + + for i in range(reps - 1): + self.layers.append(layer.ReLU()) + self.layers.append( + layer.SeparableConv2d(filters, + filters, + 3, + stride=1, + padding=1, + bias=False)) + self.layers.append(layer.BatchNorm2d(filters)) + + if not grow_first: + self.layers.append(layer.ReLU()) + self.layers.append( + layer.SeparableConv2d(in_filters, + out_filters, + 3, + stride=1, + padding=1, + bias=False)) + self.layers.append(layer.BatchNorm2d(out_filters)) + + if not start_with_relu: + self.layers = self.layers[1:] + else: + self.layers[0] = layer.ReLU() + + if strides != 1: + self.layers.append(layer.MaxPool2d(3, strides, padding + 1)) + + self.register_layers(*self.layers) + + self.add = layer.Add() + + def forward(self, x): + y = self.layers[0](x) + for layer in self.layers[1:]: + if isinstance(y, tuple): + y = y[0] + y = layer(y) + + if self.skip is not None: + skip = self.skip(x) + skip = self.skipbn(skip) + else: + skip = x + y = self.add(y, skip) + return y + + +__all__ = ['Xception'] + + +class Xception(layer.Layer): + """ + Xception optimized for the ImageNet dataset, as specified in + https://arxiv.org/pdf/1610.02357.pdf + """ + + def __init__(self, num_classes=1000): + """ Constructor + Args: + num_classes: number of classes + """ + super(Xception, self).__init__() + self.num_classes = num_classes + + self.conv1 = layer.Conv2d(3, 32, 3, 2, 0, bias=False) + self.bn1 = layer.BatchNorm2d(32) + self.relu1 = layer.ReLU() + + self.conv2 = layer.Conv2d(32, 64, 3, 1, 1, bias=False) + self.bn2 = layer.BatchNorm2d(64) + self.relu2 = layer.ReLU() + # do relu here + + self.block1 = Block(64, + 128, + 2, + 2, + padding=0, + start_with_relu=False, + grow_first=True) + self.block2 = Block(128, + 256, + 2, + 2, + padding=0, + start_with_relu=True, + grow_first=True) + self.block3 = Block(256, + 728, + 2, + 2, + padding=0, + start_with_relu=True, + grow_first=True) + + self.block4 = Block(728, + 728, + 3, + 1, + start_with_relu=True, + grow_first=True) + self.block5 = Block(728, + 728, + 3, + 1, + start_with_relu=True, + grow_first=True) + self.block6 = Block(728, + 728, + 3, + 1, + start_with_relu=True, + grow_first=True) + self.block7 = Block(728, + 728, + 3, + 1, + start_with_relu=True, + grow_first=True) + + self.block8 = Block(728, + 728, + 3, + 1, + start_with_relu=True, + grow_first=True) + self.block9 = Block(728, + 728, + 3, + 1, + start_with_relu=True, + grow_first=True) + self.block10 = Block(728, + 728, + 3, + 1, + start_with_relu=True, + grow_first=True) + self.block11 = Block(728, + 728, + 3, + 1, + start_with_relu=True, + grow_first=True) + + self.block12 = Block(728, + 1024, + 2, + 2, + start_with_relu=True, + grow_first=False) + + self.conv3 = layer.SeparableConv2d(1024, 1536, 3, 1, 1) + self.bn3 = layer.BatchNorm2d(1536) + self.relu3 = layer.ReLU() + + # Relu Layer + self.conv4 = layer.SeparableConv2d(1536, 2048, 3, 1, 1) + self.bn4 = layer.BatchNorm2d(2048) + + self.relu4 = layer.ReLU() + self.globalpooling = layer.MaxPool2d(10, 1) + self.flatten = layer.Flatten() + self.fc = layer.Linear(2048, num_classes) + + def features(self, input): + x = self.conv1(input) + x = self.bn1(x) + x = self.relu1(x) + + x = self.conv2(x) + x = self.bn2(x) + x = self.relu2(x) + + x = self.block1(x) + x = self.block2(x) + x = self.block3(x) + x = self.block4(x) + x = self.block5(x) + x = self.block6(x) + x = self.block7(x) + x = self.block8(x) + x = self.block9(x) + x = self.block10(x) + x = self.block11(x) + x = self.block12(x) + + x = self.conv3(x) + x = self.bn3(x) + x = self.relu3(x) + + x = self.conv4(x) + x = self.bn4(x) + return x + + def logits(self, features): + x = self.relu4(features) + x = self.globalpooling(x) + x = self.flatten(x) + x = self.fc(x) + return x + + def forward(self, input): + x = self.features(input) + x = self.logits(x) + return x + + +if __name__ == '__main__': + model = Xception(num_classes=1000) + print('Start intialization............') + dev = device.create_cuda_gpu_on(0) + #dev = device.create_cuda_gpu() + + niters = 20 + batch_size = 16 + IMG_SIZE = 299 + sgd = opt.SGD(lr=0.1, momentum=0.9, weight_decay=1e-5) + + tx = tensor.Tensor((batch_size, 3, IMG_SIZE, IMG_SIZE), dev) + ty = tensor.Tensor((batch_size,), dev, tensor.int32) + autograd.training = True + x = np.random.randn(batch_size, 3, IMG_SIZE, IMG_SIZE).astype(np.float32) + y = np.random.randint(0, 1000, batch_size, dtype=np.int32) + tx.copy_from_numpy(x) + ty.copy_from_numpy(y) + + with trange(niters) as t: + for _ in t: + x = model(tx) + loss = autograd.softmax_cross_entropy(x, ty) + sgd(loss)