fix plt torch, test=model
Zeref996 committed Jan 2, 2025
1 parent 3768d4b commit bc85707
Showing 9 changed files with 83 additions and 26 deletions.
28 changes: 27 additions & 1 deletion framework/e2e/PaddleLT_new/diy/loss/diy_loss.py
@@ -4,7 +4,13 @@
"""
diy loss list
"""
import paddle
import os

if "paddle" in os.environ.get("FRAMEWORK"):
import paddle

if "torch" in os.environ.get("FRAMEWORK"):
import torch


def naive_loss_list(logit, loss_list):
@@ -35,3 +41,23 @@ def mean_loss(logit):
return loss
else:
raise Exception("something wrong with mean_loss!!")


def torch_mean_loss(logit):
"""torch mean loss"""
if isinstance(logit, (list, tuple)):
tmp = 0.0
count = 0
for l in logit:
if isinstance(l, torch.Tensor) and l.numel() > 0:
mean = torch.mean(l)
tmp += mean
count += 1
# loss = tmp / len(logit)
loss = tmp / count
return loss
elif isinstance(logit, torch.Tensor):
loss = torch.mean(logit)
return loss
else:
raise Exception("something wrong with torch_mean_loss!!")
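As a usage note (not part of the commit), torch_mean_loss reduces either a single tensor or a list/tuple of tensors to a scalar by averaging per-tensor means. A minimal sketch, assuming PaddleLT_new is on sys.path and FRAMEWORK is set to "torch" before the module is imported:

# Minimal usage sketch (not from the repository's test flow).
import os

os.environ["FRAMEWORK"] = "torch"  # must be set before importing diy_loss

import torch

from diy.loss.diy_loss import torch_mean_loss  # assumes PaddleLT_new is on sys.path

single = torch.randn(4, 3, requires_grad=True)
multi = [torch.randn(2, 2), torch.randn(5)]

loss_single = torch_mean_loss(single)  # mean over all elements of one tensor
loss_multi = torch_mean_loss(multi)    # average of the per-tensor means

loss_single.backward()                 # scalar loss, so backward() needs no gradient argument
print(loss_single.item(), loss_multi.item())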
14 changes: 13 additions & 1 deletion framework/e2e/PaddleLT_new/diy/optimizer/diy_opt.py
@@ -4,10 +4,22 @@
"""
diy optimizer
"""
import paddle
import os

if "paddle" in os.environ.get("FRAMEWORK"):
import paddle

if "torch" in os.environ.get("FRAMEWORK"):
import torch


def naive_opt(net, opt_api, learning_rate):
"""navie optimizer func"""
opt = eval(opt_api)(learning_rate=learning_rate, parameters=net.parameters())
return opt


def torch_opt(net, opt_api, learning_rate):
"""torch optimizer func"""
opt = eval(opt_api)(net.parameters(), lr=learning_rate)
return opt
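torch_opt eval-uates the dotted opt_api string, so torch must already be importable in diy_opt's namespace, which the FRAMEWORK guard above provides; only trusted config values should be passed to it. A minimal call sketch (argument values mirror the yaml later in this diff):

# Minimal call sketch (not from the repository's test flow).
import os

os.environ["FRAMEWORK"] = "torch"  # must be set before importing diy_opt

import torch

from diy.optimizer.diy_opt import torch_opt  # assumes PaddleLT_new is on sys.path

net = torch.nn.Linear(8, 2)
opt = torch_opt(net, opt_api="torch.optim.SGD", learning_rate=0.00001)

loss = net(torch.randn(4, 8)).sum()
loss.backward()
opt.step()
opt.zero_grad()  # torch's counterpart to paddle's opt.clear_grad()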
5 changes: 5 additions & 0 deletions framework/e2e/PaddleLT_new/engine/paddle_train.py
@@ -17,6 +17,8 @@

from pltools.logger import Logger

from strategy.ordered_dict import OrderedDictProcess


class LayerTrain(object):
"""
@@ -37,6 +39,7 @@ def __init__(self, testing, layerfile, device_place_id, upstream_net, orderdict_

self.testing = testing
self.upstream_net = upstream_net
self.orderdict_usage = orderdict_usage
self.return_net_instance = self.testing.get("return_net_instance", "False")
self.model_dtype = self.testing.get("model_dtype")
paddle.set_default_dtype(self.model_dtype)
@@ -63,6 +66,8 @@ def _net_instant(self):
net = self.upstream_net
else:
net = BuildLayer(layerfile=self.layerfile).get_layer()
if self.orderdict_usage != "None":
net = OrderedDictProcess(net=net, layerfile=self.layerfile, orderdict_usage=self.orderdict_usage).process()
return net

def _net_optimizer(self):
2 changes: 2 additions & 0 deletions framework/e2e/PaddleLT_new/engine/torch_engine_map.py
@@ -7,6 +7,7 @@
"""

from engine.torch_eval import TorchLayerEval
from engine.torch_train import TorchLayerTrain

# from engine.torch_eval_bm import TorchLayerEvalBM

@@ -15,5 +16,6 @@

torch_engine_map = {
"torch_dy_eval": TorchLayerEval,
"torch_dy_train": TorchLayerTrain,
# "torch_dy_eval_perf": TorchLayerEvalBM, # 动态图评估性能
}
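The new map entry lets the runner pick the torch training engine by the yaml testing name. A minimal lookup sketch (not from the repository; how the runner actually constructs the engine is not shown in this diff):

# Lookup sketch only; constructor arguments are omitted because they come
# from the framework's yaml-driven runner.
from engine.torch_engine_map import torch_engine_map

engine_cls = torch_engine_map["torch_dy_train"]
print(engine_cls.__name__)  # TorchLayerTrain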
34 changes: 20 additions & 14 deletions framework/e2e/PaddleLT_new/engine/torch_train.py
@@ -17,8 +17,10 @@

from pltools.logger import Logger

from strategy.ordered_dict import OrderedDictProcess

class LayerTrain(object):

class TorchLayerTrain(object):
"""
Generic class for Layer training
"""
@@ -33,10 +35,11 @@ def __init__(self, testing, layerfile, device_place_id, upstream_net, orderdict_
self.device = os.environ.get("PLT_SET_DEVICE")
device = torch.device(f"cuda:{device_place_id}")
torch.set_default_device(device)
Logger("LayerTrain.__init__").get_log().info(f"device_place_id is: {device_place_id}")
Logger("TorchLayerTrain.__init__").get_log().info(f"device_place_id is: {device_place_id}")

self.testing = testing
self.upstream_net = upstream_net
self.orderdict_usage = orderdict_usage
self.return_net_instance = self.testing.get("return_net_instance", "False")
self.model_dtype = self.testing.get("model_dtype")
# torch.set_default_dtype(self.model_dtype)
@@ -53,7 +56,7 @@ def _unset_flags(self, engine_str="test_engine"):
def _net_input(self):
"""get input"""
reset(self.seed)
data = BuildData(layerfile=self.layerfile).get_single_data()
data = BuildData(layerfile=self.layerfile).get_single_data(framework="torch")
return data

def _net_instant(self):
@@ -63,6 +66,8 @@ def _net_instant(self):
net = self.upstream_net
else:
net = BuildLayer(layerfile=self.layerfile).get_layer()
if self.orderdict_usage != "None":
net = OrderedDictProcess(net=net, layerfile=self.layerfile, orderdict_usage=self.orderdict_usage).process()
return net

def _net_optimizer(self):
@@ -107,7 +112,7 @@ def _get_data_grad(self, data):
data_grad.append(i.grad)
return data_grad

def dy_train(self):
def torch_dy_train(self):
"""dygraph train"""
# data, net, optimizer, loss = self._get_instant()
data = self._net_input()
@@ -123,15 +128,16 @@ def dy_train(self):
opt = optimizer.get_opt(net=net)

for epoch in range(self.step):
print("data is: ", data)
logit = net(*data)
# build the loss used for training
dy_loss = loss.get_loss(logit)
dy_loss.backward()
if net.parameters():
opt.step()
opt.clear_grad()
opt.zero_grad()

Logger("dy_train").get_log().info(f"已完成 {epoch} 轮训练")
Logger("torch_dy_train").get_log().info(f"已完成 {epoch} 轮训练")
data_grad = self._get_data_grad(data)
if self.return_net_instance == "True":
return {"res": {"logit": logit, "data_grad": data_grad}, "net": net}
@@ -164,7 +170,7 @@ def dy_train(self):
# dy_loss.backward()
# if dp_net.parameters():
# opt.step()
# opt.clear_grad()
# opt.zero_grad()

# Logger("dy_dp_train").get_log().info(f"已完成 {epoch} 轮训练")
# data_grad = self._get_data_grad(data)
@@ -192,7 +198,7 @@ def dy_train(self):
# loss = self.loss.get_loss(logit)
# loss.backward()
# opt.step()
# opt.clear_grad()
# opt.zero_grad()
# return logit

# def dy2st_train(self):
@@ -220,7 +226,7 @@ def dy_train(self):
# dy_loss.backward()
# if st_net.parameters():
# opt.step()
# opt.clear_grad()
# opt.zero_grad()

# Logger("dy2st_train").get_log().info(f"已完成 {epoch} 轮训练")
# data_grad = self._get_data_grad(data)
@@ -252,7 +258,7 @@ def dy_train(self):
# dy_loss.backward()
# if st_net.parameters():
# opt.step()
# opt.clear_grad()
# opt.zero_grad()

# Logger("dy2st_train_inputspec").get_log().info(f"已完成 {epoch} 轮训练")
# data_grad = self._get_data_grad(data)
@@ -284,7 +290,7 @@ def dy_train(self):
# dy_loss.backward()
# if st_net.parameters():
# opt.step()
# opt.clear_grad()
# opt.zero_grad()

# Logger("dy2st_train_static_inputspec").get_log().info(f"已完成 {epoch} 轮训练")
# data_grad = self._get_data_grad(data)
@@ -321,7 +327,7 @@ def dy_train(self):
# dy_loss.backward()
# if cinn_net.parameters():
# opt.step()
# opt.clear_grad()
# opt.zero_grad()

# Logger("dy2st_train_cinn").get_log().info(f"已完成 {epoch} 轮训练")
# data_grad = self._get_data_grad(data)
@@ -360,7 +366,7 @@ def dy_train(self):
# dy_loss.backward()
# if cinn_net.parameters():
# opt.step()
# opt.clear_grad()
# opt.zero_grad()

# Logger("dy2st_train_cinn_inputspec").get_log().info(f"已完成 {epoch} 轮训练")
# data_grad = self._get_data_grad(data)
@@ -399,7 +405,7 @@ def dy_train(self):
# dy_loss.backward()
# if cinn_net.parameters():
# opt.step()
# opt.clear_grad()
# opt.zero_grad()

# Logger("dy2st_train_cinn_static_inputspec").get_log().info(f"已完成 {epoch} 轮训练")
# data_grad = self._get_data_grad(data)
@@ -34,14 +34,14 @@ def create_tensor_inputs():
"""
paddle tensor
"""
inputs = (paddle.to_tensor(-1 + (1 - -1) * np.random.random([1024, 256, 128, 200]).astype('float32'), dtype='float32', stop_gradient=False), )
inputs = (paddle.to_tensor(-1 + (1 - -1) * np.random.random([1024, 256, 128, 100]).astype('float32'), dtype='float32', stop_gradient=False), )
return inputs


def create_numpy_inputs():
"""
numpy array
"""
inputs = (-1 + (1 - -1) * np.random.random([1024, 256, 128, 200]).astype('float32'), )
inputs = (-1 + (1 - -1) * np.random.random([1024, 256, 128, 100]).astype('float32'), )
return inputs

11 changes: 8 additions & 3 deletions framework/e2e/PaddleLT_new/strategy/ordered_dict.py
@@ -69,9 +69,14 @@ def save_ordered_dict(self):
"""
pickle_dict = {}
# print('self.net.state_dict() is: ', self.net.state_dict())
for key, value in self.net.state_dict().items():
pickle_dict[key] = value.numpy()
# print('save pickle_dict[key] is: ', pickle_dict[key])
if self.framework == "paddle":
for key, value in self.net.state_dict().items():
pickle_dict[key] = value.numpy()
# print('save pickle_dict[key] is: ', pickle_dict[key])
elif self.framework == "torch":
for key, value in self.net.state_dict().items():
value = value.cpu()
pickle_dict[key] = value.detach().numpy()
save_pickle(pickle_dict, self.path)
# eval(f"{self.framework}.save")(self.net.state_dict(), self.path)

@@ -27,13 +27,13 @@ def create_tensor_inputs():
"""
PyTorch tensor
"""
inputs = (torch.tensor((-1 + 2 * np.random.random([1024, 256, 128, 200])).astype(np.float32), dtype=torch.float32, requires_grad=True), )
inputs = (torch.tensor((-1 + 2 * np.random.random([1024, 256, 128, 100])).astype(np.float32), dtype=torch.float32, requires_grad=True), )
return inputs


def create_numpy_inputs():
"""
numpy array
"""
inputs = ((-1 + 2 * np.random.random([1024, 256, 128, 200])).astype('float32'),)
inputs = ((-1 + 2 * np.random.random([1024, 256, 128, 100])).astype('float32'),)
return inputs
7 changes: 4 additions & 3 deletions framework/e2e/PaddleLT_new/yaml/dy_train^torch_dy_train.yml
@@ -5,11 +5,11 @@ testings:
device_place_id: "1"
model_dtype: "float32"
Loss:
loss_name: "diy.loss.diy_loss.mean_loss"
loss_name: "diy.loss.diy_loss.torch_mean_loss"
optimizer:
optimizer_name: "diy.optimizer.diy_opt.naive_opt"
optimizer_name: "diy.optimizer.diy_opt.torch_opt"
params:
opt_api: "paddle.optimizer.SGD"
opt_api: "torch.optim.SGD"
learning_rate: 0.00001
step: 1

@@ -31,6 +31,7 @@ compare:
-
baseline: 'torch_dy_train'
latest: 'dy_train'
compare_method: "torch_compare"
precision:
delta: 0.00001
rtol: 0.000001
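The yaml now points the torch baseline at the new torch_mean_loss and torch_opt helpers and adds compare_method: "torch_compare" for comparing the torch_dy_train baseline against the paddle dy_train run. A sketch of how the dotted names plausibly resolve (importlib here is illustrative; the repo's actual loader may differ):

# Resolution sketch; importlib stands in for the repo's yaml-driven loader.
import importlib
import os

os.environ["FRAMEWORK"] = "torch"  # so diy_loss / diy_opt import torch

import torch


def resolve(dotted):
    """Return the attribute named by a dotted path such as 'pkg.mod.attr'."""
    module_path, attr = dotted.rsplit(".", 1)
    return getattr(importlib.import_module(module_path), attr)


loss_fn = resolve("diy.loss.diy_loss.torch_mean_loss")
make_opt = resolve("diy.optimizer.diy_opt.torch_opt")

net = torch.nn.Linear(3, 1)
opt = make_opt(net, opt_api="torch.optim.SGD", learning_rate=0.00001)
loss = loss_fn(net(torch.randn(2, 3)))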
