Skip to content

Commit

Permalink
Dev oneflow_backend
Browse files Browse the repository at this point in the history
  • Loading branch information
QuantumLiu committed Feb 14, 2023
1 parent fcd95b4 commit 5230e8e
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 12 deletions.
9 changes: 5 additions & 4 deletions examples/basic_tutorials/mnist_mlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
# os.environ['TL_BACKEND'] = 'tensorflow'
# os.environ['TL_BACKEND'] = 'mindspore'
# os.environ['TL_BACKEND'] = 'paddle'
os.environ['TL_BACKEND'] = 'torch'
os.environ['TL_BACKEND'] = 'oneflow'
# os.environ['TL_BACKEND'] = 'torch'

import tensorlayerx as tlx
from tensorlayerx.nn import Module
Expand Down Expand Up @@ -60,16 +61,16 @@ def forward(self, x, foo=None):
print_freq = 2

train_weights = MLP.trainable_weights
optimizer = tlx.optimizers.Momentum(0.05, 0.9)
optimizer = tlx.optimizers.Adadelta()#Momentum(0.05, 0.9)
metric = tlx.metrics.Accuracy()
loss_fn = tlx.losses.softmax_cross_entropy_with_logits
train_dataset = mnistdataset(data=X_train, label=y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

model = tlx.model.Model(network=MLP, loss_fn=loss_fn, optimizer=optimizer, metrics=metric)
model.train(n_epoch=n_epoch, train_dataset=train_loader, print_freq=print_freq, print_train_batch=False)
model.save_weights('./model.npz', format='npz_dict')
model.load_weights('./model.npz', format='npz_dict')
# model.save_weights('./model.npz', format='npz_dict')
# model.load_weights('./model.npz', format='npz_dict')

################################ TensorLayerX and TensorFlow can be mixed programming. #################################
# import os
Expand Down
2 changes: 1 addition & 1 deletion tensorlayerx/backend/ops/oneflow_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ def __init__(self, transpose_a=False, transpose_b=False, name=None):
if self.transpose_a or self.transpose_b:
raise NotImplementedError('keyword argument `transpose_a` or `transpose_b` is not supported.')

def forward(self, x, y):
def __call__(self, x, y):
    """Return the matrix product of ``x`` and ``y`` via OneFlow."""
    product = flow.matmul(x, y)
    return product


Expand Down
76 changes: 76 additions & 0 deletions tensorlayerx/model/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,14 @@ def train(self, n_epoch, train_dataset=None, test_dataset=False, print_train_bat
loss_monitor=loss_monitor
)

elif tlx.BACKEND == "oneflow":
self.of_train(
n_epoch=n_epoch, train_dataset=train_dataset, network=self.network, loss_fn=self.loss_fn,
train_weights=self.train_weights, optimizer=self.optimizer, metrics=self.metrics,
print_train_batch=print_train_batch, print_freq=print_freq, test_dataset=test_dataset,
loss_monitor=loss_monitor
)

def eval(self, test_dataset):
self.network.set_eval()
test_loss, test_acc, n_iter = 0, 0, 0
Expand Down Expand Up @@ -574,6 +582,74 @@ def th_train(
writer.export_scalars_to_json("./all_scalars.json")
writer.close()

def of_train(
    self, n_epoch, train_dataset, network, loss_fn, train_weights, optimizer, metrics, print_train_batch,
    print_freq, test_dataset, loss_monitor
):
    """Training loop for the OneFlow backend, mirroring the other backends' *_train methods.

    Parameters
    ----------
    n_epoch : int
        Number of epochs to train for.
    train_dataset : iterable
        Yields ``(X_batch, y_batch)`` pairs; must support ``len()`` for the progress bar.
    network : Module
        The model; switched with ``set_train()`` / ``set_eval()``.
    loss_fn : callable
        ``loss_fn(output, y_batch)`` returning a scalar loss tensor.
    train_weights : list
        Trainable parameters handed to the optimizer.
    optimizer : object
        Exposes ``gradient(loss, weights)`` and ``apply_gradients(pairs)``.
    metrics : object or None
        Optional metric with ``update``/``result``/``reset``; when absent,
        accuracy falls back to an argmax comparison.
    print_train_batch : bool
        If True, print loss/accuracy after every batch.
    print_freq : int
        Print (and evaluate, when ``test_dataset`` is given) every ``print_freq`` epochs.
    test_dataset : iterable or False
        Optional evaluation dataset.
    loss_monitor : object
        Accepted for signature parity with the other backend loops; not used here.
    """
    with Progress(TextColumn("[progress.description]{task.description}"),
                  BarColumn(),
                  TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
                  TimeRemainingColumn(),
                  TimeElapsedColumn()) as progress:

        n_batch = len(train_dataset)
        epoch_tqdm = progress.add_task(description="[red]Epoch progress 0/{}".format(n_epoch), total=n_epoch)
        batch_tqdm = progress.add_task(description="[green]Batch progress 0/{}".format(n_batch), total=n_batch)

        for epoch in range(n_epoch):
            start_time = time.time()
            # Re-assert train mode once per epoch: the evaluation pass below
            # may have left the network in eval mode.
            network.set_train()

            train_loss, train_acc, n_iter = 0, 0, 0
            for batch, (X_batch, y_batch) in enumerate(train_dataset):
                output = network(X_batch)
                loss = loss_fn(output, y_batch)
                grads = optimizer.gradient(loss, train_weights)
                optimizer.apply_gradients(zip(grads, train_weights))

                # Accumulate a Python float rather than the loss tensor so we
                # do not keep every per-batch tensor (and its graph) alive.
                train_loss += float(loss)
                if metrics:
                    metrics.update(output, y_batch)
                    train_acc += metrics.result()
                    metrics.reset()
                else:
                    # BUGFIX: the original used .type(torch.float) here, which
                    # references the PyTorch module from inside the OneFlow
                    # training path (NameError when torch is unavailable).
                    # Tensor.float() is supported by both frameworks.
                    train_acc += (output.argmax(1) == y_batch).float().mean().item()
                n_iter += 1

                if print_train_batch:
                    print("Epoch {} of {} took {}".format(epoch + 1, n_epoch, time.time() - start_time))
                    print("   train loss: {}".format(train_loss / n_iter))
                    print("   train acc: {}".format(train_acc / n_iter))
                progress.advance(batch_tqdm, advance=1)
                progress.update(batch_tqdm, description="[green]Batch progress {}/{}".format(batch + 1, n_batch))

            if epoch + 1 == 1 or (epoch + 1) % print_freq == 0:
                print("Epoch {} of {} took {}".format(epoch + 1, n_epoch, time.time() - start_time))
                print("   train loss: {}".format(train_loss / n_iter))
                print("   train acc: {}".format(train_acc / n_iter))

            if test_dataset:
                # Use the evaluation set every `print_freq` epochs (and after epoch 1).
                if epoch + 1 == 1 or (epoch + 1) % print_freq == 0:
                    network.set_eval()
                    val_loss, val_acc, n_iter = 0, 0, 0
                    for X_batch, y_batch in test_dataset:
                        _logits = network(X_batch)  # is_train=False, disable dropout
                        val_loss += float(loss_fn(_logits, y_batch))
                        if metrics:
                            metrics.update(_logits, y_batch)
                            val_acc += metrics.result()
                            metrics.reset()
                        else:
                            # Same framework-neutral fix as in the training branch.
                            val_acc += (_logits.argmax(1) == y_batch).float().mean().item()
                        n_iter += 1
                    print("   val loss: {}".format(val_loss / n_iter))
                    print("   val acc: {}".format(val_acc / n_iter))
            progress.update(epoch_tqdm, description="[red]Epoch progress {}/{}".format(epoch + 1, n_epoch))
            progress.advance(epoch_tqdm, advance=1)
            progress.reset(batch_tqdm)

class WithGrad(object):
"""Module that returns the gradients.
Expand Down
2 changes: 1 addition & 1 deletion tensorlayerx/nn/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def _save_weights(net, file_path, format=None):
format = 'hdf5'

if format == 'hdf5' or format == 'h5':
if tlx.BACKEND == 'torch':
if tlx.BACKEND == 'torch' or tlx.BACKEND == 'oneflow':
utils.save_weights_to_hdf5(net.named_parameters(), file_path)
else:
utils.save_weights_to_hdf5(net.all_weights, file_path)
Expand Down
12 changes: 6 additions & 6 deletions tensorlayerx/nn/initializers/oneflow_initializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ class Zeros(Initializer):
"""

def __call__(self, shape, dtype=tlx.float32):
_tensor = flow.empty(size=shape, dtype=dtype)
_tensor = flow.empty(shape, dtype=dtype)
return flow.nn.init.zeros_(_tensor)


Expand All @@ -98,7 +98,7 @@ class Ones(Initializer):
"""

def __call__(self, shape, dtype=tlx.float32):
_tensor = flow.empty(size=shape, dtype=dtype)
_tensor = flow.empty(shape, dtype=dtype)
return flow.nn.init.ones_(_tensor)


Expand All @@ -123,7 +123,7 @@ def __init__(self, value=0):
self.value = value

def __call__(self, shape, dtype=tlx.float32):
_tensor = flow.empty(size=shape, dtype=dtype)
_tensor = flow.empty(shape, dtype=dtype)
if isinstance(self.value, (int, float)):
return flow.nn.init.constant_(_tensor, val=self.value)
elif isinstance(self.value, (flow.Tensor, list, np.ndarray)):
Expand Down Expand Up @@ -161,7 +161,7 @@ def __init__(self, minval=-0.05, maxval=0.05, seed=None):
self.seed = seed

def __call__(self, shape, dtype=tlx.float32):
_tensor = flow.empty(size=shape, dtype=dtype)
_tensor = flow.empty(shape, dtype=dtype)
return flow.nn.init.uniform_(_tensor, a=self.minval, b=self.maxval)

def get_config(self):
Expand Down Expand Up @@ -197,7 +197,7 @@ def __init__(self, mean=0.0, stddev=0.05, seed=None):
self.seed = seed

def __call__(self, shape, dtype=tlx.float32):
_tensor = flow.empty(size=shape)
_tensor = flow.empty(shape)
return flow.nn.init.normal_(_tensor, mean=self.mean, std=self.stddev)

def get_config(self):
Expand Down Expand Up @@ -237,7 +237,7 @@ def __init__(self, mean=0.0, stddev=0.05, seed=None):
self.seed = seed

def __call__(self, shape, dtype=tlx.float32):
_tensor = flow.empty(size=shape)
_tensor = flow.empty(shape)
return self._truncated_normal(_tensor, self.mean, self.stddev)

def _truncated_normal(self, tensor, mean=0, std=0.09):
Expand Down

0 comments on commit 5230e8e

Please sign in to comment.