-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodel_util.py
118 lines (107 loc) · 3.69 KB
/
model_util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
import time
import os
from collections import OrderedDict
from torch.utils.data import Subset
import networkx
# Top-k sparsification: keep only the k largest-magnitude coordinates of the gradient.
# The squared error of the compression is controlled through the parameter k.
def quantizer_topk(gradient, k=5):
    """Keep the ``k`` largest-magnitude entries along dim 0 and zero the rest.

    The selection is done independently for every position of the trailing
    dimensions (``torch.topk`` with ``dim=0``).

    Args:
        gradient: Tensor to sparsify. It is left unmodified.
        k: Number of entries to keep along dim 0. Values larger than the
            size of dim 0 are clamped, so a small tensor is returned whole.

    Returns:
        A new tensor shaped like ``gradient`` holding the selected entries
        (with their original sign) and zeros everywhere else.
    """
    # torch.topk raises when k exceeds the size of the reduced dimension.
    k = min(k, gradient.shape[0])
    _, indices = torch.topk(torch.abs(gradient), k, sorted=False, dim=0)
    # Build the result in a fresh tensor so the caller's gradient is not
    # overwritten with unsigned magnitudes as a side effect.
    sparse = torch.zeros_like(gradient)
    sparse.scatter_(0, indices, gradient.gather(0, indices))
    return sparse
# Lossy compression.
# Ref - https://arxiv.org/abs/1610.02132
# Can control the expected square loss using the parameter k.
def quantizer_lossy(gradient, k=64):
    """Stochastically quantize ``gradient`` onto ``k`` uniform levels.

    Each coordinate's magnitude is expressed as a fraction of the tensor's
    norm, scaled by ``k`` and randomly rounded to a neighbouring integer
    level. Rounding goes up with probability equal to the fractional part of
    the scaled magnitude, so the result equals ``gradient`` in expectation.

    Args:
        gradient: Floating-point tensor to compress. It is left unmodified.
        k: Number of quantization levels; larger values give a smaller
            expected squared error.

    Returns:
        A new tensor shaped like ``gradient`` whose entries are
        ``norm * sign * level / k`` with integer ``level``.
    """
    norm = torch.norm(gradient)
    # An all-zero tensor has nothing to quantize; returning early avoids the
    # 0/0 division below, which would fill the result with NaNs.
    if norm == 0:
        return torch.zeros_like(gradient)

    scaled = (torch.abs(gradient) / norm) * k
    lower = torch.floor(scaled)
    # Round up with probability equal to the fractional part of the scaled
    # magnitude. rand_like draws on the same device and dtype as the input,
    # so this also works for tensors that do not live on "cuda:0".
    round_up = torch.rand_like(scaled) < (scaled - lower)
    levels = (lower + round_up.to(scaled.dtype)) * (1 / k)
    # rescale
    return norm * (torch.sign(gradient) * levels)
# Adjacency matrix of the ring graph.
def ring(num_workers):
    """Return the adjacency matrix of a ring of ``num_workers`` nodes.

    Node ``i`` is connected to nodes ``i - 1`` and ``i + 1`` (indices taken
    modulo ``num_workers``) and to itself.

    Args:
        num_workers: Number of nodes in the ring.

    Returns:
        A ``num_workers x num_workers`` tensor with 1.0 on the diagonal and
        at the two ring neighbours of every node, 0.0 elsewhere. An empty
        ``0 x 0`` tensor is returned for ``num_workers == 0``.
    """
    adjacency = torch.zeros([num_workers, num_workers])
    if num_workers == 0:
        # Nothing to connect; also avoids taking indices modulo zero below.
        return adjacency

    nodes = torch.arange(num_workers)
    adjacency[nodes, (nodes + 1) % num_workers] = 1.0
    adjacency[nodes, (nodes - 1) % num_workers] = 1.0
    # Make the diagonal 1.
    adjacency[nodes, nodes] = 1.0
    return adjacency
# Adjacency matrix of the torus graph.
def torus(sqrt_num_workers):
    """Return the adjacency matrix of a square periodic grid (torus).

    Args:
        sqrt_num_workers: Side length of the grid; the graph has
            ``sqrt_num_workers ** 2`` nodes.

    Returns:
        The dense adjacency matrix produced by networkx for the periodic
        2-D grid, as an array, with every diagonal entry set to 1.
    """
    side = sqrt_num_workers
    grid = networkx.grid_2d_graph(side, side, periodic=True)
    adjacency = networkx.adjacency_matrix(grid).toarray()
    # Make the diagonal 1.
    np.fill_diagonal(adjacency, 1)
    return adjacency
# An example circulant graph: every node is linked to its ceil(k/2) nearest
# neighbours on each side (i.e. degree k for even k), plus a self-loop.
def degree_k(num_workers, k):
    """Return the adjacency matrix of a ring-like graph with wider links.

    Every node is connected to the nodes at offsets ``1 .. ceil(k / 2)`` on
    both sides, with indices wrapping around modulo ``num_workers``, and to
    itself. For even ``k`` (and enough nodes) each node therefore has ``k``
    neighbours besides itself; an odd ``k`` is effectively rounded up.

    Args:
        num_workers: Number of nodes.
        k: Target node degree.

    Returns:
        A ``num_workers x num_workers`` tensor of 0.0 / 1.0 entries with a
        diagonal of ones.
    """
    W = torch.zeros([num_workers, num_workers])
    if num_workers == 0:
        # Nothing to connect; also avoids taking indices modulo zero below.
        return W

    nodes = torch.arange(num_workers)
    half_k = k / 2
    offset = 0
    # Offsets beyond num_workers only revisit columns that are already set,
    # so the loop is capped there; modulo wrapping keeps every index valid
    # even when k is larger than the graph.
    while offset < half_k and offset < num_workers:
        offset += 1
        W[nodes, (nodes - offset) % num_workers] = 1.0  # left
        W[nodes, (nodes + offset) % num_workers] = 1.0  # right
    # Make the diagonal 1.
    W[nodes, nodes] = 1.0
    return W
# Splits the dataset across nodes.
def trainset_node_split(dataset, N, seed=0):
    """Shuffle ``dataset`` indices and deal them out into ``N`` equal shards.

    The global NumPy random generator is reseeded with ``seed`` before the
    shuffle, so the same seed always yields the same split. Every shard gets
    ``int(len(dataset) / N)`` samples; any leftover samples are not assigned
    to a shard.

    Args:
        dataset: Indexable dataset supporting ``len()``.
        N: Number of nodes (shards).
        seed: Seed passed to ``np.random.seed``.

    Returns:
        A dict mapping node index ``0 .. N-1`` to a ``Subset`` of ``dataset``.
    """
    total = len(dataset)
    np.random.seed(seed)
    order = np.arange(total)
    np.random.shuffle(order)
    shard = int(total / N)
    return {
        node: Subset(dataset, order[node * shard:(node + 1) * shard].tolist())
        for node in range(N)
    }
# Compares the outputs and labels and returns the count of the correct predictions.
def count_correct(outputs, labels, criterion):
    """Count how many predictions in ``outputs`` match ``labels``.

    How predictions are derived depends on the loss in use:

    * ``nn.BCELoss``: ``outputs`` and ``labels`` are both thresholded at 0.5.
    * ``nn.CrossEntropyLoss``: the prediction is the index of the largest
      value along dimension 1 of ``outputs``.

    Args:
        outputs: Model outputs.
        labels: Target labels.
        criterion: Loss module instance that selects the rule above.

    Returns:
        The number of matching entries as a Python number.

    Raises:
        ValueError: If ``criterion`` is neither of the supported losses.
    """
    binary = isinstance(criterion, nn.BCELoss)
    if not binary and not isinstance(criterion, nn.CrossEntropyLoss):
        print('Error in criterion')
        raise ValueError

    if binary:
        guesses = (outputs > 0.5).to(dtype=torch.int64)
        labels = (labels > 0.5).to(dtype=torch.int64)
    else:
        guesses = outputs.max(1)[1]

    matches = guesses == labels
    return matches.sum().item()