-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmnist.py
84 lines (66 loc) · 2.67 KB
/
mnist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import torch
import tensorflow as tf
from torch.utils.data import TensorDataset, DataLoader, random_split
def normalizer(x, device, mean=0.1307, sigma=0.3081):
    """Standardize an image batch as ``(x - mean) / sigma``.

    Args:
        x: Tensor broadcastable against a ``(1, 1, 1, 1)`` tensor
            (presumably an ``(N, C, H, W)`` batch -- confirm with callers).
        device: Device on which the statistics tensors are created; it
            should match the device of ``x``.
        mean: Scalar subtracted from every element. The default is the
            value this function previously hard-coded (commonly quoted as
            the MNIST training-set mean).
        sigma: Scalar every element is divided by. The default is the
            previously hard-coded value (commonly quoted as the MNIST
            training-set standard deviation).

    Returns:
        The standardized tensor.
    """
    stats_shape = (1, 1, 1, 1)
    # Build the constants directly on the target device as float32 instead
    # of going through the legacy ``torch.FloatTensor`` constructor followed
    # by a host-to-device copy.
    mean_t = torch.tensor([mean], dtype=torch.float32, device=device).view(stats_shape)
    sigma_t = torch.tensor([sigma], dtype=torch.float32, device=device).view(stats_shape)
    return (x - mean_t) / sigma_t
def get_mnist(normalize, device):
    """Load MNIST through Keras and wrap both splits as ``TensorDataset``s.

    Images are divided by 255.0, converted to float32 tensors on ``device``
    and given an extra axis at dim 1. Labels are converted to float32
    tensors on ``device`` as well.

    NOTE(review): labels are stored as float32, not as an integer dtype;
    losses that expect class indices will presumably need a cast -- confirm
    against the training code.

    Args:
        normalize: When truthy, images are additionally passed through
            ``normalizer``.
        device: Device all tensors are moved to.

    Returns:
        A ``(train_data, test_data)`` tuple of ``TensorDataset`` objects,
        each holding ``(images, labels)``.
    """
    train_split, test_split = tf.keras.datasets.mnist.load_data(path="mnist.npz")

    def _images(raw):
        # Scale, convert, move to the device, then add the axis at dim 1.
        pixels = torch.as_tensor(raw / 255.0, dtype=torch.float32).to(device)
        pixels.unsqueeze_(1)
        if normalize:
            pixels = normalizer(pixels, device)
        return pixels

    def _labels(raw):
        return torch.as_tensor(raw, dtype=torch.float32).to(device)

    train_images, train_labels = train_split
    test_images, test_labels = test_split

    # Images for both splits are prepared before any labels, as before.
    train_x = _images(train_images)
    test_x = _images(test_images)
    train_y = _labels(train_labels)
    test_y = _labels(test_labels)

    return TensorDataset(train_x, train_y), TensorDataset(test_x, test_y)
class MnistLoader:
    """Bundle the MNIST datasets with ready-made ``DataLoader`` objects.

    Attributes set by ``__init__``:
        train_data / test_data: datasets returned by ``get_mnist``; when a
            split is requested ``train_data`` is replaced by the training
            part of the split.
        val_data: validation part of the split, or ``None`` without a split.
        train_loader / test_loader: loaders over the datasets above.
        val_loader: loader over ``val_data``, or ``None`` without a split.
    """

    def __init__(
        self, batch_size=1, shuffle=True,
        normalize=True, device="cpu", split_ratio=None
    ) -> None:
        """Load the data and build the loaders.

        Args:
            batch_size: Batch size used by every loader.
            shuffle: Whether the training loader reshuffles its data.
                Validation and test loaders are never shuffled.
            normalize: Forwarded to ``get_mnist``.
            device: Forwarded to ``get_mnist``.
            split_ratio: Fraction of the training set kept for training;
                the remainder becomes the validation set. ``None`` means
                no validation split.

        Raises:
            ValueError: If ``split_ratio`` is outside ``[0, 1]``.
        """
        self.train_data, self.test_data = get_mnist(normalize, device)

        # Always define the validation attributes so callers can test them
        # against None instead of hitting AttributeError.
        self.val_data = None
        self.val_loader = None
        if split_ratio is not None:
            self.train_data, self.val_data = self._train_val_split(split_ratio)
            self.val_loader = DataLoader(
                self.val_data, batch_size=batch_size, shuffle=False
            )

        # Honour the caller's ``shuffle`` choice; it used to be ignored and
        # the training loader was always shuffled.
        self.train_loader = DataLoader(
            self.train_data, batch_size=batch_size, shuffle=shuffle
        )
        self.test_loader = DataLoader(
            self.test_data, batch_size=batch_size, shuffle=False
        )

    def _train_val_split(self, ratio):
        """Randomly split ``self.train_data`` into train and validation parts.

        Args:
            ratio: Fraction of samples assigned to the training part.

        Returns:
            ``(train_data, val_data)`` as produced by ``random_split``.

        Raises:
            ValueError: If ``ratio`` is outside ``[0, 1]``.
        """
        # A ratio outside [0, 1] yields a negative length for one part,
        # which is never a meaningful split, so reject it up front.
        if not 0.0 <= ratio <= 1.0:
            raise ValueError(f"split ratio must be within [0, 1], got {ratio!r}")
        n_samples = len(self.train_data)
        n_train = int(ratio * n_samples)
        n_val = n_samples - n_train
        train_data, val_data = random_split(self.train_data, [n_train, n_val])
        return train_data, val_data

    @staticmethod
    def _select(dataset, index):
        """Return tensor ``index`` of ``dataset``, looking through split wrappers."""
        if hasattr(dataset, "tensors"):
            return dataset.tensors[index]
        # The parts returned by random_split expose the wrapped dataset and
        # the chosen sample indices instead of ``tensors``.
        return MnistLoader._select(dataset.dataset, index)[dataset.indices]

    def get_data(self, str):
        """Return one of the stored tensors by name.

        Args:
            str: One of ``"x_train"``, ``"y_train"``, ``"x_test"``,
                ``"y_test"``. (The parameter name shadows the builtin; it
                is kept so existing keyword callers keep working.)

        Returns:
            The requested tensor. After a train/validation split the
            training tensors contain only the training samples.

        Raises:
            ValueError: If ``str`` is not one of the supported names.
        """
        fields = {
            "x_train": ("train_data", 0),
            "x_test": ("test_data", 0),
            "y_train": ("train_data", 1),
            "y_test": ("test_data", 1),
        }
        try:
            attr, index = fields[str]
        except (KeyError, TypeError):
            raise ValueError(
                f"unknown data name {str!r}; expected one of {sorted(fields)}"
            ) from None
        return self._select(getattr(self, attr), index)
if __name__ == "__main__":
    # Manual smoke run. The constructor's keyword for the train/validation
    # split is ``split_ratio``; passing ``val_ratio`` raised TypeError.
    data = MnistLoader(batch_size=128, shuffle=True, normalize=False, split_ratio=0.5)