-
Notifications
You must be signed in to change notification settings - Fork 5
/
dataset.py
64 lines (54 loc) · 2.11 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from torch.utils.data import Dataset
from PIL import Image
import random
import os
import torch
class CelebA(Dataset):
"""Dataset class for the CelebA dataset."""
def __init__(self, image_dir, attr_path, selected_attrs, transform, mode):
"""Initialize and preprocess the CelebA dataset."""
self.image_dir = image_dir
self.attr_path = attr_path
self.selected_attrs = selected_attrs
self.transform = transform
self.mode = mode
self.train_dataset = []
self.test_dataset = []
self.attr2idx = {}
self.idx2attr = {}
self.preprocess()
if mode == 'train':
self.num_images = len(self.train_dataset)
else:
self.num_images = len(self.test_dataset)
def preprocess(self):
"""Preprocess the CelebA attribute file."""
lines = [line.rstrip() for line in open(self.attr_path, 'r')]
all_attr_names = lines[1].split()
for i, attr_name in enumerate(all_attr_names):
self.attr2idx[attr_name] = i
self.idx2attr[i] = attr_name
lines = lines[2:]
random.shuffle(lines)
for i, line in enumerate(lines):
split = line.split()
filename = split[0]
values = split[1:]
label = []
for attr_name in self.selected_attrs:
idx = self.attr2idx[attr_name]
label.append(values[idx] == '1')
if (i+1) < 2000:
self.test_dataset.append([filename, label])
else:
self.train_dataset.append([filename, label])
print('Finished preprocessing the CelebA dataset...')
def __getitem__(self, index):
"""Return one image and its corresponding attribute label."""
dataset = self.train_dataset if self.mode == 'train' else self.test_dataset
filename, label = dataset[index]
image = Image.open(os.path.join(self.image_dir, filename))
return self.transform(image), torch.FloatTensor(label)
def __len__(self):
"""Return the number of images."""
return self.num_images