Skip to content

Commit 774181e

Browse files
committed
custom dataset
1 parent ba9c505 commit 774181e

File tree

3 files changed

+585
-5
lines changed

3 files changed

+585
-5
lines changed

03_pytorch_computer_vision.ipynb

Lines changed: 111 additions & 5 deletions
Large diffs are not rendered by default.

04_custom_data_creation.ipynb

Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# PyTorch Custom Data Creation"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": 2,
13+
"metadata": {},
14+
"outputs": [],
15+
"source": [
16+
"import torch\n",
17+
"import torchvision\n",
18+
"import torchvision.datasets as datasets \n",
19+
"import torchvision.transforms as transforms\n",
20+
"\n",
21+
"# Set up the data directory\n",
22+
"import pathlib\n",
23+
"data_dir = pathlib.Path(\"../data\")\n"
24+
]
25+
},
26+
{
27+
"cell_type": "markdown",
28+
"metadata": {},
29+
"source": [
30+
"\n",
31+
"### Download data\n",
32+
"\n",
33+
"Get the Food101 dataset from PyTorch.\n",
34+
"\n",
35+
"* Food101 in torchvision.datasets - https://pytorch.org/vision/stable/generated/torchvision.datasets.Food101.html\n",
36+
"* Original Food101 dataset - https://data.vision.ee.ethz.ch/cvl/datasets_extra/food-101/\n",
37+
"\n",
38+
"**Note:** Downloading the dataset through torchvision may take ~10-15 minutes depending on your internet speed. It will download ~5GB of data to the specified root directory.\n",
39+
"\n"
40+
]
41+
},
42+
{
43+
"cell_type": "code",
44+
"execution_count": 3,
45+
"metadata": {},
46+
"outputs": [
47+
{
48+
"name": "stdout",
49+
"output_type": "stream",
50+
"text": [
51+
"Downloading https://data.vision.ee.ethz.ch/cvl/food-101.tar.gz to ..\\data\\food-101.tar.gz\n"
52+
]
53+
},
54+
{
55+
"name": "stderr",
56+
"output_type": "stream",
57+
"text": [
58+
" 47%|████▋ | 2346024960/4996278331 [03:34<04:53, 9022497.18it/s] "
59+
]
60+
}
61+
],
62+
"source": [
63+
"# Get training data\n",
64+
"train_data = datasets.Food101(root=data_dir,\n",
65+
" split=\"train\",\n",
66+
" #transform=transforms.ToTensor(),\n",
67+
" download=True)\n",
68+
"# Get testing data\n",
69+
"test_data = datasets.Food101(root=data_dir,\n",
70+
" split=\"test\",\n",
71+
" #transform=transforms.ToTensor(),\n",
72+
" download=True)"
73+
]
74+
},
75+
{
76+
"cell_type": "code",
77+
"execution_count": null,
78+
"metadata": {},
79+
"outputs": [],
80+
"source": [
81+
"train_data"
82+
]
83+
},
84+
{
85+
"cell_type": "code",
86+
"execution_count": null,
87+
"metadata": {},
88+
"outputs": [],
89+
"source": [
90+
"class_names = train_data.classes\n",
91+
"class_names[:10]"
92+
]
93+
},
94+
{
95+
"cell_type": "code",
96+
"execution_count": null,
97+
"metadata": {},
98+
"outputs": [],
99+
"source": [
100+
"# View first sample (PIL image format)\n",
101+
"print(class_names[train_data[0][1]])\n",
102+
"train_data[0][0]"
103+
]
104+
},
105+
{
106+
"cell_type": "markdown",
107+
"metadata": {},
108+
"source": [
109+
"\n",
110+
"### Find subset of appropriate classes\n",
111+
"\n",
112+
"Target classes: steak, pizza, sushi.\n",
113+
"\n",
114+
"Current path setup:\n",
115+
"\n",
116+
"../data/food-101/images/CLASS_NAME/IMAGES.jpg\n",
117+
"\n",
118+
"The plan is to collect the image filenames for each target class (pizza, steak, sushi) and then copy those images to separate folders.\n",
119+
"\n",
120+
"The goal is to take a random 10% of the target-class images from both the training and test splits.\n"
121+
]
122+
},
123+
{
124+
"cell_type": "code",
125+
"execution_count": null,
126+
"metadata": {},
127+
"outputs": [],
128+
"source": []
129+
},
130+
{
131+
"cell_type": "code",
132+
"execution_count": null,
133+
"metadata": {},
134+
"outputs": [],
135+
"source": []
136+
},
137+
{
138+
"cell_type": "code",
139+
"execution_count": null,
140+
"metadata": {},
141+
"outputs": [],
142+
"source": []
143+
},
144+
{
145+
"cell_type": "code",
146+
"execution_count": null,
147+
"metadata": {},
148+
"outputs": [],
149+
"source": []
150+
},
151+
{
152+
"cell_type": "code",
153+
"execution_count": null,
154+
"metadata": {},
155+
"outputs": [],
156+
"source": []
157+
},
158+
{
159+
"cell_type": "code",
160+
"execution_count": null,
161+
"metadata": {},
162+
"outputs": [],
163+
"source": []
164+
},
165+
{
166+
"cell_type": "code",
167+
"execution_count": null,
168+
"metadata": {},
169+
"outputs": [],
170+
"source": []
171+
},
172+
{
173+
"cell_type": "code",
174+
"execution_count": null,
175+
"metadata": {},
176+
"outputs": [],
177+
"source": []
178+
},
179+
{
180+
"cell_type": "code",
181+
"execution_count": null,
182+
"metadata": {},
183+
"outputs": [],
184+
"source": []
185+
},
186+
{
187+
"cell_type": "code",
188+
"execution_count": null,
189+
"metadata": {},
190+
"outputs": [],
191+
"source": []
192+
},
193+
{
194+
"cell_type": "code",
195+
"execution_count": null,
196+
"metadata": {},
197+
"outputs": [],
198+
"source": []
199+
},
200+
{
201+
"cell_type": "code",
202+
"execution_count": null,
203+
"metadata": {},
204+
"outputs": [],
205+
"source": []
206+
}
207+
],
208+
"metadata": {
209+
"kernelspec": {
210+
"display_name": "Python 3",
211+
"language": "python",
212+
"name": "python3"
213+
},
214+
"language_info": {
215+
"codemirror_mode": {
216+
"name": "ipython",
217+
"version": 3
218+
},
219+
"file_extension": ".py",
220+
"mimetype": "text/x-python",
221+
"name": "python",
222+
"nbconvert_exporter": "python",
223+
"pygments_lexer": "ipython3",
224+
"version": "3.12.2"
225+
}
226+
},
227+
"nbformat": 4,
228+
"nbformat_minor": 2
229+
}

0 commit comments

Comments
 (0)