This repository has been archived by the owner on Mar 23, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathstart.py
217 lines (172 loc) · 7.14 KB
/
start.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
import logging
import os
import shutil
import sys
import time
from typing import List
import yaml
from easydict import EasyDict as edict
# view https://github.com/protocolbuffers/protobuf/issues/10051 for detail
os.environ.setdefault('PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION', 'python')
from tensorboardX import SummaryWriter
from tqdm import tqdm
from ymir_exc import dataset_reader as dr
from ymir_exc import env, monitor
from ymir_exc import result_writer as rw
def get_merged_config() -> edict:
    """Build and return the merged executor configuration.

    Combines three sources: the ymir environment info, the optional
    code-config yaml file, and cfg.param — values from cfg.param take
    precedence and overwrite code-config values.
    """
    def load_code_config(config_file: str) -> dict:
        # no code-config file configured -> nothing to merge in
        if config_file is None:
            return dict()
        with open(config_file, 'r') as stream:
            return yaml.safe_load(stream)

    executor_cfg = env.get_executor_config()
    param_cfg = load_code_config(executor_cfg.get('code_config', None))
    # executor config (cfg.param) wins over the code-config defaults
    param_cfg.update(executor_cfg)

    merged = edict()
    # hyperparameter information
    merged.param = param_cfg
    # ymir path information
    merged.ymir = env.get_current_env()
    return merged
def start() -> int:
    """Entry point: run training, or mining and/or inference, per ymir env."""
    cfg = get_merged_config()
    logging.info(f'merged config: {cfg}')
    logging.info(f'user info: {os.getuid()}:{os.getgid()}')

    # training is exclusive; mining and inference may both run in one task
    if cfg.ymir.run_training:
        _run_training(cfg)
        return 0

    if cfg.ymir.run_mining:
        _run_mining(cfg)
    if cfg.ymir.run_infer:
        _run_infer(cfg)
    return 0
def _run_training(cfg: edict) -> None:
    """
    sample function of training, which shows:
    1. how to get config
    2. how to read training and validation datasets
    3. how to write logs
    4. how to write training result
    """
    # get config
    class_names: List[str] = cfg.param['class_names']
    expected_mAP: float = cfg.param.get('map')
    model: str = cfg.param.get('model')

    # read training dataset items
    # note that `dr.item_paths` is a generator
    for asset_path, ann_path in dr.item_paths(env.DatasetType.TRAINING):
        logging.info(f"asset: {asset_path}, annotation: {ann_path}")
        with open(ann_path, 'r') as fp:
            lines = fp.readlines()
        # each annotation line is "class_id,x1,y1,x2,y2,..."
        for line in lines:
            class_id, x1, y1, x2, y2 = [int(s)
                                        for s in line.strip().split(',')[0:5]]
            name = class_names[class_id]
            logging.info(f"{name} xmin={x1} ymin={y1} xmax={x2} ymax={y2}")
        # demo only: inspect the first dataset item, then stop reading
        break

    # write task process percent to monitor.txt
    monitor.write_monitor_logger(percent=0.0)

    # fake training function
    _dummy_work(cfg)

    # suppose we have a long time training, and have saved the final model
    # use `cfg.ymir.output.models_dir` to get model output dir
    models_dir = cfg.ymir.output.models_dir
    os.makedirs(models_dir, exist_ok=True)
    model_weight = os.path.join(models_dir, f'{model}.pt')
    # create an empty weight file directly instead of `os.system('touch ...')`:
    # no shell subprocess, and paths containing spaces/shell metachars are safe
    with open(model_weight, 'w'):
        pass

    # write other information
    with open(os.path.join(models_dir, 'model.yaml'), 'w') as f:
        f.write(f'model: {model}')
    shutil.copy('models/vgg.py',
                os.path.join(models_dir, 'vgg.py'))

    # use `rw.write_training_result` to save training result
    # the files in model_names will be saved and can be download from ymir-web
    rw.write_training_result(model_names=[f'{model}.pt',
                                          'model.yaml',
                                          'vgg.py'],
                             mAP=expected_mAP,
                             classAPs={class_name: expected_mAP
                                       for class_name in class_names})

    # if task done, write 100% percent log
    logging.info('task done')
    monitor.write_monitor_logger(percent=1.0)
def _run_mining(cfg: edict) -> None:
    """Fake mining: assign a score to every candidate asset and write result."""
    # the weight_files will be the model_names in _run_training()
    models_dir = cfg.ymir.input.models_dir
    weight_files = [os.path.join(models_dir, name)
                    for name in cfg.param['model_params_path']]
    logging.info(f'use weight files {weight_files}')

    # use `dr.item_paths` to read candidate dataset items
    # note that annotations path will be empty str if no annotations
    asset_paths = [asset for asset, _ in dr.item_paths(env.DatasetType.CANDIDATE)]
    if not asset_paths:
        raise ValueError('empty asset paths')

    # use `monitor.write_monitor_logger` to write task process to monitor.txt
    logging.info(f"assets count: {len(asset_paths)}")
    monitor.write_monitor_logger(percent=0.0)

    # fake mining function
    _dummy_work(cfg)

    # write mining result
    # here we give a fake score to each assets
    total = len(asset_paths)
    mining_result = []
    for idx, asset in enumerate(asset_paths):
        mining_result.append((asset, idx / total))
    rw.write_mining_result(mining_result=mining_result)

    # if task done, write 100% percent log
    logging.info('mining done')
    monitor.write_monitor_logger(percent=1.0)
def _run_infer(cfg: edict) -> None:
    """Fake inference: emit one fixed annotation for every candidate asset."""
    # the weight_files will be the model_names in _run_training()
    class_names: List[str] = cfg.param['class_names']
    models_dir = cfg.ymir.input.models_dir
    weight_files = [os.path.join(models_dir, name)
                    for name in cfg.param['model_params_path']]
    logging.info(f'use weight files {weight_files}')

    # use `dr.item_paths` to read candidate dataset items
    # note that annotations path will be empty str if no annotations
    asset_paths: List[str] = [asset for asset, _
                              in dr.item_paths(env.DatasetType.CANDIDATE)]
    logging.info(f"assets count: {len(asset_paths)}")
    if not asset_paths or not class_names:
        raise ValueError('empty asset paths or class names')

    # write log to console and write task process percent to monitor.txt
    monitor.write_monitor_logger(percent=0.0)

    # fake infer function
    _dummy_work(cfg)

    # write infer result
    fake_annotation = rw.Annotation(
        class_name=class_names[0],
        score=0.9,
        box=rw.Box(x=50, y=50, w=150, h=150))
    infer_result = {asset: [fake_annotation] for asset in asset_paths}
    rw.write_infer_result(infer_result=infer_result)

    # if task done, write 100% percent log
    logging.info('infer done')
    monitor.write_monitor_logger(percent=1.0)
def _dummy_work(cfg: edict) -> None:
    """Simulate a long-running task: sleep one second per epoch and report
    progress to monitor.txt; for training tasks, also write a fake loss
    curve to tensorboard.
    """
    epochs = cfg.param.epoch
    # use cfg.ymir.output.tensorboard_dir to get tensorboard log directory;
    # only a training task gets a tensorboard writer
    tb_log = (SummaryWriter(cfg.ymir.output.tensorboard_dir)
              if cfg.ymir.run_training else None)
    try:
        for e in tqdm(range(epochs)):
            if tb_log is not None:
                tb_log.add_scalar("fake_loss", 10 / (1 + e), e)
            time.sleep(1)
            monitor.write_monitor_logger(percent=e / epochs)
    finally:
        # close the writer so pending events are flushed to disk
        # (the original leaked it and never flushed/closed)
        if tb_log is not None:
            tb_log.close()
if __name__ == '__main__':
    # log to stdout so the executor's output is captured by ymir
    logging.basicConfig(
        stream=sys.stdout,
        format='%(levelname)-8s: [%(asctime)s] %(message)s',
        datefmt='%Y%m%d-%H:%M:%S',
        level=logging.INFO,
    )
    sys.exit(start())