README.md (6 changes: 3 additions & 3 deletions)
@@ -63,13 +63,13 @@ Please refer to [env/README.md](env/README.md) for detailed environment setup in
 Run demo with different modes:
 ```bash
 # Reconstruct the input image
-python run_demo.py --render_mode reconstruct
+python run_demo.py --render_mode reconstruct --low_ram

 # Generate novel poses (animation)
-python run_demo.py --render_mode novel_pose
+python run_demo.py --render_mode novel_pose --low_ram

 # Generate 360-degree view
-python run_demo.py --render_mode novel_pose_A
+python run_demo.py --render_mode novel_pose_A --low_ram
 ```

 ### Training
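For readers skimming the diff: `--render_mode` and `--low_ram` are the only flags exercised above. Below is a minimal sketch of how `run_demo.py` might wire them up; the flag names come from the commands in the diff, but the parsing code and the memory-saving behavior shown are assumptions, not the repository's actual implementation.

```python
import argparse

# Hypothetical wiring -- run_demo.py's real internals are not part of this diff.
parser = argparse.ArgumentParser(description="IDOL demo")
parser.add_argument(
    "--render_mode",
    choices=["reconstruct", "novel_pose", "novel_pose_A"],
    default="reconstruct",
    help="reconstruct the input, animate novel poses, or render a 360 view",
)
parser.add_argument(
    "--low_ram",
    action="store_true",
    help="trade speed for a smaller GPU memory footprint (assumed behavior)",
)
args = parser.parse_args()

# A low-RAM path typically renders in smaller chunks; the exact tactic here
# (chunked rendering) is an illustrative guess, not the repo's code.
chunk_size = 1 if args.low_ram else 8
```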
configs/idol_v0.yaml (286 changes: 143 additions & 143 deletions)
@@ -1,144 +1,144 @@
-
-debug: True
-# code_size: [32, 256, 256]
-code_size: [32, 1024, 1024]
-model:
-  # base_learning_rate: 2.0e-04 # yy Need to check
-  target: lib.SapiensGS_SA_v1
-  params:
-    # optimizer add
-    # use_bf16: true
-    max_steps: 100_000
-    warmup_steps: 10_000 #12_000
-    use_checkpoint: true
-    lambda_depth_tv: 0.05
-    lambda_lpips: 10 #2.0
-    lambda_mse: 20 #1.0
-    lambda_offset: 1 #offset_weight: 50 mse 20, lpips 0.1
-    neck_learning_rate: 5e-4
-    decoder_learning_rate: 5e-4
-
-
-    output_hidden_states: true # if True, will output the hidden states from sapiens shallow layer, for the neck decoder
-
-    loss_coef: 0.5
-    init_iter: 500
-    scale_weight: 0.01
-    smplx_path: 'work_dirs/demo_data/Ways_to_Catch_360_clip1.json'
-
-    code_reshape: [32, 96, 96]
-    patch_size: 1
-    code_activation:
-      type: tanh
-      mean: 0.0
-      std: 0.5
-      clip_range: 2
-    grid_size: 64
-    encoder:
-      target: lib.models.sapiens.SapiensWrapper_ts
-      params:
-        # model_path: work_dirs/ckpt/sapiens_1b_epoch_173_torchscript.pt2
-        model_path: /apdcephfs_cq8/share_1367250/harriswen/projects/sapiens_convert/checkpoints//sapiens_1b_epoch_173_torchscript.pt2
-        layer_num: 40
-        img_size: [1024, 736]
-        freeze: True
-    neck:
-      target: lib.models.transformer_sa.neck_SA_v3_skip # TODO!! add a self attention version
-      params:
-        patch_size: 4 #4,
-        in_chans: 32 #32, # the uv code dims
-        num_patches: 9216 #4096 #num_patches #,#4096, # 16*16
-        embed_dim: 1536 # 1920 # 1920 for sapiens encoder2 #1024 # the feature extrators outputs
-        decoder_embed_dim: 1536 # 1024
-        decoder_depth: 16 # 8
-        decoder_num_heads: 16 #16,
-        total_num_hidden_states: 40
-        mlp_ratio: 4.
-    decoder:
-      target: lib.models.decoders.UVNDecoder_gender
-      params:
-        interp_mode: bilinear
-        base_layers: [16, 64]
-        density_layers: [64, 1]
-        color_layers: [16, 128, 9]
-        offset_layers: [64, 3]
-        use_dir_enc: false
-        dir_layers: [16, 64]
-        activation: silu
-        bg_color: 1
-        sigma_activation: sigmoid
-        sigmoid_saturation: 0.001
-        gender: neutral
-        is_sub2: true ## update, make it into 10w gs points
-        multires: 0
-        image_size: [640, 896]
-        superres: false
-        focal: 1120
-        up_cnn_in_channels: 1536 # be the same as decoder_embed_dim
-        reshape_type: VitHead
-        vithead_param:
-          in_channels: 1536 # be the same as decoder_embed_dim
-          out_channels: 32
-          deconv_out_channels: [512, 512, 512, 256]
-          deconv_kernel_sizes: [4, 4, 4, 4]
-          conv_out_channels: [128, 128]
-          conv_kernel_sizes: [3, 3]
-        fix_sigma: true
-
-dataset:
-  target: lib.datasets.dataloader.DataModuleFromConfig
-  params:
-    batch_size: 1 #16 # 6 for lpips
-    num_workers: 2 #2
-    # working when in debug mode
-    debug_cache_path: ./processed_data/flux_batch1_5000_test_50_local.npy
-
-    train:
-      target: lib.datasets.AvatarDataset
-      params:
-        data_prefix: None
-
-        cache_path: [
-          ./processed_data/deepfashion_train_140_local.npy,
-          ./processed_data/flux_batch1_5000_train_140_local.npy
-        ]
-
-        specific_observation_num: 5
-        better_range: true
-        first_is_front: true
-        if_include_video_ref_img: true
-        prob_include_video_ref_img: 0.5
-        img_res: [640, 896]
-    validation:
-      target: lib.datasets.AvatarDataset
-      params:
-        data_prefix: None
-        load_imgs: true
-        specific_observation_num: 5
-        better_range: true
-        first_is_front: true
-        img_res: [640, 896]
-        cache_path: [
-          ./processed_data/deepfashion_val_10_local.npy,
-          ./processed_data/flux_batch1_5000_val_10_local.npy
-        ]
-
-
-
-lightning:
-  modelcheckpoint:
-    params:
-      every_n_train_steps: 4000 #2000
-      save_top_k: -1
-      save_last: true
-      monitor: 'train/loss_mse' # ADD this logging in the wrapper_sa
-      mode: "min"
-      filename: 'sample-synData-epoch{epoch:02d}-val_loss{val/loss:.2f}'
-  callbacks: {}
-  trainer:
-    num_sanity_val_steps: 0
-    accumulate_grad_batches: 1
-    gradient_clip_val: 10.0
-    max_steps: 80000
-    check_val_every_n_epoch: 1 ## check validation set every 1 training batches in the current epoch
+debug: True
+# code_size: [32, 256, 256]
+code_size: [32, 1024, 1024]
+model:
+  # base_learning_rate: 2.0e-04 # yy Need to check
+  target: lib.SapiensGS_SA_v1
+  params:
+    # optimizer add
+    # use_bf16: true
+    max_steps: 100_000
+    warmup_steps: 10_000 #12_000
+    use_checkpoint: true
+    lambda_depth_tv: 0.05
+    lambda_lpips: 10 #2.0
+    lambda_mse: 20 #1.0
+    lambda_offset: 1 #offset_weight: 50 mse 20, lpips 0.1
+    neck_learning_rate: 5e-4
+    decoder_learning_rate: 5e-4
+    output_hidden_states: true # if True, will output the hidden states from sapiens shallow layer, for the neck decoder
+    loss_coef: 0.5
+    init_iter: 500
+    scale_weight: 0.01
+    smplx_path: 'work_dirs/demo_data/Ways_to_Catch_360_clip1.json'
+    code_reshape: [32, 96, 96]
+    patch_size: 1
+    code_activation:
+      type: tanh
+      mean: 0.0
+      std: 0.5
+      clip_range: 2
+    grid_size: 64
+    encoder:
+      target: lib.models.sapiens.SapiensWrapper_ts
+      params:
+        model_path: work_dirs/ckpt/sapiens_1b_epoch_173_torchscript.pt2
+        # model_path: /apdcephfs_cq8/share_1367250/harriswen/projects/sapiens_convert/checkpoints//sapiens_1b_epoch_173_torchscript.pt2
+        layer_num: 40
+        img_size: [1024, 736]
+        freeze: True
+    neck:
+      target: lib.models.transformer_sa.neck_SA_v3_skip # TODO!! add a self attention version
+      params:
+        patch_size: 4 #4,
+        in_chans: 32 #32, # the uv code dims
+        num_patches: 9216 #4096 #num_patches #,#4096, # 16*16
+        embed_dim: 1536 # 1920 # 1920 for sapiens encoder2 #1024 # the feature extrators outputs
+        decoder_embed_dim: 1536 # 1024
+        decoder_depth: 16 # 8
+        decoder_num_heads: 16 #16,
+        total_num_hidden_states: 40
+        mlp_ratio: 4.
+    decoder:
+      target: lib.models.decoders.UVNDecoder_gender
+      params:
+        interp_mode: bilinear
+        base_layers: [16, 64]
+        density_layers: [64, 1]
+        color_layers: [16, 128, 9]
+        offset_layers: [64, 3]
+        use_dir_enc: false
+        dir_layers: [16, 64]
+        activation: silu
+        bg_color: 1
+        sigma_activation: sigmoid
+        sigmoid_saturation: 0.001
+        gender: neutral
+        is_sub2: true ## update, make it into 10w gs points
+        multires: 0
+        image_size: [640, 896]
+        superres: false
+        focal: 1120
+        up_cnn_in_channels: 1536 # be the same as decoder_embed_dim
+        reshape_type: VitHead
+        vithead_param:
+          in_channels: 1536 # be the same as decoder_embed_dim
+          out_channels: 32
+          deconv_out_channels: [512, 512, 512, 256]
+          deconv_kernel_sizes: [4, 4, 4, 4]
+          conv_out_channels: [128, 128]
+          conv_kernel_sizes: [3, 3]
+        fix_sigma: true
+dataset:
+  target: lib.datasets.dataloader.DataModuleFromConfig
+  params:
+    batch_size: 1 #16 # 6 for lpips
+    num_workers: 2 #2
+    # working when in debug mode
+    debug_cache_path: ./processed_data/flux_batch1_5000_test_50_local.npy
+    train:
+      target: lib.datasets.AvatarDataset
+      params:
+        data_prefix: None
+        cache_path: [
+          ./processed_data/deepfashion_train_140_local.npy,
+          ./processed_data/flux_batch1_5000_train_140_local.npy
+        ]
+        specific_observation_num: 5
+        better_range: true
+        first_is_front: true
+        if_include_video_ref_img: true
+        prob_include_video_ref_img: 0.5
+        img_res: [640, 896]
+    validation:
+      target: lib.datasets.AvatarDataset
+      params:
+        data_prefix: None
+        load_imgs: true
+        specific_observation_num: 5
+        better_range: true
+        first_is_front: true
+        img_res: [640, 896]
+        cache_path: [
+          ./processed_data/deepfashion_val_10_local.npy,
+          ./processed_data/flux_batch1_5000_val_10_local.npy
+        ]
+lightning:
+  modelcheckpoint:
+    params:
+      every_n_train_steps: 4000 #2000
+      save_top_k: -1
+      save_last: true
+      monitor: 'train/loss_mse' # ADD this logging in the wrapper_sa
+      mode: "min"
+      filename: 'sample-synData-epoch{epoch:02d}-val_loss{val/loss:.2f}'
+  callbacks: {}
+  trainer:
+    num_sanity_val_steps: 0
+    accumulate_grad_batches: 1
+    gradient_clip_val: 10.0
+    max_steps: 80000
+    check_val_every_n_epoch: 1 ## check validation set every 1 training batches in the current epoch
+    benchmark: true
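The config follows the `target:`/`params:` convention common in PyTorch Lightning codebases: `target` is a dotted import path and `params` become constructor keyword arguments. A minimal sketch of how such a node is typically resolved; the helper names and the use of OmegaConf are assumptions about this repo's loader, not confirmed by the diff.

```python
import importlib

from omegaconf import OmegaConf  # assumed; any YAML loader would do


def get_obj_from_str(path: str):
    """Resolve a dotted path such as 'lib.models.decoders.UVNDecoder_gender'."""
    module_name, cls_name = path.rsplit(".", 1)
    return getattr(importlib.import_module(module_name), cls_name)


def instantiate_from_config(node):
    """Build an object from a {'target': ..., 'params': {...}} config node."""
    return get_obj_from_str(node["target"])(**node.get("params", {}))


cfg = OmegaConf.load("configs/idol_v0.yaml")
model = instantiate_from_config(cfg.model)  # e.g. lib.SapiensGS_SA_v1(**params)
```

Under this pattern, switching the encoder checkpoint (the `model_path` change in this diff) requires no code change, only a different string in the YAML.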
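The `lightning.modelcheckpoint.params` block maps one-to-one onto PyTorch Lightning's `ModelCheckpoint` callback: `save_top_k: -1` keeps every checkpoint, one is written every 4000 training steps, and the monitored metric `train/loss_mse` must be logged by the LightningModule (as the inline comment notes). A roughly equivalent explicit construction:

```python
from pytorch_lightning.callbacks import ModelCheckpoint

checkpoint_cb = ModelCheckpoint(
    every_n_train_steps=4000,  # checkpoint cadence, in optimizer steps
    save_top_k=-1,             # -1 = never delete old checkpoints
    save_last=True,            # also maintain a rolling last.ckpt
    monitor="train/loss_mse",  # must be logged via self.log() in the module
    mode="min",
    filename="sample-synData-epoch{epoch:02d}-val_loss{val/loss:.2f}",
)
```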