Spaces:
Running
on
Zero
Running
on
Zero
| { | |
| "image_size_encoder": 256, | |
| "triplane_scaling_divider": 0.96806, | |
| "diffusion_input_size": 32, | |
| "trainer_name": "flow_matching", | |
| "use_amp": false, | |
| "clip_denoised": false, | |
| "num_samples": 1, | |
| "num_instances": 10, | |
| "use_ddim": false, | |
| "ddpm_model_path": "", | |
| "cldm_model_path": "", | |
| "rec_model_path": "", | |
| "logdir": "./", | |
| "data_dir": "NONE", | |
| "eval_data_dir": "/cpfs01/user/lanyushi.p/Repo/eccv24/open-source/InstantMesh/test_dir", | |
| "eval_batch_size": 1, | |
| "num_workers": 0, | |
| "overfitting": false, | |
| "image_size": 256, | |
| "iterations": 5000001, | |
| "schedule_sampler": "uniform", | |
| "anneal_lr": false, | |
| "lr": 2e-05, | |
| "weight_decay": 0.05, | |
| "lr_anneal_steps": 0, | |
| "batch_size": 1, | |
| "microbatch": 1, | |
| "ema_rate": "0.9999", | |
| "log_interval": 50, | |
| "eval_interval": 5000, | |
| "save_interval": 10000, | |
| "resume_checkpoint": "checkpoints/objaverse/objaverse-dit/i23d/model_joint_denoise_rec_model2990000.safetensors", | |
| "resume_cldm_checkpoint": "", | |
| "resume_checkpoint_EG3D": "", | |
| "use_fp16": false, | |
| "fp16_scale_growth": 0.001, | |
| "load_submodule_name": "", | |
| "ignore_resume_opt": false, | |
| "freeze_ae": false, | |
| "denoised_ae": true, | |
| "prompt": "a red chair", | |
| "interval": 5, | |
| "save_img": false, | |
| "use_train_trajectory": false, | |
| "unconditional_guidance_scale": 6.5, | |
| "use_eos_feature": false, | |
| "export_mesh": false, | |
| "cond_key": "img", | |
| "allow_tf32": true, | |
| "num_channels": 320, | |
| "num_res_blocks": 2, | |
| "num_heads": 8, | |
| "num_heads_upsample": -1, | |
| "num_head_channels": -1, | |
| "attention_resolutions": "4,2,1", | |
| "channel_mult": "", | |
| "dropout": 0.0, | |
| "class_cond": false, | |
| "use_checkpoint": false, | |
| "use_scale_shift_norm": true, | |
| "resblock_updown": false, | |
| "use_new_attention_order": false, | |
| "denoise_in_channels": 4, | |
| "denoise_out_channels": 4, | |
| "create_controlnet": false, | |
| "create_dit": true, | |
| "i23d": true, | |
| "create_unet_with_hint": false, | |
| "dit_model_arch": "DiT-PixArt-L/2", | |
| "use_spatial_transformer": true, | |
| "transformer_depth": 1, | |
| "context_dim": 1024, | |
| "pooling_ctx_dim": 768, | |
| "roll_out": true, | |
| "n_embed": null, | |
| "legacy": true, | |
| "mixing_logit_init": -6, | |
| "hint_channels": 3, | |
| "learn_sigma": false, | |
| "diffusion_steps": 1000, | |
| "noise_schedule": "linear", | |
| "standarization_xt": false, | |
| "timestep_respacing": "", | |
| "use_kl": false, | |
| "predict_xstart": false, | |
| "predict_v": true, | |
| "rescale_timesteps": false, | |
| "rescale_learned_sigmas": false, | |
| "mixed_prediction": false, | |
| "dino_version": "mv-sd-dit-dynaInp-trilatent", | |
| "encoder_in_channels": 10, | |
| "img_size": [ | |
| 256 | |
| ], | |
| "patch_size": 14, | |
| "in_chans": 384, | |
| "num_classes": 0, | |
| "embed_dim": 384, | |
| "depth": 6, | |
| "mlp_ratio": 4.0, | |
| "qkv_bias": false, | |
| "qk_scale": null, | |
| "drop_rate": 0.1, | |
| "attn_drop_rate": 0.0, | |
| "drop_path_rate": 0.0, | |
| "norm_layer": "nn.LayerNorm", | |
| "cls_token": false, | |
| "encoder_cls_token": false, | |
| "decoder_cls_token": false, | |
| "sr_kwargs": {}, | |
| "sr_ratio": 2, | |
| "use_clip": false, | |
| "arch_encoder": "vits", | |
| "arch_decoder": "vitb", | |
| "load_pretrain_encoder": false, | |
| "encoder_lr": 1e-05, | |
| "encoder_weight_decay": 0.001, | |
| "no_dim_up_mlp": true, | |
| "dim_up_mlp_as_func": false, | |
| "decoder_load_pretrained": false, | |
| "uvit_skip_encoder": true, | |
| "vae_p": 2, | |
| "ldm_z_channels": 4, | |
| "ldm_embed_dim": 4, | |
| "use_conf_map": false, | |
| "sd_E_ch": 64, | |
| "z_channels": 12, | |
| "sd_E_num_res_blocks": 1, | |
| "num_frames": 6, | |
| "arch_dit_decoder": "DiT2-L/2", | |
| "return_all_dit_layers": false, | |
| "lrm_decoder": false, | |
| "plane_n": 3, | |
| "gs_rendering": false, | |
| "decomposed": true, | |
| "triplane_fg_bg": false, | |
| "cfg": "objverse_tuneray_aug_resolution_64_64_auto", | |
| "density_reg": 0.0, | |
| "density_reg_p_dist": 0.004, | |
| "reg_type": "l1", | |
| "triplane_decoder_lr": 5e-05, | |
| "super_resolution_lr": 5e-05, | |
| "c_scale": 1, | |
| "nsr_lr": 0.02, | |
| "triplane_size": 224, | |
| "decoder_in_chans": 32, | |
| "triplane_in_chans": 32, | |
| "decoder_output_dim": 3, | |
| "out_chans": 96, | |
| "c_dim": 25, | |
| "ray_start": 0.6, | |
| "ray_end": 1.8, | |
| "rendering_kwargs": { | |
| "image_resolution": 256, | |
| "disparity_space_sampling": false, | |
| "clamp_mode": "softplus", | |
| "c_gen_conditioning_zero": true, | |
| "c_scale": 1, | |
| "superresolution_noise_mode": "none", | |
| "density_reg": 0.0, | |
| "density_reg_p_dist": 0.004, | |
| "reg_type": "l1", | |
| "decoder_lr_mul": 1, | |
| "decoder_activation": "sigmoid", | |
| "sr_antialias": true, | |
| "return_triplane_features": false, | |
| "return_sampling_details_flag": true, | |
| "superresolution_module": "utils.torch_utils.components.NearestConvSR", | |
| "depth_resolution": 64, | |
| "depth_resolution_importance": 64, | |
| "ray_start": "auto", | |
| "ray_end": "auto", | |
| "box_warp": 0.9, | |
| "white_back": true, | |
| "radius_range": [ | |
| 1.5, | |
| 2 | |
| ], | |
| "sampler_bbox_min": -0.45, | |
| "sampler_bbox_max": 0.45, | |
| "filter_out_of_bbox": true, | |
| "PatchRaySampler": true, | |
| "patch_rendering_resolution": 45, | |
| "z_near": 1.05, | |
| "z_far": 2.45 | |
| }, | |
| "sr_training": false, | |
| "bcg_synthesis": false, | |
| "bcg_synthesis_kwargs": {}, | |
| "patch_rendering_resolution": 45, | |
| "vit_decoder_lr": 1e-05, | |
| "vit_decoder_wd": 0.001, | |
| "ae_classname": "vit.vit_triplane.RodinSR_256_fusionv6_ConvQuant_liteSR_dinoInit3DAttn_SD_B_3L_C_withrollout_withSD_D_ditDecoder", | |
| "color_criterion": "mse", | |
| "l2_lambda": 1.0, | |
| "lpips_lambda": 0.8, | |
| "lpips_delay_iter": 0, | |
| "sr_delay_iter": 0, | |
| "kl_anneal": false, | |
| "latent_lambda": 0.0, | |
| "latent_criterion": "mse", | |
| "kl_lambda": 0.0, | |
| "ssim_lambda": 0.0, | |
| "l1_lambda": 0.0, | |
| "id_lambda": 0.0, | |
| "depth_lambda": 0.0, | |
| "alpha_lambda": 1.0, | |
| "fg_mse": false, | |
| "bg_lamdba": 0.01, | |
| "density_reg_every": 4, | |
| "shape_uniform_lambda": 0.005, | |
| "shape_importance_lambda": 0.01, | |
| "shape_depth_lambda": 0.0, | |
| "rec_cvD_lambda": 0.01, | |
| "nvs_cvD_lambda": 0.025, | |
| "patchgan_disc_factor": 0.01, | |
| "patchgan_disc_g_weight": 0.2, | |
| "r1_gamma": 1.0, | |
| "sds_lamdba": 1.0, | |
| "nvs_D_lr_mul": 1, | |
| "cano_D_lr_mul": 1, | |
| "ce_balanced_kl": 1.0, | |
| "p_eps_lambda": 1, | |
| "symmetry_loss": false, | |
| "depth_smoothness_lambda": 0.0, | |
| "ce_lambda": 0.5, | |
| "negative_entropy_lambda": 0.5, | |
| "grad_clip": true, | |
| "online_mask": false, | |
| "sde_time_eps": 0.01, | |
| "sde_beta_start": 0.1, | |
| "sde_beta_end": 20.0, | |
| "sde_sde_type": "vpsde", | |
| "sde_sigma2_0": 0.0, | |
| "iw_sample_p": "drop_sigma2t_iw", | |
| "iw_sample_q": "ll_iw", | |
| "iw_subvp_like_vp_sde": false, | |
| "train_vae": false, | |
| "pred_type": "v", | |
| "p_rendering_loss": false, | |
| "unfix_logit": false, | |
| "loss_type": "eps", | |
| "loss_weight": "simple", | |
| "diffusion_ce_anneal": true, | |
| "enable_mixing_normal": false, | |
| "only_mid_control": false, | |
| "control_key": "img", | |
| "normalize_clip_encoding": true, | |
| "scale_clip_encoding": 1.0, | |
| "cfg_dropout_prob": 0.1, | |
| "use_lmdb": false, | |
| "use_wds": false, | |
| "use_lmdb_compressed": false, | |
| "compile": false, | |
| "objv_dataset": true, | |
| "decode_encode_img_only": false, | |
| "load_wds_diff": true, | |
| "load_wds_latent": false, | |
| "eval_load_wds_instance": true, | |
| "shards_lst": "", | |
| "eval_shards_lst": "", | |
| "mv_input": true, | |
| "duplicate_sample": true, | |
| "orthog_duplicate": false, | |
| "split_chunk_input": false, | |
| "load_real": true, | |
| "four_view_for_latent": false, | |
| "single_view_for_i23d": false, | |
| "shuffle_across_cls": true, | |
| "load_extra_36_view": false, | |
| "mv_latent_dir": "", | |
| "append_depth": false, | |
| "plucker_embedding": true, | |
| "gs_cam_format": false, | |
| "split_chunk_size": 8, | |
| "path_type": "Linear", | |
| "prediction": "velocity", | |
| "sample_eps": null, | |
| "train_eps": null, | |
| "snr_type": "lognorm", | |
| "local_rank": 0, | |
| "gpus": 1 | |
| } |