---
# Configuration snapshot for a MultiTaskRegressor run on SpectraDataset.
# Block structure was restored from a whitespace-collapsed copy; nesting
# follows the alphabetical key order of the original dump.

# Settings for the conformer encoder stack.
conformer_args:
  dropout_p: 0.2
  # Sub-module order inside each conformer block.
  encoder:
    - mhsa_pro
    - conv
    - ffn
  encoder_dim: 2048
  kernel_size: 3
  norm: postnorm
  num_heads: 8
  num_layers: 8
  timeshift: false

# Dataset, logging and run-level settings.
data_args:
  batch_size: 64
  continuum_norm: true
  create_umap: false
  data_dir: /data/lamost/data
  dataset: SpectraDataset
  exp_num: 4
  lc_freq: 0.0208
  log_dir: /data/lightSpec/logs
  max_days_lc: 720
  max_len_spectra: 4096
  model_name: MultiTaskRegressor
  num_epochs: 1000
  test_run: false

# Model hyper-parameters and checkpoint to load.
model_args:
  activation: silu
  avg_output: true
  beta: 1
  checkpoint_num: 1
  checkpoint_path: /data/lightSpec/logs/spec_decode2_2025-02-15/MultiTaskRegressor_spectra_decode_3.pth
  dropout_p: 0.2
  encoder_dims:
    - 64
    - 128
    - 256
    - 1024
    - 2048
  in_channels: 1
  kernel_size: 3
  load_checkpoint: true
  num_layers: 5
  num_quantiles: 5
  output_dim: 3
  stride: 1
  transformer_layers: 4

model_name: MultiTaskRegressor

# Printed module tree of the instantiated model (informational).
# NOTE(review): interior indentation was lost in the collapsed copy and is
# reconstructed here at 2 spaces per nesting level -- confirm against the
# original log if exact whitespace matters.
model_structure: |-
  DistributedDataParallel(
    (module): MultiTaskRegressor(
      (encoder): MultiEncoder(
        (backbone): CNNEncoder(
          (activation): SiLU()
          (embedding): Sequential(
            (0): Conv1d(1, 64, kernel_size=(3,), stride=(1,), padding=same, bias=False)
            (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU()
          )
          (layers): ModuleList(
            (0): ConvBlock(
              (activation): SiLU()
              (layers): Sequential(
                (0): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=same, bias=False)
                (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                (2): SiLU()
              )
            )
            (1): ConvBlock(
              (activation): SiLU()
              (layers): Sequential(
                (0): Conv1d(128, 256, kernel_size=(3,), stride=(1,), padding=same, bias=False)
                (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                (2): SiLU()
              )
            )
            (2): ConvBlock(
              (activation): SiLU()
              (layers): Sequential(
                (0): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=same, bias=False)
                (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                (2): SiLU()
              )
            )
            (3): ConvBlock(
              (activation): SiLU()
              (layers): Sequential(
                (0): Conv1d(1024, 2048, kernel_size=(3,), stride=(1,), padding=same, bias=False)
                (1): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                (2): SiLU()
              )
            )
            (4): ConvBlock(
              (activation): SiLU()
              (layers): Sequential(
                (0): Conv1d(2048, 2048, kernel_size=(3,), stride=(1,), padding=same, bias=False)
                (1): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                (2): SiLU()
              )
            )
          )
          (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        )
        (pe): RotaryEmbedding()
        (encoder): ConformerEncoder(
          (blocks): ModuleList(
            (0-7): 8 x ConformerBlock(
              (modlist): ModuleList(
                (0): PostNorm(
                  (module): MHA_rotary(
                    (query): Linear(in_features=2048, out_features=2048, bias=True)
                    (key): Linear(in_features=2048, out_features=2048, bias=True)
                    (value): Linear(in_features=2048, out_features=2048, bias=True)
                    (rotary_emb): RotaryEmbedding()
                    (output): Linear(in_features=2048, out_features=2048, bias=True)
                  )
                  (norm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
                )
                (1): PostNorm(
                  (module): ConvBlock(
                    (layers): Sequential(
                      (0): Conv1d(2048, 2048, kernel_size=(3,), stride=(1,), padding=same, bias=False)
                      (1): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                      (2): SiLU()
                    )
                  )
                  (norm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
                )
                (2): PostNorm(
                  (module): FeedForwardModule(
                    (sequential): Sequential(
                      (0): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
                      (1): Linear(
                        (linear): Linear(in_features=2048, out_features=8192, bias=True)
                      )
                      (2): SiLU()
                      (3): Dropout(p=0.2, inplace=False)
                      (4): Linear(
                        (linear): Linear(in_features=8192, out_features=2048, bias=True)
                      )
                      (5): Dropout(p=0.2, inplace=False)
                    )
                  )
                  (norm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
                )
              )
            )
          )
        )
      )
      (decoder): CNNDecoder(
        (activation): SiLU()
        (initial_expand): Linear(in_features=2048, out_features=8192, bias=True)
        (layers): ModuleList(
          (0): Sequential(
            (0): ConvTranspose1d(2048, 1024, kernel_size=(4,), stride=(2,), padding=(1,), bias=False)
            (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU()
          )
          (1): Sequential(
            (0): ConvTranspose1d(1024, 256, kernel_size=(4,), stride=(2,), padding=(1,), bias=False)
            (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU()
          )
          (2): Sequential(
            (0): ConvTranspose1d(256, 128, kernel_size=(4,), stride=(2,), padding=(1,), bias=False)
            (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU()
          )
          (3): Sequential(
            (0): ConvTranspose1d(128, 64, kernel_size=(4,), stride=(2,), padding=(1,), bias=False)
            (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU()
          )
          (4): Sequential(
            (0): ConvTranspose1d(64, 64, kernel_size=(4,), stride=(2,), padding=(1,), bias=False)
            (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU()
          )
        )
        (final_conv): ConvTranspose1d(64, 1, kernel_size=(3,), stride=(1,), padding=(1,))
      )
      (activation): SiLU()
      (regressor): Sequential(
        (0): Linear(in_features=2048, out_features=1024, bias=True)
        (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): SiLU()
        (3): Dropout(p=0.2, inplace=False)
        (4): Linear(in_features=1024, out_features=15, bias=True)
      )
    )
  )

num_params: 551944464

# Optimizer / learning-rate schedule settings.
optim_args:
  # NOTE(review): `2e-5` and `5e-6` have no decimal point. YAML 1.1 loaders
  # (e.g. PyYAML) read them as strings, YAML 1.2 loaders as floats. Left
  # unchanged to preserve the current type -- confirm the consumer casts them.
  max_lr: 2e-5
  quantiles:
    - 0.1
    - 0.25
    - 0.5
    - 0.75
    - 0.9
  steps_per_epoch: 3500
  warmup_pct: 0.3
  weight_decay: 5e-6

# Printed form of the input transform pipeline (informational).
# NOTE(review): the 4-space interior indent is an assumption; the collapsed
# copy only shows that the inner lines were indented -- confirm.
transforms: |-
  Compose(
      LAMOSTSpectrumPreprocessor(blue_range=(3841, 5800), red_range=(5800, 8798), resample_step=0.0001)
      ToTensor
  )