Spaces:
Runtime error
Runtime error
| # data settings | |
| data: | |
| data_dir: [] | |
| caption_proportion: | |
| prompt: 1 | |
| external_caption_suffixes: [] | |
| external_clipscore_suffixes: [] | |
| clip_thr_temperature: 1.0 | |
| clip_thr: 0.0 | |
| sort_dataset: false | |
| load_text_feat: false | |
| load_vae_feat: false | |
| transform: default_train | |
| type: SanaWebDatasetMS | |
| image_size: 512 | |
| hq_only: false | |
| valid_num: 0 | |
| # model settings | |
| model: | |
| model: SanaMS_600M_P1_D28 | |
| image_size: 512 | |
| mixed_precision: fp16 # ['fp16', 'fp32', 'bf16'] | |
| fp32_attention: true | |
| load_from: | |
| resume_from: | |
| checkpoint: | |
| load_ema: false | |
| resume_lr_scheduler: true | |
| resume_optimizer: true | |
| aspect_ratio_type: ASPECT_RATIO_1024 | |
| multi_scale: true | |
| pe_interpolation: 1.0 | |
| micro_condition: false | |
| attn_type: linear # 'flash', 'linear', 'vanilla', 'triton_linear' | |
| cross_norm: false | |
| autocast_linear_attn: false | |
| ffn_type: glumbconv | |
| mlp_acts: | |
| - silu | |
| - silu | |
| - | |
| mlp_ratio: 2.5 | |
| use_pe: false | |
| qk_norm: false | |
| class_dropout_prob: 0.0 | |
| linear_head_dim: 32 | |
| # CFG & PAG settings | |
| cfg_scale: 4 | |
| guidance_type: classifier-free | |
| pag_applied_layers: [14] | |
| # text encoder settings | |
| text_encoder: | |
| text_encoder_name: gemma-2-2b-it | |
| caption_channels: 2304 | |
| y_norm: false | |
| y_norm_scale_factor: 1.0 | |
| model_max_length: 300 | |
| chi_prompt: [] | |
| # VAE settings | |
| vae: | |
| vae_type: dc-ae | |
| vae_pretrained: mit-han-lab/dc-ae-f32c32-sana-1.0 | |
| scale_factor: 0.41407 | |
| vae_latent_dim: 32 | |
| vae_downsample_rate: 32 | |
| sample_posterior: true | |
| # scheduler settings | |
| scheduler: | |
| train_sampling_steps: 1000 | |
| predict_v: true | |
| noise_schedule: linear_flow | |
| pred_sigma: false | |
| flow_shift: 1.0 | |
| weighting_scheme: logit_normal | |
| logit_mean: 0.0 | |
| logit_std: 1.0 | |
| vis_sampler: flow_dpm-solver | |
| # training settings | |
| train: | |
| num_workers: 4 | |
| seed: 43 | |
| train_batch_size: 32 | |
| num_epochs: 100 | |
| gradient_accumulation_steps: 1 | |
| grad_checkpointing: false | |
| gradient_clip: 1.0 | |
| gc_step: 1 | |
| # optimizer settings | |
| optimizer: | |
| eps: 1.0e-10 | |
| lr: 0.0001 | |
| type: AdamW | |
| weight_decay: 0.03 | |
| lr_schedule: constant | |
| lr_schedule_args: | |
| num_warmup_steps: 500 | |
| auto_lr: | |
| rule: sqrt | |
| ema_rate: 0.9999 | |
| eval_batch_size: 16 | |
| use_fsdp: false | |
| use_flash_attn: false | |
| eval_sampling_steps: 250 | |
| lora_rank: 4 | |
| log_interval: 50 | |
| mask_type: 'null' | |
| mask_loss_coef: 0.0 | |
| load_mask_index: false | |
| snr_loss: false | |
| real_prompt_ratio: 1.0 | |
| debug_nan: false | |
| # checkpoint settings | |
| save_image_epochs: 1 | |
| save_model_epochs: 1 | |
| save_model_steps: 1000000 | |
| # visualization settings | |
| visualize: false | |
| null_embed_root: output/pretrained_models/ | |
| valid_prompt_embed_root: output/tmp_embed/ | |
| validation_prompts: | |
| - dog | |
| - portrait photo of a girl, photograph, highly detailed face, depth of field | |
| - Self-portrait oil painting, a beautiful cyborg with golden hair, 8k | |
| - Astronaut in a jungle, cold color palette, muted colors, detailed, 8k | |
| - A photo of beautiful mountain with realistic sunset and blue lake, highly detailed, masterpiece | |
| local_save_vis: false | |
| deterministic_validation: true | |
| online_metric: false | |
| eval_metric_step: 5000 | |
| online_metric_dir: metric_helper | |
| # work dir settings | |
| work_dir: /cache/exps/ | |
| skip_step: 0 | |
| # LCM settings | |
| loss_type: huber | |
| huber_c: 0.001 | |
| num_ddim_timesteps: 50 | |
| w_max: 15.0 | |
| w_min: 3.0 | |
| ema_decay: 0.95 | |