hdlm-group
/

hdlm-base-epsilon-0.05

Text Generation

diffusion-language-model

Model card Files Files and versions

hdlm-base-epsilon-0.05 / config.json

nimafathi's picture

Upload config.json with huggingface_hub

a3d5b24 verified 10 months ago

history blame contribute delete

1.86 kB

	{
	"hf_model_id": "hdlm-group/hdlm-base-epsilon-0.0",
	"reset_step_for_finetuning": true,
	"ngpus": 4,
	"type": "aligned",
	"gradient_accumulation_steps": 8,
	"model_type": "epsilon_hybrid",
	"tokenizer": {
	"tokens": 50257,
	"model": "gpt2"
	},
	"training": {
	"batch_size": 512,
	"accum": 8,
	"n_iters": 500000,
	"snapshot_freq": 5000,
	"log_freq": 500,
	"eval_freq": 5000,
	"snapshot_freq_for_preemption": 1000,
	"snapshot_sampling": true,
	"ema": 0.9999,
	"warmup_iter": 50000,
	"loss_type": "hybrid",
	"epsilon": 0.05,
	"lambda": 5.0,
	"lr": 1e-05
	},
	"data": {
	"train": "openwebtext-train",
	"valid": "wikitext103",
	"cache_dir": "/home/toolkit/research-diffcodegen/data",
	"debug": false
	},
	"annealing": {
	"type": "none",
	"efficient": false,
	"width": 1024,
	"tau": 1024,
	"eval_tau": 1024,
	"sampling_method": "sdlm",
	"sampling_eps": 0.0001,
	"attention": {
	"context_type": "block_causal",
	"block_type": "full"
	},
	"match_inference": true
	},
	"eval": {
	"batch_size": 32,
	"perplexity": true,
	"perplexity_batch_size": 16
	},
	"optim": {
	"weight_decay": 0.1,
	"optimizer": "AdamW",
	"lr": 5e-05,
	"beta1": 0.9,
	"beta2": 0.95,
	"eps": 1e-08,
	"warmup": 10000,
	"grad_clip": 1.0,
	"scheduler": "cosine"
	},
	"experiment": {
	"name": "ft_epsilon_0.05_lambda_5.0",
	"wandb_project": "Hybrid-SDLM-ALIGNED"
	},
	"model": {
	"name": "epsilon_hdlm",
	"type": "ddit",
	"hidden_size": 768,
	"cond_dim": 128,
	"length": 1024,
	"n_blocks": 12,
	"n_heads": 12,
	"dropout": 0.1,
	"scale_by_sigma": false,
	"transformer_sigma_conditioning": false,
	"hybrid_sigma_embedding": false,
	"post_process_logits": false,
	"use_timestep_embedding": false
	}
	}