train_ddpm.py
import os

import torch
import torchvision
from torch.utils.data import DataLoader
from torchvision import datasets
from tqdm import tqdm

import wandb
from Config import ConfigMnist, get_diffusion

# Debugging aid: makes autograd raise an error as soon as NaN/Inf values
# appear in the backward pass. It noticeably slows training, so consider
# disabling it for regular runs.
torch.autograd.set_detect_anomaly(True)

wandb.require("core")
# Rescale input images from [0, 1] to [-1, 1].
def get_transform():
    class RescaleChannels(object):
        def __call__(self, sample):
            return 2 * sample - 1

    return torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        RescaleChannels(),
    ])
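# Note: the model therefore sees images in [-1, 1]; by the same mapping,
# generated samples should be brought back with (x + 1) / 2 before being
# visualized or saved as images.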
def cycle(dl):
    while True:
        for data in dl:
            yield data
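# cycle() turns a finite DataLoader into an endless stream of batches, so the
# loop in main() can run for a fixed number of iterations instead of tracking
# epochs.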
def main():
    config = ConfigMnist()

    # Initialize wandb
    wandb.init(project=config.wandb_project, name=config.wandb_name)

    # Build the diffusion model
    diffusion = get_diffusion(config).to(config.device)
    print("Model loaded")

    # Set up the optimizer
    optimizer = torch.optim.Adam(diffusion.parameters(), lr=config.learning_rate)
    print("Optimizer ready")

    # Build the datasets
    batch_size = config.batch_size
    train_dataset = datasets.MNIST(
        root=config.dataset_root,
        train=True,
        download=True,
        transform=get_transform(),
    )
    test_dataset = datasets.MNIST(
        root=config.dataset_root,
        train=False,
        download=True,
        transform=get_transform(),  # rescales images to [-1, 1]
    )
    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        drop_last=True,
        num_workers=2,
    )
    train_loader = cycle(train_loader)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, drop_last=True, num_workers=2)
    print("Datasets loaded")
print("开始训练")
# 开始训练
acc_train_loss = 0 # 刚开始进来,让训练loss为0
diffusion.train()
for iteration in tqdm(range(1, config.iterations + 1)):
# 进行一次梯度回传
x, _ = next(train_loader)
x = x.to(config.device)
loss = diffusion(x)
acc_train_loss += loss.item()
optimizer.zero_grad()
loss.backward()
optimizer.step()
# 如果需要打印了
if iteration % config.log_rate == 0:
acc_train_loss /= config.log_rate
print("train: {}/{} --> {}".format(iteration, config.iterations, acc_train_loss))
wandb.log({"train_loss": acc_train_loss})
acc_train_loss = 0
        # Periodically evaluate and save a checkpoint
        if iteration % config.checkpoint_rate == 0:
            os.makedirs(config.log_dir, exist_ok=True)

            # First report the test-set loss
            test_loss = 0
            with torch.no_grad():
                diffusion.eval()
                for x, _ in test_loader:
                    x = x.to(config.device)
                    loss = diffusion(x)
                    test_loss += loss.item()
            test_loss /= len(test_loader)
            print("-" * 20)
            print("test: {}/{} --> {}".format(iteration, config.iterations, test_loss))
            wandb.log({"test_loss": test_loss})
            print("-" * 20)

            # Save model and optimizer state
            model_filename = os.path.join(config.log_dir, "{}-model.pth".format(iteration))
            optim_filename = os.path.join(config.log_dir, "{}-optim.pth".format(iteration))
            torch.save(diffusion.state_dict(), model_filename)
            torch.save(optimizer.state_dict(), optim_filename)
            diffusion.train()

    wandb.finish()


if __name__ == "__main__":
    main()
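The script imports ConfigMnist and get_diffusion from a Config module that is not shown on this page. The sketch below is only a guess at its shape: the attribute names are exactly those the script reads, but every value and the body of get_diffusion are illustrative placeholders, not the repository's actual code.

import torch

# Hypothetical sketch of Config.py; attribute names come from train_ddpm.py,
# all values are placeholder assumptions.
class ConfigMnist:
    wandb_project = "ddpm-mnist"   # placeholder
    wandb_name = "baseline"        # placeholder
    device = "cuda" if torch.cuda.is_available() else "cpu"
    learning_rate = 2e-4           # placeholder
    batch_size = 128               # placeholder
    dataset_root = "./data"        # placeholder
    iterations = 100000            # placeholder
    log_rate = 100                 # placeholder
    checkpoint_rate = 10000        # placeholder
    log_dir = "./logs"             # placeholder

def get_diffusion(config):
    # Expected to return an nn.Module whose forward(x) returns the DDPM
    # training loss for a batch of images; the real implementation lives
    # elsewhere in this repository.
    raise NotImplementedError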
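The single line loss = diffusion(x) carries the whole DDPM objective. The repository's model code is not shown here, so the following is only a generic sketch of the standard noise-prediction loss from Ho et al. (2020) that such a forward pass typically computes; model, betas, and the model(x_t, t) call signature are illustrative assumptions, not this project's API.

import torch
import torch.nn.functional as F

def ddpm_loss(model, x0, betas):
    # Standard DDPM objective: predict the noise injected at a random timestep.
    T = betas.shape[0]
    alphas_bar = torch.cumprod(1.0 - betas, dim=0)             # cumulative alpha_bar_t
    t = torch.randint(0, T, (x0.shape[0],), device=x0.device)  # one timestep per sample
    noise = torch.randn_like(x0)
    a_bar = alphas_bar[t].view(-1, 1, 1, 1)
    x_t = a_bar.sqrt() * x0 + (1 - a_bar).sqrt() * noise       # forward diffusion q(x_t | x_0)
    return F.mse_loss(model(x_t, t), noise)                    # epsilon-prediction MSE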
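Because the script saves both the model and the optimizer state every checkpoint_rate iterations, an interrupted run can be resumed. A minimal sketch, assuming the Config interface above and that a checkpoint exists for the chosen iteration (10000 here is purely illustrative):

import os
import torch
from Config import ConfigMnist, get_diffusion

config = ConfigMnist()
diffusion = get_diffusion(config).to(config.device)
optimizer = torch.optim.Adam(diffusion.parameters(), lr=config.learning_rate)

iteration = 10000  # illustrative: pick an iteration that was actually saved
diffusion.load_state_dict(
    torch.load(os.path.join(config.log_dir, "{}-model.pth".format(iteration)),
               map_location=config.device))
optimizer.load_state_dict(
    torch.load(os.path.join(config.log_dir, "{}-optim.pth".format(iteration)),
               map_location=config.device))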