Note
Go to the end to download the full example code
Training a TFNO on Darcy-Flow
In this example, we demonstrate how to use the small Darcy-Flow example we ship with the package to train a Tensorized Fourier Neural Operator (TFNO), save checkpoints during training, and resume training from a saved checkpoint.
import torch
import matplotlib.pyplot as plt
import sys
from neuralop.models import TFNO
from neuralop import Trainer
from neuralop.training import CheckpointCallback
from neuralop.datasets import load_darcy_flow_small
from neuralop.utils import count_model_params
from neuralop import LpLoss, H1Loss
# Device identifier used for `model.to(...)` and passed to the Trainer.
device = "cpu"
Loading the small Darcy-Flow dataset, with test sets at resolutions 16 and 32
# Build the data loaders and the matching data processor in one call.
# Training arguments come first, followed by the per-resolution test
# settings (resolutions 16 and 32, with n_tests of 100 and 50 and a test
# batch size of 32 for each).
train_loader, test_loaders, data_processor = load_darcy_flow_small(
    n_train=1000,
    batch_size=32,
    test_resolutions=[16, 32],
    n_tests=[100, 50],
    test_batch_sizes=[32, 32],
)
We create a tensorized FNO model
# Instantiate the TFNO with a Tucker factorization at rank 0.42 and move it
# to the selected device in the same expression.
model = TFNO(
    n_modes=(16, 16),
    hidden_channels=32,
    projection_channels=64,
    factorization="tucker",
    rank=0.42,
).to(device)

# Report the parameter count and flush so the message appears immediately.
n_params = count_model_params(model)
print(f"\nOur model has {n_params} parameters.")
sys.stdout.flush()
Create the optimizer
# Adam over all model parameters, with a learning rate of 8e-3 and a
# weight decay of 1e-4.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=8e-3,
    weight_decay=1e-4,
)

# Cosine-annealing learning-rate schedule attached to the optimizer above.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=30,
)
Creating the losses
# Loss objects: an H1Loss and an LpLoss with p=2, both built with d=2.
h1loss = H1Loss(d=2)
l2loss = LpLoss(d=2, p=2)

# The H1 loss drives training; both losses are collected by name.
train_loss = h1loss
eval_losses = dict(h1=h1loss, l2=l2loss)

# Dump the full configuration (model, optimizer, scheduler, losses) to
# stdout, then flush so the output is not held back in the buffer.
print("\n### MODEL ###\n", model)
print("\n### OPTIMIZER ###\n", optimizer)
print("\n### SCHEDULER ###\n", scheduler)
print("\n### LOSSES ###")
print(f"\n * Train: {train_loss}")
print(f"\n * Test: {eval_losses}")
sys.stdout.flush()
Create the trainer
# Trainer for the first run (20 epochs). The CheckpointCallback is pointed at
# './checkpoints' with save_interval=10 and is asked to save the optimizer
# and scheduler as well.
trainer = Trainer(
    model=model,
    n_epochs=20,
    device=device,
    data_processor=data_processor,
    callbacks=[
        CheckpointCallback(
            save_dir="./checkpoints",
            save_interval=10,
            save_optimizer=True,
            save_scheduler=True,
        ),
    ],
    wandb_log=False,
    use_distributed=False,
    log_test_interval=3,
    verbose=True,
)
Actually train the model on our small Darcy-Flow dataset
# Launch training. An empty dict is passed for `test_loaders`, so no test
# loaders are handed to this run, and the regularizer is disabled.
trainer.train(
    train_loader=train_loader,
    test_loaders={},
    optimizer=optimizer,
    scheduler=scheduler,
    training_loss=train_loss,
    regularizer=False,
)
# Resume training from the checkpoint saved at epoch 10: a fresh Trainer is
# built whose CheckpointCallback resumes from './checkpoints/ep_10' and
# uses './new_checkpoints' as its save directory.
trainer = Trainer(
    model=model,
    n_epochs=20,
    device=device,
    data_processor=data_processor,
    callbacks=[
        CheckpointCallback(
            save_dir="./new_checkpoints",
            resume_from_dir="./checkpoints/ep_10",
        ),
    ],
    wandb_log=False,
    use_distributed=False,
    log_test_interval=3,
    verbose=True,
)
# Run training again with the current `trainer`, reusing the same optimizer,
# scheduler and training loss objects; `test_loaders` is again an empty dict.
trainer.train(
    train_loader=train_loader,
    test_loaders={},
    optimizer=optimizer,
    scheduler=scheduler,
    training_loss=train_loss,
    regularizer=False,
)