Training an FNO with incremental meta-learning

A demo of the Incremental FNO meta-learning algorithm on our small Darcy-Flow dataset.

import torch
import matplotlib.pyplot as plt
import sys
from neuralop.models import FNO
from neuralop.data.datasets import load_darcy_flow_small
from neuralop.utils import count_model_params
from neuralop.training import AdamW
from neuralop.training.incremental import IncrementalFNOTrainer
from neuralop.data.transforms.data_processors import IncrementalDataProcessor
from neuralop import LpLoss, H1Loss

Loading the Darcy flow dataset

train_loader, test_loaders, output_encoder = load_darcy_flow_small(
    n_train=100,
    batch_size=16,
    test_resolutions=[16, 32],
    n_tests=[100, 50],
    test_batch_sizes=[32, 32],
)
Loading test db for resolution 16 with 100 samples
Loading test db for resolution 32 with 50 samples
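
As a quick sanity check we can peek at one training batch. This is a minimal sketch that assumes each batch is a dict with "x" and "y" tensors, the same keys used in the plotting code at the end of this example.

# Sanity-check sketch: inspect one batch (assumes dict batches with "x"/"y" keys).
batch = next(iter(train_loader))
print(batch["x"].shape)  # expected (batch, channels, height, width), e.g. torch.Size([16, 1, 16, 16])
print(batch["y"].shape)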

Choose device

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Set up the incremental FNO model. We start with 2 modes in each dimension and choose to update the modes with the incremental gradient-explained algorithm.

incremental = True
if incremental:
    starting_modes = (2, 2)
else:
    starting_modes = (16, 16)

Set up the model

model = FNO(
    max_n_modes=(16, 16),
    n_modes=starting_modes,
    hidden_channels=32,
    in_channels=1,
    out_channels=1,
)
model = model.to(device)
n_params = count_model_params(model)
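
Because incremental is True, the model starts with only (2, 2) active modes but can grow up to max_n_modes=(16, 16) during training. A minimal check of that starting state, assuming the FNO exposes its currently active modes through an n_modes attribute:

# Hedged sketch: inspect the incremental model's starting state
# (assumes the currently active modes are exposed as model.n_modes).
print(f"Active modes at initialization: {model.n_modes} (upper bound: (16, 16))")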

Set up the optimizer and scheduler

optimizer = AdamW(model.parameters(), lr=8e-3, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=30)
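
The cosine schedule decays the learning rate from 8e-3 towards zero over T_max=30 steps. The sketch below previews that decay with a throwaway optimizer and a dummy parameter (both purely illustrative) so the real optimizer's state is left untouched.

# Illustrative only: preview the cosine annealing curve with a throwaway optimizer.
probe_opt = torch.optim.SGD([torch.zeros(1, requires_grad=True)], lr=8e-3)
probe_sched = torch.optim.lr_scheduler.CosineAnnealingLR(probe_opt, T_max=30)
lrs = []
for _ in range(30):
    lrs.append(probe_sched.get_last_lr()[0])
    probe_opt.step()
    probe_sched.step()
print(f"lr at step 0: {lrs[0]:.2e}, step 15: {lrs[15]:.2e}, step 29: {lrs[-1]:.2e}")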


# To use incremental resolution, pass an IncrementalDataProcessor to the trainer;
# the trainer will then update the input resolution automatically.
# incremental_resolution : bool, default is False
#    if True, increase the resolution of the input incrementally
#    uses the incremental_res_gap parameter
#    uses the subsampling_rates parameter - a list of subsampling rates to step through
#    uses the dataset_indices parameter - the indices of the tensor dimensions to subsample (here the spatial dimensions)
#    uses the dataset_resolution parameter - the full resolution of the input data
#    uses the epoch_gap parameter - the number of epochs to wait before increasing the resolution
#    uses the verbose parameter - if True, print the resolution and the number of modes
data_transform = IncrementalDataProcessor(
    in_normalizer=None,
    out_normalizer=None,
    device=device,
    subsampling_rates=[2, 1],
    dataset_resolution=16,
    dataset_indices=[2, 3],
    epoch_gap=10,
    verbose=True,
)

data_transform = data_transform.to(device)
Original Incre Res: change index to 0
Original Incre Res: change sub to 2
Original Incre Res: change res to 8
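
The log above shows the processor starting at subsampling rate 2, i.e. resolution 8. Below is a hedged sketch of what that stride-based subsampling does to one batch, assuming the processor slices the dimensions listed in dataset_indices (here the two spatial dimensions):

# Illustrative only: stride-2 slicing of the spatial dims (indices 2 and 3)
# turns a 16x16 batch into an 8x8 one, matching the training log further below.
x_full = torch.randn(16, 1, 16, 16)
x_sub = x_full[:, :, ::2, ::2]
print(x_sub.shape)  # torch.Size([16, 1, 8, 8])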

Set up the losses

l2loss = LpLoss(d=2, p=2)
h1loss = H1Loss(d=2)
train_loss = h1loss
eval_losses = {"h1": h1loss, "l2": l2loss}
print("\n### N PARAMS ###\n", n_params)
print("\n### OPTIMIZER ###\n", optimizer)
print("\n### SCHEDULER ###\n", scheduler)
print("\n### LOSSES ###")
print("\n### INCREMENTAL RESOLUTION + GRADIENT EXPLAINED ###")
print(f"\n * Train: {train_loss}")
print(f"\n * Test: {eval_losses}")
sys.stdout.flush()
### N PARAMS ###
 2110305

### OPTIMIZER ###
 AdamW (
Parameter Group 0
    betas: (0.9, 0.999)
    correct_bias: True
    eps: 1e-06
    initial_lr: 0.008
    lr: 0.008
    weight_decay: 0.0001
)

### SCHEDULER ###
 <torch.optim.lr_scheduler.CosineAnnealingLR object at 0x7fcedb398640>

### LOSSES ###

### INCREMENTAL RESOLUTION + GRADIENT EXPLAINED ###

 * Train: <neuralop.losses.data_losses.H1Loss object at 0x7fceca89b110>

 * Test: {'h1': <neuralop.losses.data_losses.H1Loss object at 0x7fceca89b110>, 'l2': <neuralop.losses.data_losses.LpLoss object at 0x7fcedb398050>}
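
For reference, LpLoss(d=2, p=2) is a relative L2 error. The helper below is a minimal sketch of that idea, assuming the loss is the norm of the error divided by the norm of the target, averaged over the batch; the exact reduction and any quadrature weighting inside neuralop may differ.

# Hedged sketch of a relative L2 loss; neuralop's LpLoss may differ in reduction details.
def relative_l2(pred, target):
    num = torch.norm(pred.flatten(1) - target.flatten(1), p=2, dim=1)
    den = torch.norm(target.flatten(1), p=2, dim=1)
    return (num / den).mean()

print(relative_l2(torch.ones(4, 1, 8, 8) * 1.1, torch.ones(4, 1, 8, 8)))  # ~0.1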

Set up the IncrementalFNOTrainer. Other options include setting incremental_loss_gap = True. To use incremental resolution as well, set it to True; in this example we only update the modes, not the resolution. When using incremental resolution, keep in mind that the number of modes initially set should be strictly less than the resolution. The parameters for the various incremental settings are listed below (a toy sketch of the gradient-explained criterion follows the list):

incremental_grad : bool, default is False
    if True, use the base incremental algorithm, which is based on gradient variance
    uses the incremental_grad_eps parameter - sets the threshold for gradient variance
    uses the incremental_buffer parameter - sets the number of buffer modes used to calculate the gradient variance
    uses the incremental_max_iter parameter - sets the initial number of iterations
    uses the incremental_grad_max_iter parameter - sets the maximum number of iterations over which to accumulate gradients

incremental_loss_gap : bool, default is False
    if True, use the incremental algorithm based on the loss gap
    uses the incremental_loss_eps parameter
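
The toy sketch below illustrates the spirit of the gradient-explained rule: accumulate a per-mode measure of gradient strength, keep the smallest set of low-frequency modes whose cumulative share exceeds incremental_grad_eps, and pad with incremental_buffer extra modes. The numbers are made up and this is not the IncrementalFNOTrainer implementation.

# Toy illustration of the gradient-explained criterion (not the library's implementation).
def modes_to_keep(grad_strength, eps=0.9999, buffer=5):
    # grad_strength: 1D tensor of per-mode gradient magnitudes, ordered by frequency
    share = torch.cumsum(grad_strength, dim=0) / grad_strength.sum()
    k = int((share < eps).sum()) + 1  # smallest prefix explaining a fraction eps of the total
    return min(k + buffer, grad_strength.numel())

strength = torch.tensor([5.0, 3.0, 1.0, 0.1, 0.05, 0.01, 0.005, 0.001])
print(modes_to_keep(strength, eps=0.99, buffer=2))  # most gradient energy sits in the first few modes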

# Finally pass all of these to the Trainer
trainer = IncrementalFNOTrainer(
    model=model,
    n_epochs=20,
    data_processor=data_transform,
    device=device,
    verbose=True,
    incremental_loss_gap=False,
    incremental_grad=True,
    incremental_grad_eps=0.9999,
    incremental_loss_eps=0.001,
    incremental_buffer=5,
    incremental_max_iter=1,
    incremental_grad_max_iter=2,
)

Train the model

trainer.train(
    train_loader,
    test_loaders,
    optimizer,
    scheduler,
    regularizer=False,
    training_loss=train_loss,
    eval_losses=eval_losses,
)
Training on 100 samples
Testing on [50, 50] samples         on resolutions [16, 32].
Raw outputs of shape torch.Size([16, 1, 8, 8])
[0] time=0.22, avg_loss=0.9248, train_err=13.2110
Eval: 16_h1=0.8715, 16_l2=0.5382, 32_h1=0.9804, 32_l2=0.5430
[1] time=0.20, avg_loss=0.7686, train_err=10.9799
Eval: 16_h1=0.7955, 16_l2=0.4911, 32_h1=0.9059, 32_l2=0.4990
[2] time=0.21, avg_loss=0.6952, train_err=9.9317
Eval: 16_h1=0.8524, 16_l2=0.4997, 32_h1=1.0478, 32_l2=0.5265
[3] time=0.21, avg_loss=0.6403, train_err=9.1471
Eval: 16_h1=0.7306, 16_l2=0.4116, 32_h1=0.9265, 32_l2=0.4323
[4] time=0.20, avg_loss=0.5816, train_err=8.3079
Eval: 16_h1=0.7433, 16_l2=0.4063, 32_h1=0.9747, 32_l2=0.4335
[5] time=0.20, avg_loss=0.5372, train_err=7.6741
Eval: 16_h1=0.7209, 16_l2=0.4594, 32_h1=0.9587, 32_l2=0.4831
[6] time=0.21, avg_loss=0.5298, train_err=7.5692
Eval: 16_h1=0.8101, 16_l2=0.4118, 32_h1=1.1886, 32_l2=0.4485
[7] time=0.21, avg_loss=0.4908, train_err=7.0114
Eval: 16_h1=0.8033, 16_l2=0.4142, 32_h1=1.1472, 32_l2=0.4418
[8] time=0.21, avg_loss=0.4923, train_err=7.0334
Eval: 16_h1=0.6959, 16_l2=0.4219, 32_h1=0.8557, 32_l2=0.4439
[9] time=0.21, avg_loss=0.5041, train_err=7.2018
Eval: 16_h1=0.6499, 16_l2=0.3609, 32_h1=0.8811, 32_l2=0.3959
Incre Res Update: change index to 1
Incre Res Update: change sub to 1
Incre Res Update: change res to 16
[10] time=0.26, avg_loss=0.5538, train_err=7.9114
Eval: 16_h1=0.5183, 16_l2=0.3240, 32_h1=0.5924, 32_l2=0.3180
[11] time=0.28, avg_loss=0.4614, train_err=6.5915
Eval: 16_h1=0.6746, 16_l2=0.4599, 32_h1=0.7945, 32_l2=0.4457
[12] time=0.26, avg_loss=0.5361, train_err=7.6586
Eval: 16_h1=0.4720, 16_l2=0.2841, 32_h1=0.6427, 32_l2=0.3077
[13] time=0.26, avg_loss=0.4414, train_err=6.3060
Eval: 16_h1=0.4261, 16_l2=0.2796, 32_h1=0.5137, 32_l2=0.2771
[14] time=0.26, avg_loss=0.3862, train_err=5.5171
Eval: 16_h1=0.4715, 16_l2=0.2802, 32_h1=0.5789, 32_l2=0.2892
[15] time=0.26, avg_loss=0.4161, train_err=5.9443
Eval: 16_h1=0.3858, 16_l2=0.2389, 32_h1=0.4873, 32_l2=0.2492
[16] time=0.26, avg_loss=0.3566, train_err=5.0950
Eval: 16_h1=0.3797, 16_l2=0.2440, 32_h1=0.5018, 32_l2=0.2500
[17] time=0.27, avg_loss=0.3412, train_err=4.8742
Eval: 16_h1=0.5346, 16_l2=0.3388, 32_h1=0.6856, 32_l2=0.3489
[18] time=0.27, avg_loss=0.4428, train_err=6.3258
Eval: 16_h1=0.4426, 16_l2=0.2905, 32_h1=0.5216, 32_l2=0.2955
[19] time=0.26, avg_loss=0.3822, train_err=5.4596
Eval: 16_h1=0.4097, 16_l2=0.2562, 32_h1=0.5603, 32_l2=0.2669

{'train_err': 5.459584202085223, 'avg_loss': 0.38217089414596556, 'avg_lasso_loss': None, 'epoch_train_time': 0.264077172000043, '16_h1': tensor(0.4097), '16_l2': tensor(0.2562), '32_h1': tensor(0.5603), '32_l2': tensor(0.2669)}
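
trainer.train(...) returns the metrics dict shown above; assigning it to a variable lets you log or checkpoint based on it. As a minimal follow-up, the trained weights can be saved with plain PyTorch (the file name below is arbitrary; neuralop also ships its own checkpointing utilities, which we do not rely on here).

# Persist the trained weights with standard PyTorch; the file name is arbitrary.
torch.save(model.state_dict(), "incremental_fno_darcy.pt")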

Plot the prediction and compare with the ground truth. Note that we trained at a very small resolution for a very small number of epochs. In practice, we would train at a larger resolution on many more samples.

However, for practicality, we created a minimal example that i) fits in just a few Mb of memory and ii) can be trained quickly on a CPU.

In practice we would train a Neural Operator on one or multiple GPUs.

test_samples = test_loaders[32].dataset

fig = plt.figure(figsize=(7, 7))
for index in range(3):
    data = test_samples[index]
    # Input x
    x = data["x"].to(device)
    # Ground-truth
    y = data["y"].to(device)
    # Model prediction
    out = model(x.unsqueeze(0))
    ax = fig.add_subplot(3, 3, index * 3 + 1)
    x = x.cpu().squeeze().detach().numpy()
    y = y.cpu().squeeze().detach().numpy()
    ax.imshow(x, cmap="gray")
    if index == 0:
        ax.set_title("Input x")
    plt.xticks([], [])
    plt.yticks([], [])

    ax = fig.add_subplot(3, 3, index * 3 + 2)
    ax.imshow(y.squeeze())
    if index == 0:
        ax.set_title("Ground-truth y")
    plt.xticks([], [])
    plt.yticks([], [])

    ax = fig.add_subplot(3, 3, index * 3 + 3)
    ax.imshow(out.cpu().squeeze().detach().numpy())
    if index == 0:
        ax.set_title("Model prediction")
    plt.xticks([], [])
    plt.yticks([], [])

fig.suptitle("Inputs, ground-truth output and prediction.", y=0.98)
plt.tight_layout()
fig.show()
[Figure: Inputs, ground-truth output and prediction. Columns: Input x, Ground-truth y, Model prediction]
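
To put a number on the qualitative comparison in the figure, the hedged sketch below computes the relative L2 error on the same three 32x32 test samples, applying the model to the raw inputs exactly as in the plotting loop and assuming y carries the same leading channel dimension as x.

# Hedged sketch: relative L2 error on the three plotted 32x32 test samples.
with torch.no_grad():
    for index in range(3):
        data = test_samples[index]
        x = data["x"].unsqueeze(0).to(device)
        y = data["y"].unsqueeze(0).to(device)
        pred = model(x)
        err = torch.norm(pred - y) / torch.norm(y)
        print(f"sample {index}: relative L2 error = {err.item():.3f}")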

Total running time of the script: (0 minutes 6.615 seconds)
