from pathlib import Path
from typing import Optional, List, Union
from torch.utils.data import DataLoader
from .pt_dataset import PTDataset
from ..transforms.data_processors import DefaultDataProcessor


class Burgers1dTimeDataProcessor(DefaultDataProcessor):
"""Burgers1dTimeDataProcessor wraps the DefaultDataProcessor
but adds one line to ``.preprocess`` to repeat the input ``x`` along
the temporal dimension.
"""
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def preprocess(self, data_dict, batched=True):
"""preprocess does the same thing as ``DefaultDataProcessor.preprocess()``,
with the addition of unsqueezing ``x`` along the temporal dimension and repeating
to match ``y``'s shape.
Parameters
----------
data_dict : dict
one batch of input
batched : bool, optional
Whether inputs are batched, by default True
"""
        _, _, temporal_len, _ = data_dict["y"].shape
        # x starts with shape (b, 1, spatial_len)
        x = data_dict["x"]
        # add a temporal dim and tile it: (b, 1, temporal_len, spatial_len)
        x = x.unsqueeze(-2).repeat([1, 1, temporal_len, 1])
        data_dict["x"] = x
return super().preprocess(data_dict, batched)
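
# The shape bookkeeping in ``preprocess`` can be sanity-checked in isolation.
# A minimal sketch (random tensors stand in for real Burgers data; the sizes
# are arbitrary placeholders):
#
#     import torch
#     x = torch.randn(8, 1, 16)          # (batch, channels, spatial)
#     y = torch.randn(8, 1, 10, 16)      # (batch, channels, temporal, spatial)
#     x = x.unsqueeze(-2).repeat([1, 1, y.shape[2], 1])
#     assert x.shape == y.shape          # x is now tiled along the temporal dim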
class Burgers1dTimeDataset(PTDataset):
"""
    Burgers1dTimeDataset wraps data from the viscous
    Burgers' equation in one spatial dimension.
    This dataset is not available for download online, but we
    provide a low-resolution version on 16 spatial points.

    Parameters
----------
root_dir : Union[Path, str]
        root directory where data files are stored
n_train : int
number of train instances
n_tests : List[int]
number of test instances per test dataset
batch_size : int
batch size of training set
test_batch_sizes : List[int]
batch size of test sets
train_resolution : int
resolution of data for training set
test_resolutions : List[int], optional
resolution of data for testing sets, by default [16]
temporal_subsample : int, optional
rate at which to subsample the temporal dimension, by default None
spatial_subsample : int, optional
        rate at which to subsample along the spatial dimension, by default None
    pad : int, optional
        amount of padding to apply, by default 0

    Attributes
    ----------
    train_db : torch.utils.data.Dataset
        dataset of training examples
    test_dbs : dict[int, torch.utils.data.Dataset]
        test datasets, keyed by resolution
    data_processor : neuralop.data.transforms.DataProcessor, optional
        processor to apply to data examples, by default None
    """
def __init__(
self,
root_dir: Union[Path, str],
n_train: int,
        n_tests: List[int],
train_resolution: int = 16,
test_resolutions: List[int] = [16],
batch_size: int = 32,
        test_batch_sizes: List[int] = [32],
temporal_subsample: Optional[int] = None,
spatial_subsample: Optional[int] = None,
pad: int = 0,
):
# convert root dir to path
if isinstance(root_dir, str):
root_dir = Path(root_dir)
if not root_dir.exists():
root_dir.mkdir(parents=True)
available_resolutions = [16, 128]
assert (
train_resolution in available_resolutions
), f"Resolutions available: {available_resolutions}, got {train_resolution}"
for res in test_resolutions:
assert (
res in available_resolutions
), f"Resolutions available: {available_resolutions}, got {res}"
super().__init__(
root_dir=root_dir,
n_train=n_train,
n_tests=n_tests,
batch_size=batch_size,
test_batch_sizes=test_batch_sizes,
train_resolution=train_resolution,
test_resolutions=test_resolutions,
input_subsampling_rate=spatial_subsample,
output_subsampling_rate=[temporal_subsample, spatial_subsample],
encode_input=True,
encode_output=True,
encoding="channel-wise",
channel_dim=1,
dataset_name="burgers",
)
        # swap in the Burgers-specific processor, reusing the normalizers
        # built by the parent class
        self._data_processor = Burgers1dTimeDataProcessor(
            self._data_processor.in_normalizer, self._data_processor.out_normalizer
        )
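
# Illustrative usage (a sketch, not part of the module API; the path and
# instance counts below are placeholders):
#
#     dataset = Burgers1dTimeDataset(
#         root_dir="data/burgers",
#         n_train=100,
#         n_tests=[50],
#         train_resolution=16,
#         test_resolutions=[16],
#         batch_size=32,
#         test_batch_sizes=[32],
#     )
#     train_loader = DataLoader(dataset.train_db, batch_size=32, shuffle=True)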
def load_mini_burgers_1dtime(
data_path: Union[Path, str],
n_train: int,
n_test: int,
batch_size: int,
test_batch_size: int,
temporal_subsample: int = 1,
spatial_subsample: int = 1,
):
"""
Legacy function to load mini Burger's equation dataset
Parameters
----------
root_dir : Union[Path, str]
root at which to download data files
n_train : int
number of train instances
n_test : int
        number of test instances
batch_size : int
batch size of training set
test_batch_size : int
batch size of test set
    temporal_subsample : int, optional
        rate at which to subsample the temporal dimension, by default 1
    spatial_subsample : int, optional
        rate at which to subsample along the spatial dimension, by default 1

    Returns
    -------
    train_loader : torch.utils.data.DataLoader
        loader over the training examples
    test_loaders : dict[int, torch.utils.data.DataLoader]
        test loaders, keyed by resolution
    data_processor : DataProcessor
        processor to apply to data examples
    """
burgers_dataset = Burgers1dTimeDataset(
root_dir=data_path,
n_train=n_train,
n_tests=[n_test],
batch_size=batch_size,
test_batch_sizes=[test_batch_size],
train_resolution=16,
test_resolutions=[16],
temporal_subsample=temporal_subsample,
spatial_subsample=spatial_subsample,
)
train_loader = DataLoader(
burgers_dataset.train_db, batch_size=batch_size, shuffle=True
)
test_loaders = {
16: DataLoader(
burgers_dataset.test_dbs[16], batch_size=test_batch_size, shuffle=False
)
}
return train_loader, test_loaders, burgers_dataset.data_processor
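
# Example call (a sketch; the path and sizes are placeholders):
#
#     train_loader, test_loaders, data_processor = load_mini_burgers_1dtime(
#         data_path="data/burgers",
#         n_train=100,
#         n_test=50,
#         batch_size=16,
#         test_batch_size=16,
#     )
#     # test_loaders is keyed by resolution, e.g. test_loaders[16]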