Source code for neuralop.training.torch_setup

import torch
import neuralop.mpu.comm as comm


def setup(config):
    """A convenience function to initialize the device, set up torch settings,
    and check multi-grid and other values. It sets up distributed communication,
    if used.

    Parameters
    ----------
    config : dict
        this function checks:
        * config.distributed (use_distributed, seed)
        * config.data (n_train, batch_size, test_batch_sizes, n_tests, test_resolutions)

    Returns
    -------
    device, is_logger
        device : torch.device
        is_logger : bool
    """
    seed = config.distributed.seed

    if config.distributed.use_distributed:
        comm.init(
            model_parallel_size=config.distributed.model_parallel_size,
            verbose=config.verbose,
        )

        # Set process 0 to log to screen and wandb
        is_logger = comm.get_local_rank() == 0

        # Set device and random seed
        device = torch.device(f"cuda:{comm.get_local_rank()}")

        if seed is not None:
            seed = seed + comm.get_data_parallel_rank()

        # Ensure the batch can be evenly split among the model-parallel group
        if config.patching.levels > 0:
            assert (
                config.data.batch_size * (2 ** (2 * config.patching.levels))
                % comm.get_model_parallel_size() == 0
            ), (
                f"With MG patching, the total batch-size is"
                f" {config.data.batch_size * (2 ** (2 * config.patching.levels))}"
                f" ({config.data.batch_size} times {2 ** (2 * config.patching.levels)})."
                f" However, this total batch-size cannot be evenly split among the"
                f" {comm.get_model_parallel_size()} model-parallel groups."
            )
            for j, b_size in enumerate(config.data.test_batch_sizes):
                assert (
                    b_size * (2 ** (2 * config.patching.levels))
                    % comm.get_model_parallel_size() == 0
                ), (
                    f"With MG patching, for a test resolution of {config.data.test_resolutions[j]}"
                    f" the total batch-size is {b_size * (2 ** (2 * config.patching.levels))}"
                    f" ({b_size} times {2 ** (2 * config.patching.levels)})."
                    f" However, this total batch-size cannot be evenly split among the"
                    f" {comm.get_model_parallel_size()} model-parallel groups."
                )
    else:
        is_logger = True
        if torch.cuda.is_available():
            device = torch.device("cuda:0")
        else:
            device = torch.device("cpu")

    # Set device, random seed and optimizations
    if torch.cuda.is_available():
        torch.cuda.set_device(device.index)

        if seed is not None:
            torch.cuda.manual_seed(seed)
        increase_l2_fetch_granularity()
        try:
            torch.set_float32_matmul_precision("high")
        except AttributeError:
            pass

        torch.backends.cudnn.benchmark = True

    if seed is not None:
        torch.manual_seed(seed)

    return device, is_logger
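
A minimal usage sketch (not part of this module), assuming a single-process run: SimpleNamespace stands in for whatever attribute-style config object the training scripts actually build, and the field values below are illustrative only.

from types import SimpleNamespace

# Hypothetical config carrying the attributes setup() reads in the
# non-distributed path (distributed.use_distributed, distributed.seed, verbose).
config = SimpleNamespace(
    verbose=True,
    distributed=SimpleNamespace(use_distributed=False, seed=0),
    patching=SimpleNamespace(levels=0),
    data=SimpleNamespace(batch_size=16, test_batch_sizes=[16], test_resolutions=[64]),
)

device, is_logger = setup(config)
print(device, is_logger)  # "cuda:0 True" on a GPU machine, "cpu True" otherwise
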
def increase_l2_fetch_granularity():
    try:
        import ctypes

        _libcudart = ctypes.CDLL("libcudart.so")
        # Set the device limit on the current device
        # cudaLimitMaxL2FetchGranularity = 0x05
        pValue = ctypes.cast((ctypes.c_int * 1)(), ctypes.POINTER(ctypes.c_int))
        _libcudart.cudaDeviceSetLimit(ctypes.c_int(0x05), ctypes.c_int(128))
        _libcudart.cudaDeviceGetLimit(pValue, ctypes.c_int(0x05))
        assert pValue.contents.value == 128
    except:
        return
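
A standalone sketch (illustrative, not from the library) that reads the limit back after calling the helper, to confirm the 128-byte L2 fetch granularity was applied; on machines without libcudart the CDLL call raises OSError and the check is skipped.

import ctypes

if torch.cuda.is_available():
    torch.cuda.set_device(0)
    increase_l2_fetch_granularity()
    try:
        libcudart = ctypes.CDLL("libcudart.so")
        value = ctypes.c_size_t()
        # cudaLimitMaxL2FetchGranularity = 0x05; cudaDeviceGetLimit writes a size_t
        libcudart.cudaDeviceGetLimit(ctypes.byref(value), ctypes.c_int(0x05))
        print("L2 fetch granularity:", value.value)  # expected: 128
    except OSError:
        pass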