DistributedSampler Automatic Dataset Splitting Example


Example code


import torch
import random
import torch.utils.data as data
import torch.distributed as dist
import torch.backends.cudnn as cudnn
import torch.multiprocessing as mp
from torch.utils.data.distributed import DistributedSampler

class TestDataset(data.Dataset):
    def __init__(self):
        pass

    def __getitem__(self, index):
        return index

    def __len__(self):
        return 100

def train_worker(gpu_id, nprocs):
    '''Worker that runs in its own independent process, one per GPU.
    '''
    random.seed(0)
    torch.manual_seed(0)
    cudnn.deterministic = True
    # Speeds things up, mainly when the input shape is fixed; with dynamic
    # shapes it can actually be slower, and it can hurt reproducibility.
    torch.backends.cudnn.benchmark = True
    dist.init_process_group(backend='nccl',
                            init_method='tcp://127.0.0.1:22021',
                            world_size=nprocs,
                            rank=gpu_id)

    torch.cuda.set_device(gpu_id)
    # Shard the dataset across the GPUs, one subset per process
    dataset = TestDataset()
    sampler = DistributedSampler(dataset)  # this sampler automatically partitions the data across the GPUs
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=1,
        shuffle=False,
        num_workers=1,
        pin_memory=True,
        sampler=sampler,
        drop_last=True,   # drop leftover samples that don't fill a complete batch
    )
    dd = []
    print(len(dataloader))
    for batch in dataloader:
        dd.append(batch.numpy()[0])
    
    print(gpu_id, sorted(dd))

def train_main():
    gpu_nums = 2
    mp.spawn(train_worker, nprocs=gpu_nums, args=(gpu_nums,))

if __name__ == '__main__':
    train_main()
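
Why does each process end up with exactly half of the 100 indices, and why are they shuffled even though the DataLoader has shuffle=False? DistributedSampler shuffles by default, using a seed shared by all processes (seed + epoch), and then hands each rank a strided slice of the shuffled index list. The following is a simplified sketch of that partition logic, not the library's actual source; the padding that handles dataset sizes not divisible by the world size is omitted.

import torch

def partition_indices(dataset_len, num_replicas, rank, epoch=0, shuffle=True, seed=0):
    # Simplified sketch of how DistributedSampler splits a dataset.
    if shuffle:
        g = torch.Generator()
        g.manual_seed(seed + epoch)  # identical shuffle on every process
        indices = torch.randperm(dataset_len, generator=g).tolist()
    else:
        indices = list(range(dataset_len))
    # Each rank takes every num_replicas-th index, offset by its rank,
    # so the shards are disjoint and together cover the whole dataset.
    return indices[rank::num_replicas]

# With 100 samples and 2 processes, rank 0 and rank 1 each receive 50
# disjoint indices, which matches the console output below.
print(len(partition_indices(100, num_replicas=2, rank=0)))  # 50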

Console output


50
50
1 [0, 1, 5, 7, 8, 10, 12, 15, 17, 19, 21, 23, 24, 27, 28, 29, 30, 34, 40, 41, 42, 43, 46, 47, 48, 50, 51, 52, 56, 58, 60, 61, 63, 66, 67, 68, 69, 70, 72, 73, 77, 83, 84, 85, 88, 90, 91, 92, 95, 96]
0 [2, 3, 4, 6, 9, 11, 13, 14, 16, 18, 20, 22, 25, 26, 31, 32, 33, 35, 36, 37, 38, 39, 44, 45, 49, 53, 54, 55, 57, 59, 62, 64, 65, 71, 74, 75, 76, 78, 79, 80, 81, 82, 86, 87, 89, 93, 94, 97, 98, 99]
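
One detail that matters when moving from this toy example to a real training loop: because the sampler's shuffle depends on seed + epoch, you should call sampler.set_epoch(epoch) at the start of each epoch, otherwise every epoch sees the same ordering on every GPU. A minimal usage sketch, where num_epochs and the training step are placeholders not present in the example above:

for epoch in range(num_epochs):   # num_epochs is a placeholder
    sampler.set_epoch(epoch)      # re-seeds the shuffle for this epoch
    for batch in dataloader:
        pass                      # training step goes here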

