maskrcnn_benchmark Source Code Walkthrough: the dataloader (Part 1)


Since Stitcher, the reference code, mainly modifies the dataloader, we start there.

Reference code: https://github.com/yukang2017/Stitcher/tree/master/maskrcnn_benchmark

This topic touches a fair amount of PyTorch source code and runs long, so it is split into two parts.


As before, we start from train_net.py:

#train_net.py
'''
from maskrcnn_benchmark.config import cfg
cfg.merge_from_file(args.config_file)
cfg.merge_from_list(args.opts)
cfg.freeze()
'''
model = train(cfg, args.local_rank, args.distributed)

def train(cfg, local_rank, distributed):
    '''
    (content omitted)
    '''
    # from maskrcnn_benchmark.data import make_data_loader
    data_loader = make_data_loader(  # follow this call
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    do_train(
        model,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        arguments,
        distributed,
    )

    return model
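
For context on how the returned data_loader is consumed: do_train simply iterates it. Roughly, abridged from my reading of the upstream engine/trainer.py:

#engine/trainer.py (abridged sketch)
for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
    images = images.to(device)                           # batched, padded images
    targets = [target.to(device) for target in targets]  # per-image annotations
    loss_dict = model(images, targets)
    losses = sum(loss for loss in loss_dict.values())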

from maskrcnn_benchmark.data import make_data_loader: this is the imported function; as before, first look at __init__.py in the data directory:

#__init__.py
from .build import make_data_loader
#build.py
def make_data_loader(cfg, is_train=True, is_distributed=False, start_iter=0, batch_stitch=False):
    num_gpus = get_world_size()
    if is_train:  # training mode
        images_per_batch = cfg.SOLVER.IMS_PER_BATCH  # the global batch size
        if batch_stitch:  # batch stitching: each image's H and W are halved and the images stacked along the batch dim, so 4x the batch size fits in the same memory
            # [n,c,h,w] -> [4n,c,h/2,w/2]
            images_per_batch *= 4
        assert (  # the batch size must be divisible by the number of GPUs
            images_per_batch % num_gpus == 0
        ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of GPUs ({}) used.".format(
            images_per_batch, num_gpus)
        images_per_gpu = images_per_batch // num_gpus
        shuffle = True  # note: shuffling is always on during training
        num_iters = cfg.SOLVER.MAX_ITER  # maximum number of iterations
    else:
        images_per_batch = cfg.TEST.IMS_PER_BATCH
        assert (
            images_per_batch % num_gpus == 0
        ), "TEST.IMS_PER_BATCH ({}) must be divisible by the number of GPUs ({}) used.".format(
            images_per_batch, num_gpus)
        images_per_gpu = images_per_batch // num_gpus
        shuffle = False if not is_distributed else True
        num_iters = None
        start_iter = 0

    if images_per_gpu > 1:
        logger = logging.getLogger(__name__)
        logger.warning(
            "When using more than one image per GPU you may encounter "
            "an out-of-memory (OOM) error if your GPU does not have "
            "sufficient memory. If this happens, you can reduce "
            "SOLVER.IMS_PER_BATCH (for training) or "
            "TEST.IMS_PER_BATCH (for inference). For training, you must "
            "also adjust the learning rate and schedule length according "
            "to the linear scaling rule. See for example: "
            "https://github.com/facebookresearch/Detectron/blob/master/configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml#L14"
        )

    # group images which have similar aspect ratio. In this case, we only
    # group in two cases: those with width / height > 1, and the other way around,
    # but the code supports more general grouping strategy
    aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else []  # images in a batch are padded up to the batch's largest H and W, so grouping by aspect ratio (w/h > 1 vs. <= 1) limits how much padding is wasted

    paths_catalog = import_file(  # import the catalog of dataset paths
        "maskrcnn_benchmark.config.paths_catalog", cfg.PATHS_CATALOG, True
    )
    DatasetCatalog = paths_catalog.DatasetCatalog
    dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST

    # If bbox aug is enabled in testing, simply set transforms to None and we will apply transforms later
    
    transforms = None if not is_train and cfg.TEST.BBOX_AUG.ENABLED else build_transforms(cfg, is_train, batch_stitch=batch_stitch)  # build_transforms during training; at test time with bbox aug enabled, transforms stay None and are applied later
    datasets = build_dataset(dataset_list, transforms, DatasetCatalog, is_train)  # build_dataset is defined further down in build.py
    ### we stop here for now
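
Before moving on, a quick sanity check of the batch-stitch comment above: halving H and W divides per-image memory by 4, so four times as many images fit in the same space. A minimal sketch (the shapes are illustrative):

import torch

n, c, h, w = 2, 3, 800, 1344                      # illustrative shapes
full = torch.zeros(n, c, h, w)
stitched = torch.zeros(4 * n, c, h // 2, w // 2)  # [n,c,h,w] -> [4n,c,h/2,w/2]
assert full.numel() == stitched.numel()           # same element count, same memory footprint
print(full.numel(), stitched.numel())             # 6451200 6451200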

The last two lines call build_transforms and build_dataset.
(1) from .transforms import build_transforms
Look under the data/transforms folder:

#__init__.py
from .transforms import Compose
from .transforms import Resize
from .transforms import RandomHorizontalFlip
from .transforms import ToTensor
from .transforms import Normalize

from .build import build_transforms

#build.py
from . import transforms as T
# this imports the transforms module from the same package; it defines all of the classes re-exported by the __init__.py above

def build_transforms(cfg, is_train=True, batch_stitch=False):
    if is_train:
        min_size = cfg.INPUT.MIN_SIZE_TRAIN
        max_size = cfg.INPUT.MAX_SIZE_TRAIN
        if batch_stitch:  # batch stitching shrinks every image to 1/2 size (though both stitching variants halve the images)
            min_size = tuple(s // 2 for s in list(min_size))  # e.g. min_size (800,) -> (400,)
            max_size //= 2  # e.g. max_size 1333 -> 666
        flip_horizontal_prob = 0.5  # cfg.INPUT.FLIP_PROB_TRAIN
        flip_vertical_prob = cfg.INPUT.VERTICAL_FLIP_PROB_TRAIN
        brightness = cfg.INPUT.BRIGHTNESS
        contrast = cfg.INPUT.CONTRAST
        saturation = cfg.INPUT.SATURATION
        hue = cfg.INPUT.HUE
    else:
        min_size = cfg.INPUT.MIN_SIZE_TEST
        max_size = cfg.INPUT.MAX_SIZE_TEST
        flip_horizontal_prob = 0.0
        flip_vertical_prob = 0.0
        brightness = 0.0
        contrast = 0.0
        saturation = 0.0
        hue = 0.0

    to_bgr255 = cfg.INPUT.TO_BGR255  # convert to Caffe-style input: BGR channel order, 0~255 value range
    normalize_transform = T.Normalize(
        mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=to_bgr255
    )
    color_jitter = T.ColorJitter(
        brightness=brightness,
        contrast=contrast,
        saturation=saturation,
        hue=hue,
    )

    transform = T.Compose(
        [
            color_jitter,
            T.Resize(min_size, max_size),  # note this Resize; discussed below
            T.RandomHorizontalFlip(flip_horizontal_prob),
            T.RandomVerticalFlip(flip_vertical_prob),
            T.ToTensor(),
            normalize_transform,
        ]
    )
    return transform
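
Note that T.Compose here is maskrcnn_benchmark's own, not torchvision's: every transform takes and returns an (image, target) pair so the annotations stay in sync with the image. A simplified sketch of the pattern, based on my reading of data/transforms/transforms.py:

#data/transforms/transforms.py (simplified sketch)
class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        for t in self.transforms:
            image, target = t(image, target)  # each transform updates both
        return image, target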

build_transforms assembles the various transforms according to the values in cfg. The one worth a closer look is Resize.

class Resize(object):
    def __init__(self, min_size, max_size):
        if not isinstance(min_size, (list, tuple)):
            min_size = (min_size,)
        self.min_size = min_size
        self.max_size = max_size

    # modified from torchvision to add support for max size
    def get_size(self, image_size):  # keep the aspect ratio while fitting the image to the min_size/max_size constraints
        # (body elided; a reconstruction is sketched below)
        return (oh, ow)

    def __call__(self, image, target=None):
        size = self.get_size(image.size)
        image = F.resize(image, size)  # resize with interpolation to the size computed from min_size, max_size and the original image size
        if target is None:
            return image
        target = target.resize(image.size)  # the target (a BoxList of annotations) is resized the same way so the boxes stay aligned with the image
        return image, target
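
The elided get_size body can be reconstructed from the upstream maskrcnn_benchmark source. A runnable sketch with a couple of worked examples (the min_size/max_size defaults are assumed from the config values above):

import random

def get_size(image_size, min_size=(800,), max_size=1333):
    w, h = image_size
    size = random.choice(min_size)  # target length of the shorter side
    min_orig, max_orig = float(min(w, h)), float(max(w, h))
    if max_orig / min_orig * size > max_size:
        # the longer side would exceed max_size: shrink the target instead
        size = int(round(max_size * min_orig / max_orig))
    if (w <= h and w == size) or (h <= w and h == size):
        return (h, w)  # already the right size
    if w < h:
        ow, oh = size, int(size * h / w)
    else:
        oh, ow = size, int(size * w / h)
    return (oh, ow)

print(get_size((640, 480)))   # (800, 1066): shorter side scaled to 800
print(get_size((1600, 400)))  # (333, 1332): capped so the longer side stays <= 1333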

In summary, build_transforms returns a T.Compose object bundling the various image-processing operations, and with batch_stitch every image is directly shrunk to 1/2 size (even without batch_stitch, images may still be shrunk later by Resize).
(2) datasets = build_dataset(dataset_list, transforms, DatasetCatalog, is_train)
dataset_list is a list of strings naming the datasets used for training.
build_dataset returns a single dataset object: it merges the datasets in the list into one and applies the various transforms.
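
The merging relies on torch.utils.data.ConcatDataset, which makes several datasets look like one continuous index space. A runnable toy sketch (ToyDataset is a hypothetical stand-in for COCODataset):

from torch.utils.data import ConcatDataset, Dataset

class ToyDataset(Dataset):  # hypothetical stand-in for a real dataset
    def __init__(self, items):
        self.items = items
    def __len__(self):
        return len(self.items)
    def __getitem__(self, idx):
        return self.items[idx]

merged = ConcatDataset([ToyDataset([0, 1, 2]), ToyDataset([3, 4])])
print(len(merged), merged[3])  # 5 3 -- indices continue across the merged datasets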

