Source code for neodroidvision.data.mixed.penn_fudan

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

__author__ = "Christian Heider Nielsen"
__doc__ = r"""

           Created on 22/03/2020
           """

from enum import Enum
from pathlib import Path
from typing import Tuple, Union

import numpy
import torch
from PIL import Image
from draugr.numpy_utilities import SplitEnum, chw_to_hwc, mix_channels
from draugr.opencv_utilities import (
    InterpolationEnum,
    cv2_resize,
    draw_boxes,
    draw_masks,
)
from draugr.torch_utilities import (
    SupervisedDataset,
    float_chw_to_hwc_uint_tensor,
    global_torch_device,
    to_tensor,
    uint_hwc_to_chw_float_tensor,
)
from matplotlib import pyplot
from sorcery import assigned_names
from torchvision.transforms import Compose, Resize, ToTensor

__all__ = ["PennFudanDataset"]

from neodroidvision.utilities import (
    TupleCompose,
    TupleRandomHorizontalFlip,
    TupleToTensor,
)


[docs]class PennFudanDataset(SupervisedDataset): """description""" predictor_channels = 3 # RGB input response_channels_two_classes = ( 2 # our dataset has two classes only - background and person ) response_channels_binary = 1 response_channels_instanced = None image_size = (256, 256) image_size_T = image_size[::-1] categories = ("void", "person")
[docs] class PennFudanReturnVariantEnum(Enum): """ Return binary mask, instanced or all annotations """ binary, instanced, all = assigned_names()
@property def response_shape(self) -> Tuple[int, ...]: """ :return: :rtype:""" if self._return_variant == PennFudanDataset.PennFudanReturnVariantEnum.binary: return *self.image_size_T, self.response_channels_binary elif ( self._return_variant == PennFudanDataset.PennFudanReturnVariantEnum.instanced ): return *self.image_size_T, self.response_channels_instanced elif self._return_variant == PennFudanDataset.PennFudanReturnVariantEnum.all: return *self.image_size_T, self.response_channels_two_classes raise NotImplementedError @property def predictor_shape(self) -> Tuple[int, ...]: """ :return: :rtype:""" return *self.image_size_T, self.predictor_channels
[docs] @staticmethod def get_transforms(split: SplitEnum): """ :param split: :type split: :return: :rtype:""" transforms = [Resize(PennFudanDataset.image_size_T), ToTensor()] # if split == SplitEnum.training: # transforms.append(RandomHorizontalFlip(0.5)) return Compose(transforms)
[docs] @staticmethod def get_tuple_transforms(split: SplitEnum): """ :param split: :type split: :return: :rtype:""" transforms = [ # Resize(PennFudanDataset.image_size_T), TupleToTensor() ] if split == SplitEnum.training: transforms.append(TupleRandomHorizontalFlip(0.5)) return TupleCompose(transforms)
[docs] def __init__( self, root: Union[str, Path], split: SplitEnum = SplitEnum.training, return_variant: PennFudanReturnVariantEnum = PennFudanReturnVariantEnum.binary, ): """ :param root: :type root: :param split: :type split:""" super().__init__() if not isinstance(root, Path): root = Path(root) self._root_data_path = root self._return_variant = return_variant if self._return_variant != PennFudanDataset.PennFudanReturnVariantEnum.all: self._transforms = self.get_transforms(split) else: self._transforms = self.get_tuple_transforms(split) if self._return_variant == PennFudanDataset.PennFudanReturnVariantEnum.binary: self._getter = self.get_binary elif ( self._return_variant == PennFudanDataset.PennFudanReturnVariantEnum.instanced ): self._getter = self.get_instanced elif self._return_variant == PennFudanDataset.PennFudanReturnVariantEnum.all: self._getter = self.get_all else: raise NotImplementedError self._img_path = root / "PNGImages" self._ped_path = root / "PedMasks" self.imgs = list( sorted(self._img_path.iterdir()) ) # load all image files, sorting them to self.masks = list( sorted(self._ped_path.iterdir()) ) # ensure that they are aligned if ( self._return_variant == PennFudanDataset.PennFudanReturnVariantEnum.instanced ): max_num_instance = 0 for m in self.masks: mask = numpy.array(Image.open(self._ped_path / m)) num_unique = numpy.unique(mask).shape[0] if max_num_instance < num_unique: max_num_instance = num_unique PennFudanDataset.response_channels_instanced = max_num_instance self.zero_mask = numpy.zeros( self.response_shape[::-1] ) # reversed order numpy array of torch tensor output
def __getitem__(self, idx: int): """ :param idx: :type idx: :return: :rtype:""" return self._getter(idx)
[docs] def get_binary(self, idx): """ Return a single binary channel target for all instances in image :param idx: :type idx: :return: :rtype:""" img = numpy.array(Image.open(self._img_path / self.imgs[idx]).convert("RGB")) mask = numpy.array(Image.open(self._ped_path / self.masks[idx])) mask[mask != 0] = 1.0 img = cv2_resize(img, self.image_size_T) mask = cv2_resize(mask, self.image_size_T, InterpolationEnum.nearest) return ( uint_hwc_to_chw_float_tensor(to_tensor(img, dtype=torch.uint8)), to_tensor(mask).unsqueeze(0), )
[docs] def get_instanced(self, idx): """ Return a separate channel target for each instance in image :param idx: :type idx: :return: :rtype:""" img = numpy.array(Image.open(self._img_path / self.imgs[idx]).convert("RGB")) mask = numpy.array(Image.open(self._ped_path / self.masks[idx])) img = cv2_resize(img, self.image_size_T) mask = cv2_resize(mask, self.image_size_T, InterpolationEnum.nearest) obj_ids = numpy.unique(mask) # instances are encoded as different colors obj_ids = obj_ids[1:] # first id is the background, so remove it # split the color-encoded mask into a set of binary masks masks = mask == obj_ids[:, None, None] zero_mask_clone = self.zero_mask.copy() zero_mask_clone[: masks.shape[0]] = masks return ( uint_hwc_to_chw_float_tensor(to_tensor(img, dtype=torch.uint8)), torch.as_tensor(zero_mask_clone, dtype=torch.uint8), )
[docs] def get_all(self, idx): """ Return all info including bounding boxes for each instance :param idx: :type idx: :return: :rtype:""" mask = torch.as_tensor( numpy.array(Image.open(self._ped_path / self.masks[idx])) ) # note that we haven't converted the mask to RGB, # because each color corresponds to a different instance # with 0 being background obj_ids = torch.unique(mask) # instances are encoded as different colors obj_ids = obj_ids[1:] # first id is the background, so remove it # split the color-encoded mask into a set of binary masks masks = mask == obj_ids[:, None, None] # get bounding box coordinates for each mask num_objs = len(obj_ids) boxes = [] for i in range(num_objs): pos = torch.where(masks[i]) xmin = torch.min(pos[1]) xmax = torch.max(pos[1]) ymin = torch.min(pos[0]) ymax = torch.max(pos[0]) boxes.append([xmin, ymin, xmax, ymax]) boxes = torch.as_tensor(boxes, dtype=torch.float32) labels = torch.ones((num_objs,), dtype=torch.int64) # there is only one class masks = torch.as_tensor(masks, dtype=torch.uint8) # TODO: IMPLEMENT RESIZING OF PICTURES image_id = torch.tensor([idx]) area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]) is_crowd = torch.zeros( (num_objs,), dtype=torch.int64 ) # suppose all instances are not crowd return self._transforms( Image.open(self._img_path / self.imgs[idx]).convert("RGB"), dict( boxes=boxes, labels=labels, masks=masks, image_id=image_id, area=area, iscrowd=is_crowd, ), )
def __len__(self): return len(self.imgs)
if __name__ == "__main__": def main_binary(p=Path.home() / "Data" / "Datasets" / "PennFudanPed"): """ :param p: :type p: """ dataset = PennFudanDataset(p, SplitEnum.training) global_torch_device(override=global_torch_device("cpu")) idx = -2 img, mask = dataset[idx] print(img) print(img.shape, mask.shape) pyplot.imshow(float_chw_to_hwc_uint_tensor(img)) pyplot.show() pyplot.imshow(mask.squeeze(0)) pyplot.show() def main_instanced(p=Path.home() / "Data" / "Datasets" / "PennFudanPed"): """ :param p: :type p: """ dataset = PennFudanDataset( p, SplitEnum.training, return_variant=PennFudanDataset.PennFudanReturnVariantEnum.instanced, ) global_torch_device(override=global_torch_device("cpu")) idx = -2 img, mask = dataset[idx] print(img) print(img.shape, mask.shape) pyplot.imshow(float_chw_to_hwc_uint_tensor(img)) pyplot.show() for m in mask: pyplot.imshow(m.squeeze(0)) pyplot.show() def main_instanced_mixed(p=Path.home() / "Data" / "Datasets" / "PennFudanPed"): """ :param p: :type p: """ dataset = PennFudanDataset( p, SplitEnum.training, return_variant=PennFudanDataset.PennFudanReturnVariantEnum.instanced, ) global_torch_device(override=global_torch_device("cpu")) idx = -2 img, mask = dataset[idx] print(img) print(img.shape, mask.shape) pyplot.imshow(float_chw_to_hwc_uint_tensor(img)) pyplot.show() print(mask.shape) pyplot.imshow(mix_channels(chw_to_hwc(mask.numpy()))) pyplot.show() def main_instanced_single_channel( p=Path.home() / "Data" / "Datasets" / "PennFudanPed", ): """ :param p: :type p: """ dataset = PennFudanDataset( p, SplitEnum.training, return_variant=PennFudanDataset.PennFudanReturnVariantEnum.instanced, ) global_torch_device(override=global_torch_device("cpu")) idx = -2 img, mask = dataset[idx] print(img) print(img.shape, mask.shape) i = float_chw_to_hwc_uint_tensor(img).numpy() # pyplot.imshow(i) # pyplot.show() a, b = numpy.zeros_like(mask), mask.numpy() print(a.shape, b.shape) pyplot.imshow(draw_masks(i, b)) pyplot.show() def main_all_bb(p=Path.home() / "Data" / "Datasets" / "PennFudanPed"): """ :param p: :type p: """ dataset = PennFudanDataset( p, SplitEnum.training, return_variant=PennFudanDataset.PennFudanReturnVariantEnum.all, ) global_torch_device(override=global_torch_device("cpu")) idx = -2 img, info = dataset[idx] print(img) print(img.shape) img = float_chw_to_hwc_uint_tensor(img).detach().numpy() pyplot.imshow( draw_boxes.draw_bounding_boxes( img, info["boxes"], labels=info["labels"], mode="RGB" ) ) pyplot.show() # p = Path.home() / "Data3" / "PennFudanPed" p = Path.home() / "Data" / "Datasets" / "PennFudanPed" # main_binary(p) # main_instanced(p) # main_instanced_mixed(p) main_instanced_single_channel(p) # main_all_bb(p )