BaseTransform
视觉中图像变化的基类。
调用逻辑:
如果你想要定义自己的图像变化方法, 需要重写子类中的 _apply_*
方法。
import numpy as np
from PIL import Image
import paddle.vision.transforms.functional as F
from paddle.vision.transforms import BaseTransform
def _get_image_size(img):
if F._is_pil_image(img):
return img.size
elif F._is_numpy_image(img):
return img.shape[:2][::-1]
else:
raise TypeError("Unexpected type {}".format(type(img)))
class CustomRandomFlip(BaseTransform):
def __init__(self, prob=0.5, keys=None):
super(CustomRandomFlip, self).__init__(keys)
def _get_params(self, inputs):
image = inputs[self.keys.index('image')]
params = {}
params['flip'] = np.random.random() < self.prob
params['size'] = _get_image_size(image)
return params
def _apply_image(self, image):
if self.params['flip']:
return F.hflip(image)
return image
# if you only want to transform image, do not need to rewrite this function
def _apply_coords(self, coords):
if self.params['flip']:
w = self.params['size'][0]
coords[:, 0] = w - coords[:, 0]
return coords
# if you only want to transform image, do not need to rewrite this function
idxs = np.array([(0, 1), (2, 1), (0, 3), (2, 3)]).flatten()
coords = np.asarray(boxes).reshape(-1, 4)[:, idxs].reshape(-1, 2)
minxy = coords.min(axis=1)
maxxy = coords.max(axis=1)
trans_boxes = np.concatenate((minxy, maxxy), axis=1)
return trans_boxes
# if you only want to transform image, do not need to rewrite this function
def _apply_mask(self, mask):
if self.params['flip']:
return F.hflip(mask)
return mask
# create fake inputs
fake_img = Image.fromarray((np.random.rand(400, 500, 3) * 255.).astype('uint8'))
fake_boxes = np.array([[2, 3, 200, 300], [50, 60, 80, 100]])
fake_mask = fake_img.convert('L')
# only transform for image:
flip_transform = CustomRandomFlip(1.0)
converted_img = flip_transform(fake_img)
# transform for image, boxes and mask
flip_transform = CustomRandomFlip(1.0, keys=('image', 'boxes', 'mask'))
print('converted boxes', converted_boxes)