    摘要: 本示例教程使用U-Net实现图像分割。





    本案例使用Oxford-IIIT Pet数据集,官网: https://www.robots.ox.ac.uk/~vgg/data/pets/ 。


    1. 原图像:https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz
    2. 分割图像:https://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz

    # Download the photo archive and the annotation archive.
    # -L follows redirects (e.g. http -> https) so the archive itself is saved
    # rather than the redirect response; -O keeps the remote file name.
    !curl -L -O https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz
    !curl -L -O https://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz
    # Unpack both archives into the current directory.
    !tar -xf images.tar.gz
    !tar -xf annotations.tar.gz

    3.2 数据集概览


    1. 首先看一下images.tar.gz这个压缩包,该文件解压后得到一个images目录,这个目录比较简单,里面直接放的是用类名和序号命名好的图片文件,每个图片是对应的宠物照片。
    1. .
    2. ├── samoyed_7.jpg
    3. ├── ......
    4. └── samoyed_81.jpg
    1. 然后我们再看下annotations.tar.gz,文件解压后的目录里面包含以下内容,目录中的README文件对每个目录和文件都做了比较详细的介绍,我们可以通过README来查看每个目录和文件的说明。
    .
    ├── README
    ├── list.txt
    ├── test.txt
    ├── trainval.txt
    ├── trimaps
    │   ├── Abyssinian_1.png
    │   ├── Abyssinian_10.png
    │   ├── ......
    │   └── yorkshire_terrier_99.png
    └── xmls
        ├── Abyssinian_1.xml
        ├── Abyssinian_10.xml
        ├── ......
        └── yorkshire_terrier_190.xml



    # Size every photo and label mask is resized to before use.
    IMAGE_SIZE = (160, 160)
    # Folder that holds the pet photos (*.jpg).
    train_images_path = "images/"
    # Folder that holds the per-pixel trimap label images (*.png).
    label_images_path = "annotations/trimaps/"

    # Count the .jpg entries in the photo folder.
    image_count = sum(
        1
        for image_name in os.listdir(train_images_path)
        if image_name.endswith('.jpg')
    )
    print("用于训练的图片样本数量:", image_count)
    # Process the dataset: collect the files and split them into train/test sets.
    def _sort_images(image_dir, image_type):
        """Return the sorted paths of all ``image_type`` files in ``image_dir``.

        Args:
            image_dir: Directory to scan (its direct entries only).
            image_type: File extension without the leading dot, e.g. ``'jpg'``.

        Returns:
            A sorted list of ``os.path.join(image_dir, name)`` for every entry
            whose name ends with ``.<image_type>`` and does not start with a
            dot (hidden files are skipped).
        """
        # Build the suffix once instead of re-formatting it for every entry.
        suffix = '.{}'.format(image_type)
        return sorted(
            os.path.join(image_dir, image_name)
            for image_name in os.listdir(image_dir)
            if image_name.endswith(suffix) and not image_name.startswith('.')
        )
    def write_file(mode, images, labels):
        """Write the image/label pairs of one split to ``./<mode>.txt``.

        Each output line holds one image path and its label path, separated by
        a tab character and terminated by a newline.

        Args:
            mode: Split name; used as the stem of the output file name.
            images: Sequence of image paths.
            labels: Sequence of label paths, index-aligned with ``images``.

        Raises:
            ValueError: If ``images`` and ``labels`` differ in length. The
                check runs before the file is opened, so a mismatch never
                leaves a truncated or partially written index file behind.
        """
        if len(images) != len(labels):
            raise ValueError(
                'images and labels must have the same length, got {} and {}'
                .format(len(images), len(labels)))

        with open('./{}.txt'.format(mode), 'w') as f:
            f.writelines(
                '{}\t{}\n'.format(image, label)
                for image, label in zip(images, labels)
            )
    # The raw files are simply scattered in their folders, but training needs
    # each image matched with its label. So the first step is to build two
    # index-aligned lists, one of images and one of labels.
    # The pairing method is deliberately simple: sort both folders by file
    # name. This works because an image and its label share the same name and
    # differ only in the extension.
    images = _sort_images(train_images_path, 'jpg')
    labels = _sort_images(label_images_path, 'png')
    if len(images) != len(labels):
        # Pairing by sorted position is only meaningful for equal counts.
        raise ValueError(
            'found {} images but {} labels; cannot pair them by sorted order'
            .format(len(images), len(labels)))

    # Hold out 15% of the samples for evaluation / prediction.
    eval_num = int(image_count * 0.15)
    # Split at an explicit index rather than with [:-eval_num] / [-eval_num:]:
    # when eval_num is 0 those slices would give an empty training set and put
    # every sample into the evaluation set.
    split_index = len(images) - eval_num
    write_file('train', images[:split_index], labels[:split_index])
    write_file('test', images[split_index:], labels[split_index:])
    write_file('predict', images[split_index:], labels[split_index:])
    1. 用于训练的图片样本数量: 7390

    3.3 PetDataSet数据集抽样展示


    3.4 数据集类定义

    飞桨(PaddlePaddle)数据集加载方案是统一使用Dataset(数据集定义) + DataLoader(多进程数据集加载)。


    class MyDataset(Dataset):
        """Minimal template of a custom dataset.

        A dataset subclass provides ``__getitem__`` (one sample and its label
        per index) and ``__len__`` (the total number of samples).
        """

        def __init__(self):
            super().__init__()
            # Fill this with the (data, label) pairs of your dataset.
            self.samples = []

        def __getitem__(self, idx):
            """Return the data and the matching label of sample ``idx``."""
            x, y = self.samples[idx]
            return x, y

        def __len__(self):
            """Return the total number of samples in the dataset."""
            return len(self.samples)



    1. import random
    2. from paddle.io import Dataset
    3. from paddle.vision.transforms import transforms as T
    class PetDataset(Dataset):
        """Pet segmentation dataset driven by a ``./<mode>.txt`` index file.

        Every non-empty line of the index file holds an image path and a label
        path separated by a tab. Indexing the dataset loads both files and
        returns them as NumPy arrays.
        """

        def __init__(self, mode='train'):
            """Read the index file of the requested split.

            Args:
                mode: ``'train'``, ``'test'`` or ``'predict'``; compared
                    case-insensitively.

            Raises:
                ValueError: If ``mode`` is not one of the three split names.
                    (Raised explicitly rather than via ``assert`` so the check
                    still runs under ``python -O``.)
            """
            super().__init__()
            self.image_size = IMAGE_SIZE
            self.mode = mode.lower()

            if self.mode not in ('train', 'test', 'predict'):
                raise ValueError(
                    "mode should be 'train' or 'test' or 'predict', but got {}".format(self.mode))

            self.train_images = []
            self.label_images = []

            with open('./{}.txt'.format(self.mode), 'r') as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        # Tolerate blank lines (e.g. a trailing newline).
                        continue
                    image, label = line.split('\t')
                    self.train_images.append(image)
                    self.label_images.append(label)

        def _load_img(self, path, color_mode='rgb', transforms=None):
            """Load one image, normalise its colour mode, resize and transform it.

            Args:
                path: Path of the image file.
                color_mode: ``'grayscale'``, ``'rgb'`` or ``'rgba'``.
                transforms: Optional sequence of extra transforms applied after
                    the resize to ``self.image_size``. Defaults to none.

            Returns:
                The result of running the composed transforms on the image.

            Raises:
                ValueError: If ``color_mode`` is not a supported value.
            """
            if color_mode not in ('grayscale', 'rgb', 'rgba'):
                raise ValueError('color_mode must be "grayscale", "rgb", or "rgba"')

            # The file is read fully into memory, so the handle can be closed
            # as soon as the ``with`` block ends.
            with open(path, 'rb') as f:
                img = PilImage.open(io.BytesIO(f.read()))

            if color_mode == 'grayscale':
                # Images that are already 8-, 16- or 32-bit grayscale are kept
                # as they are; anything else becomes 8-bit grayscale.
                if img.mode not in ('L', 'I;16', 'I'):
                    img = img.convert('L')
            elif color_mode == 'rgba':
                if img.mode != 'RGBA':
                    img = img.convert('RGBA')
            else:  # 'rgb'
                if img.mode != 'RGB':
                    img = img.convert('RGB')

            # Build a fresh list on every call; a mutable default argument
            # would be shared between calls.
            pipeline = [T.Resize(self.image_size)] + list(transforms or [])
            return T.Compose(pipeline)(img)

        def __getitem__(self, idx):
            """Return ``(image, label)`` for sample ``idx``.

            The image is loaded as RGB, transposed and normalised with
            mean 127.5 / std 127.5, then converted to a float32 array. The
            label is loaded as grayscale and converted to an int64 array.
            """
            # Load the original photo.
            train_image = self._load_img(
                self.train_images[idx],
                transforms=[
                    T.Transpose(),
                    T.Normalize(mean=127.5, std=127.5),
                ])
            # Load the label image.
            label_image = self._load_img(
                self.label_images[idx],
                color_mode='grayscale',
                transforms=[T.Grayscale()])

            train_image = np.array(train_image, dtype='float32')
            label_image = np.array(label_image, dtype='int64')
            return train_image, label_image

        def __len__(self):
            """Return the number of samples in this split."""
            return len(self.train_images)



    为了减少卷积操作中的训练参数、提升性能,我们继承paddle.nn.Layer自定义了一个SeparableConv2D Layer类。整个过程是把filter_size * filter_size * num_filters的Conv2D操作拆解为两个子Conv2D:先对输入数据的每个通道使用filter_size * filter_size * 1的卷积核进行计算,输入输出通道数目相同;之后再使用1 * 1 * num_filters的卷积核计算。

    1. from paddle.nn import functional as F
    class SeparableConv2D(paddle.nn.Layer):
        """Separable 2-D convolution built from two ``F.conv2d`` calls.

        The first call convolves each input channel on its own
        (``groups=in_channels``, no bias) with a ``kernel_size`` filter. The
        second call is a 1x1 convolution with bias that maps ``in_channels``
        to ``out_channels``.
        """

        def __init__(self,
                     in_channels,
                     out_channels,
                     kernel_size,
                     stride=1,
                     padding=0,
                     dilation=1,
                     groups=None,
                     weight_attr=None,
                     bias_attr=None,
                     data_format="NCHW"):
            """Create the parameters of both convolutions.

            Args:
                in_channels: Number of input channels.
                out_channels: Number of output channels.
                kernel_size: Int or 2-element sequence for the first convolution.
                stride: Stride of the first convolution.
                padding: Padding of the first convolution.
                dilation: Dilation of the first convolution.
                groups: Accepted but not used; the first convolution always
                    runs with ``groups=in_channels``.
                weight_attr: Attribute passed to ``create_parameter`` for both
                    weight tensors.
                bias_attr: Attribute passed to ``create_parameter`` for the bias.
                data_format: Data layout passed to both ``F.conv2d`` calls.
            """
            super(SeparableConv2D, self).__init__()

            self._padding = padding
            # forward() reads self._stride, so it must be stored here.
            self._stride = stride
            self._dilation = dilation
            self._in_channels = in_channels
            self._data_format = data_format

            # Parameters of the first (per-channel) convolution; it has no bias.
            filter_shape = [in_channels, 1] + self.convert_to_list(kernel_size, 2, 'kernel_size')
            self.weight_conv = self.create_parameter(shape=filter_shape, attr=weight_attr)

            # Parameters of the second (1x1) convolution, including its bias.
            filter_shape = [out_channels, in_channels] + self.convert_to_list(1, 2, 'kernel_size')
            self.weight_pointwise = self.create_parameter(shape=filter_shape, attr=weight_attr)
            self.bias_pointwise = self.create_parameter(shape=[out_channels],
                                                        attr=bias_attr,
                                                        is_bias=True)

        def convert_to_list(self, value, n, name, dtype=int):
            """Normalise ``value`` into a list of ``n`` elements.

            Args:
                value: Either a single ``dtype`` instance (repeated ``n`` times)
                    or an iterable of exactly ``n`` elements.
                n: Required length of the result.
                name: Argument name used in error messages.
                dtype: Expected element type. Defaults to the builtin ``int``
                    (the ``np.int`` alias no longer exists in current NumPy).

            Returns:
                A list with ``n`` elements.

            Raises:
                ValueError: If ``value`` is not iterable, has the wrong length,
                    or contains an element that ``dtype`` cannot convert.
            """
            if isinstance(value, dtype):
                return [value, ] * n

            try:
                value_list = list(value)
            except TypeError:
                raise ValueError("The " + name +
                                 "'s type must be list or tuple. Received: " + str(value))

            if len(value_list) != n:
                raise ValueError("The " + name + "'s length must be " + str(n) +
                                 ". Received: " + str(value))

            for single_value in value_list:
                try:
                    dtype(single_value)
                except (ValueError, TypeError):
                    raise ValueError(
                        "The " + name + "'s type must be a list or tuple of " + str(n) +
                        " " + str(dtype) + " . Received: " + str(value) +
                        " including element " + str(single_value) + " of type" + " " +
                        str(type(single_value)))
            return value_list

        def forward(self, inputs):
            """Apply the per-channel convolution, then the 1x1 convolution."""
            conv_out = F.conv2d(inputs,
                                self.weight_conv,
                                padding=self._padding,
                                stride=self._stride,
                                dilation=self._dilation,
                                groups=self._in_channels,
                                data_format=self._data_format)

            out = F.conv2d(conv_out,
                           self.weight_pointwise,
                           bias=self.bias_pointwise,
                           padding=0,
                           stride=1,
                           dilation=1,
                           groups=1,
                           data_format=self._data_format)
            return out

    4.2 定义Encoder编码器


    class Encoder(paddle.nn.Layer):
        """Down-sampling block with a residual connection.

        Main path: two rounds of ReLU -> separable 3x3 convolution -> batch
        norm, followed by a stride-2 max pool. Residual path: a stride-2 1x1
        convolution of the block input. Both paths are added together.
        """

        def __init__(self, in_channels, out_channels):
            super().__init__()

            self.relus = paddle.nn.LayerList(
                [paddle.nn.ReLU() for _ in range(2)])
            self.separable_conv_01 = SeparableConv2D(
                in_channels, out_channels, kernel_size=3, padding='same')
            self.bns = paddle.nn.LayerList(
                [paddle.nn.BatchNorm2D(out_channels) for _ in range(2)])
            self.separable_conv_02 = SeparableConv2D(
                out_channels, out_channels, kernel_size=3, padding='same')
            self.pool = paddle.nn.MaxPool2D(kernel_size=3, stride=2, padding=1)
            self.residual_conv = paddle.nn.Conv2D(
                in_channels, out_channels, kernel_size=1, stride=2, padding='same')

        def forward(self, inputs):
            """Run the main path, then add the projected block input."""
            convs = (self.separable_conv_01, self.separable_conv_02)

            y = inputs
            for relu, conv, bn in zip(self.relus, convs, self.bns):
                y = bn(conv(relu(y)))
            y = self.pool(y)

            # Project the untouched block input so it can be added to y.
            residual = self.residual_conv(inputs)
            return paddle.add(y, residual)

    4.3 定义Decoder解码器


    class Decoder(paddle.nn.Layer):
        """Up-sampling block with a residual connection.

        Main path: two rounds of ReLU -> 3x3 transposed convolution -> batch
        norm, followed by a 2x upsample. Residual path: a 2x upsample of the
        block input followed by a 1x1 convolution. Both paths are added
        together.
        """

        def __init__(self, in_channels, out_channels):
            super().__init__()

            self.relus = paddle.nn.LayerList(
                [paddle.nn.ReLU() for _ in range(2)])
            self.conv_transpose_01 = paddle.nn.Conv2DTranspose(
                in_channels, out_channels, kernel_size=3, padding=1)
            self.conv_transpose_02 = paddle.nn.Conv2DTranspose(
                out_channels, out_channels, kernel_size=3, padding=1)
            self.bns = paddle.nn.LayerList(
                [paddle.nn.BatchNorm2D(out_channels) for _ in range(2)])
            self.upsamples = paddle.nn.LayerList(
                [paddle.nn.Upsample(scale_factor=2.0) for _ in range(2)])
            self.residual_conv = paddle.nn.Conv2D(
                in_channels, out_channels, kernel_size=1, padding='same')

        def forward(self, inputs):
            """Run the main path, then add the upsampled, projected block input."""
            main_upsample, residual_upsample = self.upsamples[0], self.upsamples[1]
            convs = (self.conv_transpose_01, self.conv_transpose_02)

            y = inputs
            for relu, conv, bn in zip(self.relus, convs, self.bns):
                y = bn(conv(relu(y)))
            y = main_upsample(y)

            # Upsample the untouched block input first, then project it.
            residual = self.residual_conv(residual_upsample(inputs))
            return paddle.add(y, residual)

    4.4 训练模型组网


    class PetNet(paddle.nn.Layer):
        """Segmentation network: stem convolution, encoders, decoders, output conv.

        The stem is a stride-2 3x3 convolution with batch norm and ReLU. It is
        followed by one ``Encoder`` per entry of ``encoder_list``, one
        ``Decoder`` per entry of ``decoder_list``, and a final 3x3 convolution
        that produces ``num_classes`` channels.
        """

        def __init__(self, num_classes):
            super().__init__()

            self.conv_1 = paddle.nn.Conv2D(
                3, 32, kernel_size=3, stride=2, padding='same')
            self.bn = paddle.nn.BatchNorm2D(32)
            self.relu = paddle.nn.ReLU()

            # Channel count flowing into the next block; the stem outputs 32.
            in_channels = 32

            self.encoders = []
            self.encoder_list = [64, 128, 256]
            self.decoder_list = [256, 128, 64, 32]

            # Create the down-sampling blocks in a loop, driven by the channel
            # configuration, instead of writing each one out by hand.
            for out_channels in self.encoder_list:
                encoder = Encoder(in_channels, out_channels)
                self.encoders.append(
                    self.add_sublayer('encoder_{}'.format(out_channels), encoder))
                in_channels = out_channels

            self.decoders = []

            # Same approach for the up-sampling blocks.
            for out_channels in self.decoder_list:
                decoder = Decoder(in_channels, out_channels)
                self.decoders.append(
                    self.add_sublayer('decoder_{}'.format(out_channels), decoder))
                in_channels = out_channels

            self.output_conv = paddle.nn.Conv2D(
                in_channels, num_classes, kernel_size=3, padding='same')

        def forward(self, inputs):
            """Pass ``inputs`` through the stem, every encoder, every decoder and the output conv."""
            y = self.relu(self.bn(self.conv_1(inputs)))

            for block in self.encoders + self.decoders:
                y = block(y)

            return self.output_conv(y)


    5.1 启动模型训练


    # Datasets for training and for validation during training.
    train_dataset = PetDataset(mode='train')
    val_dataset = PetDataset(mode='test')

    # NOTE(review): `model` is not defined in this listing; presumably it is
    # created in an earlier cell - confirm before running.
    optim = paddle.optimizer.RMSProp(
        learning_rate=0.001,
        rho=0.9,
        momentum=0.0,
        epsilon=1e-07,
        centered=False,
        parameters=model.parameters(),
    )

    # Attach the optimizer and the loss, then train for 15 epochs.
    model.prepare(optim, paddle.nn.CrossEntropyLoss(axis=1))
    model.fit(
        train_dataset,
        val_dataset,
        epochs=15,
        batch_size=32,
        verbose=1,
    )
    6.1 预测数据集准备和预测



    # Load the split listed in ./predict.txt and run the trained model over it.
    predict_dataset = PetDataset(mode='predict')
    predict_results = model.predict(predict_dataset)
    1. Predict begin...
    2. step 1108/1108 [==============================] - 14ms/step

    6.2 预测结果可视化
