如何从 output 分类器创建分割掩码？

Question

I am a newer in ML and I am trying to apply image segmentation on my gray scale tif images.我是 ML 的新手，我正在尝试在我的灰度 tif 图像上应用图像分割。 The images has areas with the value NaN which represents the sea, and areas with value from 0 to 2 which represents land.这些图像具有代表海洋的值为 NaN 的区域，以及代表陆地的值为 0 到 2 的区域。 I create some true masks for training.我为训练创建了一些真正的面具。 The masks have area with NaN representing the sea, 0 representing the land, and 1 representing the clouds.蒙版的区域中，NaN 代表海洋，0 代表陆地，1 代表云。 I would like to create a segmentation mask which has 3 classes representing the sea, the land, and the clouds.我想创建一个分割蒙版，它有 3 个类，分别代表海洋、陆地和云。

I refer to TensorFlow tutorial and Google Colab tutorial and have the code below.我参考TensorFlow 教程和Google Colab 教程，并有以下代码。 The output classifier do show somethings but the segmentation mask became 0 overall. output 分类器确实显示了一些东西，但分割掩码整体变为 0。 Please help and thank you.请帮忙，谢谢。

from glob import glob
from PIL import Image
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.python.keras import layers
from tensorflow.python.keras import losses
from tensorflow.python.keras import models

#load images
img = sorted(glob('/content/drive/My Drive/train_sub_5/*.tif'))
mask = sorted(glob('/content/drive/My Drive/train_mask_sub_5/*.tif'))

#split into train and test dataset
img, img_val, mask, mask_val = train_test_split(img, mask, test_size=0.2, random_state=42)

#read images as array and make their shape (512, 512, 1)
train_image = []
for m in img[:]:
    image= Image.open(m)
    img_arr= np.nan_to_num(np.array(image), nan=0)
    stacked_img= np.stack((img_arr,)*1, axis=-1)
    train_image.append(stacked_img)

train_mask = []
for n in mask[:]:
    image_mask= Image.open(n)
    mask_arr= np.nan_to_num(np.array(image_mask), nan=2)
    stacked_mask = np.stack((mask_arr,)*1, axis=-1)
    train_mask.append(stacked_mask)

test_img = []
for o in img_val[:]:
    image= Image.open(o)
    img_arr = np.nan_to_num(np.array(image), nan=0)
    stacked_img = np.stack((img_arr,)*1, axis=-1)
    test_img.append(stacked_img)

test_mask = []
for p in mask_val[:]:
    image_mask= Image.open(p)
    mask_arr= np.nan_to_num(np.array(image_mask), nan=2)
    stacked_mask = np.stack((mask_arr,)*1, axis=-1)
    test_mask.append(stacked_mask)

#create tensorflow dataset 
train= tf.data.Dataset.from_tensor_slices((train_image, train_mask))
test = tf.data.Dataset.from_tensor_slices((test_img, test_mask))

#set parameters
train_length = len(train_image)
img_shape = (512,512,1)
batch_size = 16
epochs = 20

#shuffle, batch, and repeat
train_dataset = train.cache().shuffle(train_length).batch(batch_size).repeat()
train_dataset = train_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
test_dataset = test.batch(batch_size).repeat()

#build the model
def conv_block(input_tensor, num_filters):
    encoder = layers.Conv2D(num_filters, (3, 3), padding='same')(input_tensor)
    encoder = layers.BatchNormalization()(encoder)
    encoder = layers.Activation('relu')(encoder)
    encoder = layers.Conv2D(num_filters, (3, 3), padding='same')(encoder)
    encoder = layers.BatchNormalization()(encoder)
    encoder = layers.Activation('relu')(encoder)
    return encoder

def encoder_block(input_tensor, num_filters):
    encoder = conv_block(input_tensor, num_filters)
    encoder_pool = layers.MaxPooling2D((2, 2), strides=(2, 2))(encoder)
    return encoder_pool, encoder

def decoder_block(input_tensor, concat_tensor, num_filters):
    decoder = layers.Conv2DTranspose(num_filters, (2, 2), strides=(2, 2), padding='same')(input_tensor)
    decoder = layers.concatenate([concat_tensor, decoder], axis=-1)
    decoder = layers.BatchNormalization()(decoder)
    decoder = layers.Activation('relu')(decoder)
    decoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)
    decoder = layers.BatchNormalization()(decoder)
    decoder = layers.Activation('relu')(decoder)
    decoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)
    decoder = layers.BatchNormalization()(decoder)
    decoder = layers.Activation('relu')(decoder)
    return decoder

inputs = layers.Input(shape=img_shape)
encoder0_pool, encoder0 = encoder_block(inputs, 32)
encoder1_pool, encoder1 = encoder_block(encoder0_pool, 64)
encoder2_pool, encoder2 = encoder_block(encoder1_pool, 128)
encoder3_pool, encoder3 = encoder_block(encoder2_pool, 256)
encoder4_pool, encoder4 = encoder_block(encoder3_pool, 512)
center = conv_block(encoder4_pool, 1024)
decoder4 = decoder_block(center, encoder4, 512)
decoder3 = decoder_block(decoder4, encoder3, 256)
decoder2 = decoder_block(decoder3, encoder2, 128)
decoder1 = decoder_block(decoder2, encoder1, 64)
decoder0 = decoder_block(decoder1, encoder0, 32)
outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(decoder0)

#defined the model
model = models.Model(inputs=[inputs], outputs=[outputs])

#defined loss function
def dice_coeff(y_true, y_pred):
    smooth = 1.
    y_true_f = tf.reshape(y_true, [-1])
    y_pred_f = tf.reshape(y_pred, [-1])
    intersection = tf.reduce_sum(y_true_f * y_pred_f)
    score = (2.*intersection+smooth)/(tf.reduce_sum(y_true_f)+tf.reduce_sum(y_pred_f)+smooth)
    return score

def dice_loss(y_true, y_pred):
    loss = 1 - dice_coeff(y_true, y_pred)
    return loss

def bce_dice_loss(y_true, y_pred):
    loss = losses.binary_crossentropy(y_true, y_pred) + dice_loss(y_true, y_pred)
    return loss

#compiled the model
model.compile(optimizer='adam', loss=bce_dice_loss, metrics=[dice_loss])
model.summary()

save_model_path = '/content/drive/My Drive/tmp/weights.hdf5'
cp = tf.keras.callbacks.ModelCheckpoint(filepath=save_model_path, monitor='val_dice_loss', mode='max', save_best_only=True)

#trained the model
history = model.fit(train_dataset, steps_per_epoch=int(np.ceil(train_length / float(batch_size))), epochs=epochs, validation_data=test_dataset, validation_steps=int(np.ceil(len(test_img) / float(batch_size))), callbacks=[cp])

#visualize training process
dice = history.history['dice_loss']
val_dice = history.history['val_dice_loss']

loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(epochs)

plt.figure(figsize=(16, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, dice, label='Training Dice Loss')
plt.plot(epochs_range, val_dice, label='Validation Dice Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Dice Loss')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')

plt.show()

#visualize the output
def display(display_list):
    plt.figure(figsize=(15, 15))
    title = ['Input Image', 'True Mask', 'Predicted Mask']
    for i in range(len(display_list)):
        plt.subplot(1, len(display_list), i+1)
        plt.title(title[i])
        plt.imshow(tf.keras.preprocessing.image.array_to_img(display_list[i]))
        plt.axis('off')
    plt.show()

def show_predictions(dataset=None, num=1):
    for image, mask in dataset.take(num):
        pred_mask = model.predict(image)
        display([image[0,:,:,0], mask[0,:,:,0], create_mask(pred_mask)[:,:,0]]) #1
        display([image[0,:,:,0], mask[0,:,:,0], pred_mask[0,:,:,0]]) #2

def create_mask(pred_mask):
    pred_mask = tf.argmax(pred_mask, axis=-1)
    pred_mask = pred_mask[..., tf.newaxis]
    return pred_mask[0]

show_predictions(test_dataset, 3)

The output classifier pred_mask do show some meaningful structures, below are some examples of the output from code #1. output 分类器 pred_mask 确实显示了一些有意义的结构，下面是代码 #1 中 output 的一些示例。 Example 1 .示例 1 。 Example 2 .示例 2 。 Example 3 .例 3 。

When I tried to create a segmentation mask like TensorFlow tutorial with code #2, it returned 0 all over for the segmentation mask.当我尝试使用代码 #2 创建像 TensorFlow 教程这样的分段掩码时，它为分段掩码返回了 0。 Example 1 .示例 1 。

Answer 1

I find out how to produce the mask.我知道如何生产面具。 Below is what required to be modified.以下是需要修改的内容。

#read images as array and make their shape
train_image = []
for m in img[:]:
    image= Image.open(m)
    img_arr= np.nan_to_num(np.array(image), nan=0)
    stacked_img= np.stack((img_arr,)*3, axis=-1)
    train_image.append(stacked_img)

test_img = []
for o in img_val[:]:
    image= Image.open(o)
    img_arr = np.nan_to_num(np.array(image), nan=0)
    stacked_img = np.stack((img_arr,)*3, axis=-1)
    test_img.append(stacked_img)

#set parameters
img_shape = (512,512,3) #if I want to produce a 3-class mask, then set the third channel as 3

#build the model
outputs = layers.Conv2D(3, (1, 1), activation='softmax')(decoder0) #if I want to produce 3-class mask, then set layers.Conv2D(3,(1,1)) and use softmax; if 2-class mask is required, then set (2,(1,1)) and use sigmoid.

Though it works now, I am confused about the image channel and the parameter layers.Conv2D.虽然它现在可以工作，但我对图像通道和参数 layers.Conv2D 感到困惑。 If I have a RGB image and I want to produce a 5-class mask, how should I set for the parameter img_shape as input and layers.Conv2D (?,(1,1)) as output?如果我有一个 RGB 图像并且我想生成一个 5 类蒙版，我应该如何将参数 img_shape 设置为输入，并将 layers.Conv2D (?,(1,1)) 设置为 output？

如何从 output 分类器创建分割掩码？

问题描述

1 个解决方案

解决方案1
0 2020-04-27 14:48:26

如何从 output 分类器创建分割掩码？

问题描述

1 个解决方案

解决方案1 0 2020-04-27 14:48:26

解决方案1
0 2020-04-27 14:48:26