关于分类而不是使用深度学习进行回归的问题

Question

我有个问题。 我有一个工作正常的网络，我想进行回归分析。 但是，当我尝试将其用于分类时（在进行了适当的更改之后），我遇到了一些问题。 我有9个班级，但问题是网络以我不清楚的方式输出我。 它为每个对象输出9x1的矢量，这很好，但是里面的值不是概率。 我尝试将softmax输出转换为概率（exp（1）/（exp（1）+ .. + exp（n）））但没有任何影响。 我用的是咖啡。 网络告诉我我想要的是它属于哪个类的输入。 基本上在输出中，我想要一个代表我的班级的值。 我附上我的prototxt文件。

name: "Zeiler_conv5"

input: "data"
input_dim: 1
input_dim: 3
input_dim: 224
input_dim: 224

input: "rois"
input_dim: 1 # to be changed on-the-fly to num ROIs
input_dim: 5 # [batch ind, x1, y1, x2, y2] zero-based indexing
input_dim: 1
input_dim: 1

input: "labels"
input_dim: 1 # to be changed on-the-fly to match num ROIs
input_dim: 1
input_dim: 1
input_dim: 1

input: "bbox_targets"
input_dim: 1  # to be changed on-the-fly to match num ROIs
input_dim: 84 # 4 * (K+1) (=21) classes
input_dim: 1
input_dim: 1

input: "bbox_loss_weights"
input_dim: 1  # to be changed on-the-fly to match num ROIs
input_dim: 84 # 4 * (K+1) (=21) classes
input_dim: 1
input_dim: 1

input: "angle_head"
input_dim: 1  # to be changed on-the-fly to match num ROIs
input_dim: 9 # 9 (-180:45:180) classes
input_dim: 1
input_dim: 1

input: "angle_head_weight"
input_dim: 1  # to be changed on-the-fly to match num ROIs
input_dim: 9 # 9 (-180:45:180) classes
input_dim: 1
input_dim: 1


layer {
    name: "conv1"
    type: "Convolution"
    bottom: "data"
    top: "conv1"
    param {
        lr_mult: 0.0
    }
    param {
        lr_mult: 0.0
    }
    convolution_param {
        num_output: 96
        kernel_size: 7
        pad: 3
        stride: 2
        weight_filler {
            type: "gaussian"
            std: 0.01
        }
        bias_filler {
            type: "constant"
            value: 0
        }
    }
}

layer {
    name: "relu1"
    type: "ReLU"
    bottom: "conv1"
    top: "conv1"
}

layer {
    name: "norm1"
    type: "LRN"
    bottom: "conv1"
    top: "norm1"
    lrn_param {
        local_size: 3
        alpha: 0.00005
        beta: 0.75
        norm_region: WITHIN_CHANNEL
    }
}

layer {
    name: "pool1"
    type: "Pooling"
    bottom: "norm1"
    top: "pool1"
    pooling_param {
        kernel_size: 3
        stride: 2
        pad: 1
        pool: MAX
    }
}

layer {
    name: "conv2"
    type: "Convolution"
    bottom: "pool1"
    top: "conv2"
    param {
        lr_mult: 0.0
    }
    param {
        lr_mult: 0.0
    }
    convolution_param {
        num_output: 256
        kernel_size: 5
        pad: 2
        stride: 2
        weight_filler {
            type: "gaussian"
            std: 0.01
        }
        bias_filler {
            type: "constant"
            value: 1
        }
    }
}

layer {
    name: "relu2"
    type: "ReLU"
    bottom: "conv2"
    top: "conv2"
}

layer {
    name: "norm2"
    type: "LRN"
    bottom: "conv2"
    top: "norm2"
    lrn_param {
        local_size: 3
        alpha: 0.00005
        beta: 0.75
        norm_region: WITHIN_CHANNEL
    }
}

layer {
    name: "pool2"
    type: "Pooling"
    bottom: "norm2"
    top: "pool2"
    pooling_param {
        kernel_size: 3
        stride: 2
        pad: 1
        pool: MAX
    }
}

layer {
    name: "conv3"
    type: "Convolution"
    bottom: "pool2"
    top: "conv3"
    param {
        lr_mult: 0.0
    }
    param {
        lr_mult: 0.0
    }
    convolution_param {
        num_output: 384
        kernel_size: 3
        pad: 1
        stride: 1
        weight_filler {
            type: "gaussian"
            std: 0.01
        }
        bias_filler {
            type: "constant"
            value: 0
        }
    }
}

layer {
    name: "relu3"
    type: "ReLU"
    bottom: "conv3"
    top: "conv3"
}

layer {
    name: "conv4"
    type: "Convolution"
    bottom: "conv3"
    top: "conv4"
    param {
        lr_mult: 0.0
    }
    param {
        lr_mult: 0.0
    }
    convolution_param {
        num_output: 384
        kernel_size: 3
        pad: 1
        stride: 1
        weight_filler {
            type: "gaussian"
            std: 0.01
        }
        bias_filler {
            type: "constant"
            value: 1
        }
    }
}

layer {
    name: "relu4"
    type: "ReLU"
    bottom: "conv4"
    top: "conv4"
}

layer {
    name: "conv5"
    type: "Convolution"
    bottom: "conv4"
    top: "conv5"
    param {
        lr_mult: 0.0
    }
    param {
        lr_mult: 0.0
    }
    convolution_param {
        num_output: 256
        kernel_size: 3
        pad: 1
        stride: 1
        weight_filler {
            type: "gaussian"
            std: 0.01
        }
        bias_filler {
            type: "constant"
            value: 1
        }
    }
}

layer {
    name: "relu5"
    type: "ReLU"
    bottom: "conv5"
    top: "conv5"
}

layer {
    bottom: "conv5"
    bottom: "rois"
    top: "pool5"
    name: "roi_pool5"
    type: "ROIPooling"
    roi_pooling_param {
        pooled_w: 6
        pooled_h: 6
        spatial_scale: 0.0625  # (1/16)
    }
}

layer {
    bottom: "pool5"
    top: "fc6"
    name: "fc6"
    param {
        lr_mult: 1.0
    }
    param {
        lr_mult: 2.0
    }
    type: "InnerProduct"
    inner_product_param {
        num_output: 4096
    }
}

layer {
    bottom: "fc6"
    top: "fc6"
    name: "relu6"
    type: "ReLU"
}

layer {
    bottom: "fc6"
    top: "fc6"
    name: "drop6"
    type: "Dropout"
    dropout_param {
        dropout_ratio: 0.5
        scale_train: false
    }
}

layer {
    bottom: "fc6"
    top: "fc7"
    name: "fc7"
    param {
        lr_mult: 1.0
    }
    param {
        lr_mult: 2.0
    }
    type: "InnerProduct"
    inner_product_param {
        num_output: 4096
    }
}

layer {
    bottom: "fc7"
    top: "fc7"
    name: "relu7"
    type: "ReLU"
}

layer {
    bottom: "fc7"
    top: "fc7"
    name: "drop7"
    type: "Dropout"
    dropout_param {
        dropout_ratio: 0.5
        scale_train: false
    }
}

layer {
    bottom: "fc7"
    top: "cls_score"
    name: "cls_score"
    param {
        lr_mult: 1.0
    }
    param {
        lr_mult: 2.0
    }
    type: "InnerProduct"
    inner_product_param {
        num_output: 21
        weight_filler {
            type: "gaussian"
            std: 0.01
        }
        bias_filler {
            type: "constant"
            value: 0
        }
    }
}

layer {
    bottom: "fc7"
    top: "angle_pred"
    name: "angle_pred"
    type: "InnerProduct"
    param {
        lr_mult: 1.0
    }
    param {
        lr_mult: 2.0
    }
    inner_product_param {
        num_output: 9
        weight_filler {
            type: "gaussian"
            std: 0.001
        }
        bias_filler {
            type: "constant"
            value: 0
        }
    }
}

layer {
    bottom: "fc7"
    top: "bbox_pred"
    name: "bbox_pred"
    type: "InnerProduct"
    param {
        lr_mult: 1.0
    }
    param {
        lr_mult: 2.0
    }
    inner_product_param {
        num_output: 84
        weight_filler {
            type: "gaussian"
            std: 0.001
        }
        bias_filler {
            type: "constant"
            value: 0
        }
    }
}

layer {
    name: "loss"
    type: "SoftmaxWithLoss"
    bottom: "cls_score"
    bottom: "labels"
    top: "loss_cls"
    loss_weight: 0
}

layer {
    name: "accuarcy"
    type: "Accuracy"
    bottom: "cls_score"
    bottom: "labels"
    top: "accuarcy"
}

layer {
    name: "loss_angle"
    type: "SmoothL1Loss"
    bottom: "angle_pred"
    bottom: "angle_head"
    bottom: "angle_head_weight"
    top: "loss_angle"
    loss_weight: 1
}

layer {
    name: "loss_bbox"
    type: "SmoothL1Loss"
    bottom: "bbox_pred"
    bottom: "bbox_targets"
    bottom: "bbox_loss_weights"
    top: "loss_bbox"
    loss_weight: 0
}

`

Answer 1

您已在输出层中使用softmax函数将神经网络模型生成的分数转换为概率。 现在，您需要考虑最大概率，并且与最大概率相关的类就是您的答案。 顺便说一句，这是什么意思，softmax函数无效吗？ Softmax函数还将为您提供一个向量，而不是单个值。 您可以从概率中确定分类器预测的最终分类是什么。

Answer 2

我认为您上传的文件是培训原型文件。 您已经使用了SoftmaxWithLoss层。 这一层不会给您概率。 在部署期间将其替换为SoftMax层，以获取每个类别的概率。

关于分类而不是使用深度学习进行回归的问题

问题描述

2 个解决方案

解决方案1
0 2016-10-24 01:09:37

解决方案2
0 已采纳 2016-10-25 07:51:02

关于分类而不是使用深度学习进行回归的问题

问题描述

2 个解决方案

解决方案1 0 2016-10-24 01:09:37

解决方案2 0 已采纳 2016-10-25 07:51:02

解决方案1
0 2016-10-24 01:09:37

解决方案2
0 已采纳 2016-10-25 07:51:02