Implementing DeepID2 with Caffe (Final Part)


Miao's ramblings: this post truly wore Miao out. But since knowledge needs to be consolidated and shared, Miao decided to finish writing up all the remaining content this time.

IV. Reorganizing the Data: A Simple Split

The Data layer described earlier generates paired input data, and the Normalization layer normalizes the features. So can we now just hook up a ContrastiveLoss layer and start training?

Not quite; one step is still missing.

The ContrastiveLoss layer expects three bottoms: feature1, feature2, and a label indicating whether the features at corresponding positions belong to the same identity.

What we have right now, however, is all the features in a single blob, and the label coming straight out of the data layer is not in the form required here. So the data has to be reorganized first.

A simple rule is to split the features into two halves by the parity of the sample index. For a batch [f0, f1, f2, f3, ...], the even-indexed samples [f0, f2, ...] become one half and the odd-indexed samples [f1, f3, ...] the other, so the entries at the same position in the two halves form exactly one pair. The label could be reorganized with the same rule, but Miao only reorganizes the features here and handles the label by modifying the ContrastiveLoss layer instead.

Reorganizing the features is essentially a slicing operation, so the layer is named id2_slice_layer. The forward pass copies the bottom data into the tops according to parity; the backward pass copies the diffs of the two tops back into the corresponding positions of bottom_diff. The implementation is as follows:

// created by miao
#ifndef CAFFE_ID2_SLICE_LAYER_HPP_
#define CAFFE_ID2_SLICE_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

/**
 * @brief Splits a Blob into two Blobs along the num dimension by parity:
 *        even-indexed samples go to top[0], odd-indexed samples to top[1].
 */
template <typename Dtype>
class Id2SliceLayer : public Layer<Dtype> {
 public:
  explicit Id2SliceLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "Id2Slice"; }
  virtual inline int ExactNumBottomBlobs() const { return 1; }
  virtual inline int ExactNumTopBlobs() const { return 2; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
};

}  // namespace caffe

#endif  // CAFFE_ID2_SLICE_LAYER_HPP_

The header file could hardly be simpler...

The .cpp code is also very simple. Note that the id2_slice layer has two tops, each shaped like half of the bottom.

// created by miao
#include <algorithm>
#include <vector>

#include "caffe/layers/id2_slice_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void Id2SliceLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
}

template <typename Dtype>
void Id2SliceLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Each top holds half of the bottom's samples.
  vector<int> top_shape = bottom[0]->shape();
  top_shape[0] /= 2;
  top[0]->Reshape(top_shape);
  top[1]->Reshape(top_shape);
}

template <typename Dtype>
void Id2SliceLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const int feature_size = bottom[0]->count(1);
  for (int n = 0; n < bottom[0]->num(); ++n) {
    // Even-indexed samples go to top[0], odd-indexed samples to top[1].
    caffe_copy(
        feature_size,
        bottom[0]->cpu_data() + n * feature_size,
        top[n & 1]->mutable_cpu_data() + (n / 2) * feature_size);
  }
}

template <typename Dtype>
void Id2SliceLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const int feature_size = bottom[0]->count(1);
  for (int n = 0; n < bottom[0]->num(); ++n) {
    // Copy each top's diff back to the corresponding bottom positions.
    caffe_copy(
        feature_size,
        top[n & 1]->cpu_diff() + (n / 2) * feature_size,
        bottom[0]->mutable_cpu_diff() + n * feature_size);
  }
}

#ifdef CPU_ONLY
STUB_GPU(Id2SliceLayer);
#endif

INSTANTIATE_CLASS(Id2SliceLayer);
REGISTER_LAYER_CLASS(Id2Slice);

}  // namespace caffe

For simplicity, the GPU implementation just calls the CPU functions.

// created by miao
#include <vector>

#include "caffe/layers/id2_slice_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void Id2SliceLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  this->Forward_cpu(bottom, top);
}

template <typename Dtype>
void Id2SliceLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  this->Backward_cpu(top, propagate_down, bottom);
}

INSTANTIATE_LAYER_GPU_FUNCS(Id2SliceLayer);

}  // namespace caffe

That completes the feature reorganization. Since no new parameters are used, there is no need to modify caffe.proto either.
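
For reference, a minimal usage sketch of the new layer in prototxt form (the blob names here are just placeholders; the complete train.prototxt at the end of this post shows the layer in context):

layer {
  name: "fea_slice"
  type: "Id2Slice"
  bottom: "norm_fea"   # normalized features, samples arranged in pairs
  top: "feature1"      # even-indexed samples
  top: "feature2"      # odd-indexed samples
}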

Dear reader, you could reorganize the label with the same approach. But since Miao is rather lazy... here we just modify the ContrastiveLoss layer's code instead.

Step one: add a member variable to ContrastiveLossLayer that records whether each feature pair belongs to the same identity, taking over the role of the original third bottom. It only needs to be computed at the start of the forward pass, after which it can replace the old third bottom without touching any other code.

For everyone's convenience, Miao pastes the modified header file here directly (with the original comments stripped). Added lines are marked with the comment "added by miao". Only one line is added to the header.

#ifndef CAFFE_CONTRASTIVE_LOSS_LAYER_HPP_
#define CAFFE_CONTRASTIVE_LOSS_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/loss_layer.hpp"

namespace caffe {

template <typename Dtype>
class ContrastiveLossLayer : public LossLayer<Dtype> {
 public:
  explicit ContrastiveLossLayer(const LayerParameter& param)
      : LossLayer<Dtype>(param), diff_() {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline int ExactNumBottomBlobs() const { return 3; }
  virtual inline const char* type() const { return "ContrastiveLoss"; }
  virtual inline bool AllowForceBackward(const int bottom_index) const {
    return bottom_index != 2;
  }

 protected:
  /// @copydoc ContrastiveLossLayer
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  Blob<Dtype> diff_;  // cached for backward pass
  Blob<Dtype> dist_sq_;  // cached for backward pass
  Blob<Dtype> diff_sq_;  // tmp storage for gpu forward pass
  Blob<Dtype> summer_vec_;  // tmp storage for gpu forward pass
  Blob<Dtype> is_same_;  // added by miao: 1 if the pair shares an identity, else 0
};

}  // namespace caffe

#endif  // CAFFE_CONTRASTIVE_LOSS_LAYER_HPP_

The source files are just as easy to modify; only the CUDA part is pasted here. The changes are in the places that used to touch the original third bottom.

#include <algorithm>
#include <vector>
#include <iostream>

#include "caffe/layers/contrastive_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void ContrastiveLossLayer<Dtype>::Forward_gpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const int count = bottom[0]->count();
  caffe_gpu_sub(
      count,
      bottom[0]->gpu_data(),  // a
      bottom[1]->gpu_data(),  // b
      diff_.mutable_gpu_data());  // a_i-b_i
  caffe_gpu_powx(
      count,
      diff_.mutable_gpu_data(),  // a_i-b_i
      Dtype(2),
      diff_sq_.mutable_gpu_data());  // (a_i-b_i)^2
  caffe_gpu_gemv(
      CblasNoTrans,
      bottom[0]->num(),
      bottom[0]->channels(),
      Dtype(1.0),
      diff_sq_.gpu_data(),  // (a_i-b_i)^2
      summer_vec_.gpu_data(),
      Dtype(0.0),
      dist_sq_.mutable_gpu_data());  // \Sum (a_i-b_i)^2
  Dtype margin = this->layer_param_.contrastive_loss_param().margin();
  bool legacy_version =
      this->layer_param_.contrastive_loss_param().legacy_version();
  Dtype loss(0.0);
  for (int i = 0; i < bottom[0]->num(); ++i) {
    // added by miao: the i-th pair is "same" if the labels of samples 2i and
    // 2i+1 in the original batch are equal.
    is_same_.mutable_cpu_data()[i] =
        (bottom[2]->cpu_data()[2 * i] == bottom[2]->cpu_data()[2 * i + 1]) ? 1 : 0;
    if (is_same_.cpu_data()[i] == 1) {  // similar pairs
      loss += dist_sq_.cpu_data()[i];
    } else {  // dissimilar pairs
      if (legacy_version) {
        loss += std::max(margin - dist_sq_.cpu_data()[i], Dtype(0.0));
      } else {
        Dtype dist = std::max(margin - sqrt(dist_sq_.cpu_data()[i]), Dtype(0.0));
        loss += dist * dist;
      }
    }
  }
  loss = loss / static_cast<Dtype>(bottom[0]->num()) / Dtype(2);
  top[0]->mutable_cpu_data()[0] = loss;
}

template <typename Dtype>
__global__ void CLLBackward(const int count, const int channels,
    const Dtype margin, const bool legacy_version, const Dtype alpha,
    const Dtype* y, const Dtype* diff, const Dtype* dist_sq,
    Dtype *bottom_diff) {
  CUDA_KERNEL_LOOP(i, count) {
    int n = i / channels;  // the num index, to access y and dist_sq
    if (static_cast<int>(y[n])) {  // similar pairs
      bottom_diff[i] = alpha * diff[i];
    } else {  // dissimilar pairs
      Dtype mdist(0.0);
      Dtype beta(0.0);
      if (legacy_version) {
        mdist = (margin - dist_sq[n]);
        beta = -alpha;
      } else {
        Dtype dist = sqrt(dist_sq[n]);
        mdist = (margin - dist);
        beta = -alpha * mdist / (dist + Dtype(1e-4)) * diff[i];
      }
      if (mdist > 0.0) {
        bottom_diff[i] = beta;
      } else {
        bottom_diff[i] = 0;
      }
    }
  }
}

template <typename Dtype>
void ContrastiveLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  for (int i = 0; i < 2; ++i) {
    if (propagate_down[i]) {
      const int count = bottom[0]->count();
      const int channels = bottom[0]->channels();
      Dtype margin = this->layer_param_.contrastive_loss_param().margin();
      const bool legacy_version =
          this->layer_param_.contrastive_loss_param().legacy_version();
      const Dtype sign = (i == 0) ? 1 : -1;
      const Dtype alpha = sign * top[0]->cpu_diff()[0] /
          static_cast<Dtype>(bottom[0]->num());
      // NOLINT_NEXT_LINE(whitespace/operators)
      CLLBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
          count, channels, margin, legacy_version, alpha,
          is_same_.gpu_data(),  // pair similarity 0 or 1, added by miao
          diff_.gpu_data(),  // the cached eltwise difference between a and b
          dist_sq_.gpu_data(),  // the cached square distance between a and b
          bottom[i]->mutable_gpu_diff());
      CUDA_POST_KERNEL_CHECK;
    }
  }
}

INSTANTIATE_LAYER_GPU_FUNCS(ContrastiveLossLayer);

}  // namespace caffe

Note that both the forward and the backward pass need small code changes. They are simple, but still take care when making them.
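
One detail the post does not reproduce: the .cpp side must allocate is_same_ before the forward pass writes into it. Below is a minimal sketch of what ContrastiveLossLayer<Dtype>::LayerSetUp in contrastive_loss_layer.cpp could look like, assuming everything else stays as in stock Caffe; only the last line is the assumed addition, not code from the original post.

template <typename Dtype>
void ContrastiveLossLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::LayerSetUp(bottom, top);
  CHECK_EQ(bottom[0]->channels(), bottom[1]->channels());
  CHECK_EQ(bottom[0]->height(), 1);
  CHECK_EQ(bottom[0]->width(), 1);
  diff_.Reshape(bottom[0]->num(), bottom[0]->channels(), 1, 1);
  diff_sq_.Reshape(bottom[0]->num(), bottom[0]->channels(), 1, 1);
  dist_sq_.Reshape(bottom[0]->num(), 1, 1, 1);
  // vector of ones used to sum along channels with a gemv call
  summer_vec_.Reshape(bottom[0]->channels(), 1, 1, 1);
  for (int i = 0; i < summer_vec_.count(); ++i) {
    summer_vec_.mutable_cpu_data()[i] = Dtype(1);
  }
  // added by miao (assumed): one same/different flag per feature pair
  is_same_.Reshape(bottom[0]->num(), 1, 1, 1);
}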

With that, all the Caffe modifications for DeepID2 are complete.

Finally, here is a train.prototxt based on AlexNet. (Whether it actually trains well, Miao cannot guarantee; it is only meant to illustrate how the various layers are wired together.) fc7 is the feature layer and fc8 is the classification output. Two losses are used in total.

name: "AlexNet"
layer {
    name: "data"
    type: "Python"
    top: "data"
    top: "label"
    include {
        phase: TRAIN
    }    
    python_param {
        module: "id2_data_layer"
        layer: "ld2_data_layer"
        param_str: "{'crop_size' : 128, 'batch_size' : 96, 'mean_file': '/path/to/mean_file', 'scale': 0.0078125, 'source': '/path/to/train_list', 'image_root_dir': '/path/to/image_root_dir'}"    
    }
}

layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "norm1"
  type: "LRN"
  bottom: "conv1"
  top: "norm1"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "norm1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
layer {
  name: "norm2"
  type: "LRN"
  bottom: "conv2"
  top: "norm2"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "norm2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}
layer {
  name: "conv5"
  type: "Convolution"
  bottom: "conv4"
  top: "conv5"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
layer {
  name: "relu5"
  type: "ReLU"
  bottom: "conv5"
  top: "conv5"
}
layer {
  name: "pool5"
  type: "Pooling"
  bottom: "conv5"
  top: "pool5"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "fc6"
  type: "InnerProduct"
  bottom: "pool5"
  top: "fc6"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "fc6"
  top: "fc6"
}
layer {
  name: "drop6"
  type: "Dropout"
  bottom: "fc6"
  top: "fc6"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layer {
  name: "fc7"
  type: "InnerProduct"
  bottom: "fc6"
  top: "fc7"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "fc7"
  top: "fc7"
}
layer {
  name: "drop7"
  type: "Dropout"
  bottom: "fc7"
  top: "fc7"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layer {
  name: "fc8"
  type: "InnerProduct"
  bottom: "fc7"
  top: "fc8"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 10000
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "fc8"
  bottom: "label"
  top: "loss"
}
layer {
  name: "norm_fea"
  type: "Normalization"
  bottom: "fc7"
  top: "norm_fea"
}
layer {
  name: "fea_slice"
  type: "Id2Slice"
  bottom: "norm_fea"
  top: "feature1"
  top: "feature2"
}
layer {
  name: "id2_loss"
  type: "ContrastiveLoss"
  bottom: "feature1"
  bottom: "feature2"
  bottom: "label"
  top: "id2_loss"
  contrastive_loss_param {
    margin: 1.0
  }
}

alexnet.prototxt
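
One note on the two losses: Caffe simply sums every loss top with a default weight of 1, while DeepID2 balances the identification and verification signals with a weight λ. If you want that trade-off, the standard loss_weight field can be set on the ContrastiveLoss layer; the value below is only a placeholder, not a tuned setting.

layer {
  name: "id2_loss"
  type: "ContrastiveLoss"
  bottom: "feature1"
  bottom: "feature2"
  bottom: "label"
  top: "id2_loss"
  loss_weight: 0.05  # placeholder; weights this loss against the softmax loss
  contrastive_loss_param {
    margin: 1.0
  }
}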

Please credit the source when reposting~
