这段时间跳进了 faster rcnn 这个坑里面,差点没爬起来。这篇文章主要讲在 faster rcnn 配置过程中让 cuDNN v5 跑起来,即 GPU 模式下使用 cuDNN v5 加速。关于其他的配置问题,建议大家仔细阅读官网资料:https://github.com/rbgirshick/py-faster-rcnn。毕竟每个人的电脑配置都不同。
本人使用的是 CUDA 8.0、cuDNN v5.1。首先要做到:
1.确认自己编译没有问题(就是不使用cuDNN的时候能编译通过)
2.CUDA8.0,cuDNN v5安装好了,并且没有问题。
原文地址:
https://github.com/ShuangLI59/person_search/pull/8/commits/622ff2b2f4a20bbc91f4828f3e85ffd6f22d1c75#diff-a3c8b6ea171736e9ddcf84d93b7d6364R75
废话不多说,如果要使用 cuDNN,需要在 Makefile.config 中将下面选项打开:
USE_CUDNN := 1
然后就需要修改下列文件:
1: caffe-fast-rcnn/include/caffe/layers/cudnn_relu_layer.hpp
在第40行后添加('+'表示添加):
cudnnHandle_t handle_;
cudnnTensorDescriptor_t bottom_desc_;
cudnnTensorDescriptor_t top_desc_;
+ cudnnActivationDescriptor_t activ_desc_;
};
2: caffe-fast-rcnn/include/caffe/layers/cudnn_sigmoid_layer.hpp
在第40行后添加:
cudnnHandle_t handle_;
cudnnTensorDescriptor_t bottom_desc_;
cudnnTensorDescriptor_t top_desc_;
+ cudnnActivationDescriptor_t activ_desc_;
};
3: caffe-fast-rcnn/include/caffe/layers/cudnn_tanh_layer.hpp
在第40行后添加:
cudnnHandle_t handle_;
cudnnTensorDescriptor_t bottom_desc_;
cudnnTensorDescriptor_t top_desc_;
+ cudnnActivationDescriptor_t activ_desc_;
};
4: caffe-fast-rcnn/include/caffe/util/cudnn.hpp
在第94行后添加,并删除原第96行('-'表示删除),然后在新生成的第96行后面继续添加:
CUDNN_CHECK(cudnnCreateFilterDescriptor(desc));
+#if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnSetFilter4dDescriptor(*desc, dataType<Dtype>::type,
- n, c, h, w));
+ CUDNN_TENSOR_NCHW, n, c, h, w));
+#else
+ CUDNN_CHECK(cudnnSetFilter4dDescriptor_v4(*desc, dataType<Dtype>::type,
+ CUDNN_TENSOR_NCHW, n, c, h, w));
+#endif
}
删除131,132行,并重新添加内容
CUDNN_CHECK(cudnnCreatePoolingDescriptor(pool_desc));
- CUDNN_CHECK(cudnnSetPooling2dDescriptor(*pool_desc, *mode, h, w,
- pad_h, pad_w, stride_h, stride_w));
+#if CUDNN_VERSION_MIN(5, 0, 0)
+ CUDNN_CHECK(cudnnSetPooling2dDescriptor(*pool_desc, *mode,
+ CUDNN_PROPAGATE_NAN, h, w, pad_h, pad_w, stride_h, stride_w));
+#else
+ CUDNN_CHECK(cudnnSetPooling2dDescriptor_v4(*pool_desc, *mode,
+ CUDNN_PROPAGATE_NAN, h, w, pad_h, pad_w, stride_h, stride_w));
+#endif
+}
+
+template <typename Dtype>
+inline void createActivationDescriptor(cudnnActivationDescriptor_t* activ_desc,
+ cudnnActivationMode_t mode) {
+ CUDNN_CHECK(cudnnCreateActivationDescriptor(activ_desc));
+ CUDNN_CHECK(cudnnSetActivationDescriptor(*activ_desc, mode,
+ CUDNN_PROPAGATE_NAN, Dtype(0)));
}
5: caffe-fast-rcnn/src/caffe/layers/cudnn_conv_layer.cu
删除33行,38-44行
if (this->bias_term_) {
const Dtype* bias_data = this->blobs_[1]->gpu_data();
-#if CUDNN_VERSION_MIN(4, 0, 0)
CUDNN_CHECK(cudnnAddTensor(handle_[g],
cudnn::dataType<Dtype>::one,
bias_desc_, bias_data + bias_offset_ * g,
cudnn::dataType<Dtype>::one,
top_descs_[i], top_data + top_offset_ * g));
-#else
- CUDNN_CHECK(cudnnAddTensor(handle_[g], CUDNN_ADD_SAME_C,
- cudnn::dataType<Dtype>::one,
- bias_desc_, bias_data + bias_offset_ * g,
- cudnn::dataType<Dtype>::one,
- top_descs_[i], top_data + top_offset_ * g));
-#endif
}
}
在第77行删除并添加:
if (this->param_propagate_down_[0]) {
const Dtype* bottom_data = bottom[i]->gpu_data();
- CUDNN_CHECK(cudnnConvolutionBackwardFilter_v3(
+ CUDNN_CHECK(cudnnConvolutionBackwardFilter(
handle_[1*this->group_ + g],
cudnn::dataType<Dtype>::one,
bottom_descs_[i], bottom_data + bottom_offset_ * g,
在第95行删除并添加:
Dtype* bottom_diff = bottom[i]->mutable_gpu_diff();
- CUDNN_CHECK(cudnnConvolutionBackwardData_v3(
+ CUDNN_CHECK(cudnnConvolutionBackwardData(
handle_[2*this->group_ + g],
cudnn::dataType<Dtype>::one,
6: caffe-fast-rcnn/src/caffe/layers/cudnn_relu_layer.cpp
在16行添加
cudnn::createTensor4dDesc<Dtype>(&top_desc_);
+ cudnn::createActivationDescriptor<Dtype>(&activ_desc_, CUDNN_ACTIVATION_RELU);
handles_setup_ = true;
}
7: caffe-fast-rcnn/src/caffe/layers/cudnn_relu_layer.cu
在第18行,20行,25行删除添加:
const Dtype* bottom_data = bottom[0]->gpu_data();
Dtype* top_data = top[0]->mutable_gpu_data();
+#if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnActivationForward(this->handle_,
- CUDNN_ACTIVATION_RELU,
+ activ_desc_,
cudnn::dataType<Dtype>::one,
this->bottom_desc_, bottom_data,
cudnn::dataType<Dtype>::zero,
this->top_desc_, top_data));
+#else
+ CUDNN_CHECK(cudnnActivationForward_v4(this->handle_,
+ activ_desc_,
+ cudnn::dataType<Dtype>::one,
+ this->bottom_desc_, bottom_data,
+ cudnn::dataType<Dtype>::zero,
+ this->top_desc_, top_data));
+#endif
}
template <typename Dtype>
在52,54,60行删除添加:
const Dtype* top_diff = top[0]->gpu_diff();
const Dtype* bottom_data = bottom[0]->gpu_data();
Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
+#if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnActivationBackward(this->handle_,
- CUDNN_ACTIVATION_RELU,
+ activ_desc_,
cudnn::dataType<Dtype>::one,
this->top_desc_, top_data, this->top_desc_, top_diff,
this->bottom_desc_, bottom_data,
cudnn::dataType<Dtype>::zero,
this->bottom_desc_, bottom_diff));
+#else
+ CUDNN_CHECK(cudnnActivationBackward_v4(this->handle_,
+ activ_desc_,
+ cudnn::dataType<Dtype>::one,
+ this->top_desc_, top_data, this->top_desc_, top_diff,
+ this->bottom_desc_, bottom_data,
+ cudnn::dataType<Dtype>::zero,
+ this->bottom_desc_, bottom_diff));
+#endif
}
8: caffe-fast-rcnn/src/caffe/layers/cudnn_sigmoid_layer.cpp
15,16行添加:
cudnn::createTensor4dDesc<Dtype>(&bottom_desc_);
cudnn::createTensor4dDesc<Dtype>(&top_desc_);
+ cudnn::createActivationDescriptor<Dtype>(&activ_desc_,
+ CUDNN_ACTIVATION_SIGMOID);
handles_setup_ = true;
9: caffe-fast-rcnn/src/caffe/layers/cudnn_sigmoid_layer.cu
在13,15,20行添加删除:
const Dtype* bottom_data = bottom[0]->gpu_data();
Dtype* top_data = top[0]->mutable_gpu_data();
+#if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnActivationForward(this->handle_,
- CUDNN_ACTIVATION_SIGMOID,
+ activ_desc_,
cudnn::dataType<Dtype>::one,
this->bottom_desc_, bottom_data,
cudnn::dataType<Dtype>::zero,
this->top_desc_, top_data));
+#else
+ CUDNN_CHECK(cudnnActivationForward_v4(this->handle_,
+ activ_desc_,
+ cudnn::dataType<Dtype>::one,
+ this->bottom_desc_, bottom_data,
+ cudnn::dataType<Dtype>::zero,
+ this->top_desc_, top_data));
+#endif
}
在42,44,50添加删除:
Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
+#if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnActivationBackward(this->handle_,
- CUDNN_ACTIVATION_SIGMOID,
+ activ_desc_,
cudnn::dataType<Dtype>::one,
this->top_desc_, top_data, this->top_desc_, top_diff,
this->bottom_desc_, bottom_data,
cudnn::dataType<Dtype>::zero,
this->bottom_desc_, bottom_diff));
+#else
+ CUDNN_CHECK(cudnnActivationBackward_v4(this->handle_,
+ activ_desc_,
+ cudnn::dataType<Dtype>::one,
+ this->top_desc_, top_data, this->top_desc_, top_diff,
+ this->bottom_desc_, bottom_data,
+ cudnn::dataType<Dtype>::zero,
+ this->bottom_desc_, bottom_diff));
+#endif
}
10: caffe-fast-rcnn/src/caffe/layers/cudnn_tanh_layer.cpp
在16行添加:
cudnn::createTensor4dDesc<Dtype>(&bottom_desc_);
cudnn::createTensor4dDesc<Dtype>(&top_desc_);
+ cudnn::createActivationDescriptor<Dtype>(&activ_desc_, CUDNN_ACTIVATION_TANH);
handles_setup_ = true;
11: caffe-fast-rcnn/src/caffe/layers/cudnn_tanh_layer.cu
在13,15,20行添加删除:
const Dtype* bottom_data = bottom[0]->gpu_data();
Dtype* top_data = top[0]->mutable_gpu_data();
+#if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnActivationForward(this->handle_,
- CUDNN_ACTIVATION_TANH,
+ activ_desc_,
cudnn::dataType<Dtype>::one,
this->bottom_desc_, bottom_data,
cudnn::dataType<Dtype>::zero,
this->top_desc_, top_data));
+#else
+ CUDNN_CHECK(cudnnActivationForward_v4(this->handle_,
+ activ_desc_,
+ cudnn::dataType<Dtype>::one,
+ this->bottom_desc_, bottom_data,
+ cudnn::dataType<Dtype>::zero,
+ this->top_desc_, top_data));
+#endif
}
在43,45,51行添加删除:
const Dtype* bottom_data = bottom[0]->gpu_data();
Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
+#if CUDNN_VERSION_MIN(5, 0, 0)
CUDNN_CHECK(cudnnActivationBackward(this->handle_,
- CUDNN_ACTIVATION_TANH,
+ activ_desc_,
cudnn::dataType<Dtype>::one,
this->top_desc_, top_data, this->top_desc_, top_diff,
this->bottom_desc_, bottom_data,
cudnn::dataType<Dtype>::zero,
this->bottom_desc_, bottom_diff));
+#else
+ CUDNN_CHECK(cudnnActivationBackward_v4(this->handle_,
+ activ_desc_,
+ cudnn::dataType<Dtype>::one,
+ this->top_desc_, top_data, this->top_desc_, top_diff,
+ this->bottom_desc_, bottom_data,
+ cudnn::dataType<Dtype>::zero,
+ this->bottom_desc_, bottom_diff));
+#endif
}
修改完成后,根据 Makefile.config 重新编译 caffe(记住先 make clean 再 make)。
后面我碰到的一个问题是
'numpy.float64' object cannot be interpreted as an index
主要原因是版本不支持问题,解决方法:sudo pip install -U numpy==1.11.0
希望大家能把 GPU 模式跑起来,毕竟 CPU 和 GPU 的速度不是一个数量级的,而且 cuDNN 还能够减少显存消耗。
有什么安装,编译问题大家可以一起探讨!