简单的 CUDA 应用模板,白送的 Sample。

▶ 源代码

 //template_cpu.cpp
extern "C" void computeGold(float *, const unsigned int);

/**
 * CPU reference implementation used to validate the GPU result.
 *
 * Multiplies every element of `idata` in place by `len` (converted to float).
 *
 * @param idata  Array of at least `len` floats; modified in place.
 * @param len    Number of elements to process; also the scale factor.
 *               When `len` is 0 the array is left untouched.
 */
void computeGold(float *idata, const unsigned int len)
{
    const float f_len = static_cast<float>(len);

    // Start from element 0 so that the whole array is scaled.
    for (unsigned int i = 0; i < len; ++i)
        idata[i] *= f_len;
}
 // template.cu
#include <stdio.h>
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
#include <helper_functions.h>

// CPU reference implementation, defined in template_cpu.cpp.
extern "C" void computeGold(float *, const unsigned int);

/**
 * Scales each input element by the number of threads in the block.
 *
 * Every thread stages one element of `g_idata` in dynamic shared memory,
 * multiplies it by `blockDim.x`, and writes the result to `g_odata`.
 *
 * Launch requirements (derived from the indexing below):
 *   - Elements are addressed by `threadIdx.x` only, so every block touches the
 *     same first `blockDim.x` elements; launch a single 1D block.
 *   - Dynamic shared memory must hold at least `blockDim.x` floats
 *     (third launch parameter >= blockDim.x * sizeof(float)).
 *   - `g_idata` and `g_odata` must each hold at least `blockDim.x` floats.
 *
 * @param g_idata  Device pointer to the input values.
 * @param g_odata  Device pointer that receives the scaled values.
 */
__global__ void testKernel(float *g_idata, float *g_odata)
{
    // Size is supplied at launch time through the execution configuration.
    extern __shared__ float sdata[];
    const unsigned int tid = threadIdx.x;

    // Stage the input in shared memory.
    sdata[tid] = g_idata[tid];
    __syncthreads();

    // Scale by the block size.
    sdata[tid] = (float)blockDim.x * sdata[tid];
    __syncthreads();

    // Write the result back to global memory.
    g_odata[tid] = sdata[tid];
}

int main()
{
    // Program entry point body: runs testKernel on a small array, times the
    // round trip, and compares the GPU output with the CPU reference
    // (computeGold). Always returns 0; the pass/fail verdict is printed.
    printf("\n\tStart.\n");

    // Use the first CUDA device.
    cudaSetDevice(0);

    // Host-side stopwatch from the CUDA samples helpers; it covers allocation,
    // both copies and the kernel, not just kernel execution time.
    StopWatchInterface *timer = NULL;
    sdkCreateTimer(&timer);
    sdkStartTimer(&timer);

    // NOTE(review): the literal was missing from the listing; 32 is what
    // NVIDIA's "template" sample uses -- TODO confirm. The value also sets the
    // block size, so it must not exceed the device's max threads per block.
    unsigned int num_threads = 32;
    unsigned int mem_size = sizeof(float) * num_threads;

    float *h_idata, *h_odata, *d_idata, *d_odata;
    h_idata = (float *)malloc(mem_size);
    h_odata = (float *)malloc(mem_size);
    cudaMalloc((void **)&d_idata, mem_size);
    cudaMalloc((void **)&d_odata, mem_size);

    // Input is 0, 1, 2, ... so the expected output is i * num_threads.
    for (unsigned int i = 0; i < num_threads; ++i)
        h_idata[i] = (float)i;
    cudaMemcpy(d_idata, h_idata, mem_size, cudaMemcpyHostToDevice);

    // One block of num_threads threads; mem_size bytes of dynamic shared
    // memory give the kernel one float per thread.
    testKernel<<<dim3(1, 1, 1), dim3(num_threads, 1, 1), mem_size>>>(d_idata, d_odata);

    // Check the error status of the kernel launch; a bad launch configuration
    // is only reported through cudaGetLastError().
    cudaError_t launch_status = cudaGetLastError();
    if (launch_status != cudaSuccess)
        fprintf(stderr, "\n\tKernel execution failed: %s\n", cudaGetErrorString(launch_status));

    cudaMemcpy(h_odata, d_odata, sizeof(float) * num_threads, cudaMemcpyDeviceToHost);
    cudaDeviceSynchronize();

    sdkStopTimer(&timer);
    printf("\n\tProcessing time: %f ms\n", sdkGetTimerValue(&timer));
    sdkDeleteTimer(&timer);

    // Compute the reference in place on the host input, then compare exactly
    // (both tolerance arguments are zero).
    computeGold(h_idata, num_threads);
    printf("\n\tFinish, return %s.\n", compareData(h_idata, h_odata, num_threads, 0.0f, 0.0f) ? "Passed" : "Failed");

    free(h_idata);
    free(h_odata);
    cudaFree(d_idata);
    cudaFree(d_odata);

    // Wait for a key press so the console output stays visible.
    getchar();
    return 0;
}

▶ 输出结果:

    Start.

    Processing time: 101.169357 ms

    Finish, return Passed.

▶ 涨姿势:没有

最新文章

  1. Java集合-Python数据结构比较
  2. Reactjs-JQuery-Vuejs-Extjs-Angularjs对比
  3. git 常用操作
  4. lambda的使用ret = filter(lambda x : x > 22 ,[11,22,33,44])
  5. css3 盒模型记
  6. phpcms 调用全站最新发布数据
  7. new String[0]的作用
  8. [置顶] fmt日期格式化
  9. cocos2d-x学习资源汇总(持续更新。。。)
  10. Override/implements methods 如何添加
  11. Quick Cocos2dx 调试问题
  12. Python源码分析
  13. canvas生成海报
  14. mydumper安装及使用
  15. [erlang] mnesia
  16. HTML5-CSS3-JavaScript(2)
  17. "1001. A+B Format (20)" 解题报告
  18. FlashDevelop导入swc库
  19. SVM之SMO算法(转)
  20. 502 bad gateway,ngix

热门文章

  1. 51Nod 1009:1009 数字1的数量 (思维)
  2. 蓝桥杯 ALGO-1:区间k大数查询
  3. hdu5228
  4. Python & 机器学习入门指导
  5. html 子元素和父元素都监听了 click 事件,点击子元素时为何先触发的是父元素的 click 事件?
  6. sublime text3安装 mac os汉化/常用模块
  7. PHP中文件类型 文件属性 路径以及 文件相关的函数
  8. 常用Web框架
  9. Java技术专题之JVM你的内存泄露了吗?
  10. 操作系统:Android(Google公司开发的操作系统)