0_Simple__template
2024-08-30 04:27:56
简单的 CUDA 应用模板,白送的 Sample。
▶ 源代码
//template_cpu.cpp
extern "C" void computeGold(float *, const unsigned int);

/*
 * CPU reference implementation used to validate the GPU result.
 *
 * Scales every element of `idata` in place by `len` (converted to float).
 *
 *   idata - array of at least `len` floats; modified in place
 *   len   - number of elements, also used as the scale factor
 *
 * A `len` of 0 leaves the array untouched (the loop body never runs).
 */
void computeGold(float *idata, const unsigned int len)
{
    const float f_len = static_cast<float>(len);

    // The start index literal was missing in the original listing; the loop
    // must cover the whole array, so it starts at element 0.
    for (unsigned int i = 0; i < len; ++i)
    {
        idata[i] *= f_len;
    }
}
// template.cu
#include <stdio.h>
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
#include <helper_functions.h>

extern "C" void computeGold(float *, const unsigned int);

/*
 * Multiplies each input element by the block size.
 *
 * Launch requirements (as used by main below):
 *   - a single one-dimensional block: only threadIdx.x is used for indexing,
 *     so additional blocks would all process the same elements;
 *   - dynamic shared memory of at least blockDim.x * sizeof(float) bytes;
 *   - g_idata and g_odata must each hold at least blockDim.x floats.
 *
 *   g_idata - input array in device memory
 *   g_odata - output array in device memory
 */
__global__ void testKernel(float *g_idata, float *g_odata)
{
    extern __shared__ float sdata[];
    const unsigned int tid = threadIdx.x;

    // Stage the input in shared memory.
    sdata[tid] = g_idata[tid];
    __syncthreads();

    // Scale by the number of threads in the block.
    sdata[tid] = (float)blockDim.x * sdata[tid];
    __syncthreads();

    // Write the result back to global memory.
    g_odata[tid] = sdata[tid];
}

/*
 * Reports a failed CUDA runtime call on stderr.
 * Returns true when `status` is cudaSuccess, false otherwise.
 */
static bool cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    fprintf(stderr, "\n\tCUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

/*
 * Runs testKernel on a small array, times the device round trip, and compares
 * the result against the CPU reference computeGold.
 * Returns 0 when every step succeeds and the results match, 1 otherwise.
 */
int main()
{
    printf("\n\tStart.\n");

    if (!cudaOk(cudaSetDevice(0), "cudaSetDevice"))
    {
        return 1;
    }

    StopWatchInterface *timer = NULL;
    sdkCreateTimer(&timer);
    sdkStartTimer(&timer);

    // NOTE(review): the literal was lost in this copy of the code; 32 is the
    // value used by the NVIDIA "template" sample - confirm. It must not exceed
    // the device's maximum threads per block, since one block handles all data.
    const unsigned int num_threads = 32;
    const unsigned int mem_size = sizeof(float) * num_threads;

    float *h_idata = (float *)malloc(mem_size);
    float *h_odata = (float *)malloc(mem_size);
    float *d_idata = NULL;
    float *d_odata = NULL;

    bool ok = (h_idata != NULL) && (h_odata != NULL);
    if (!ok)
    {
        fprintf(stderr, "\n\tHost allocation failed.\n");
    }

    ok = ok && cudaOk(cudaMalloc((void **)&d_idata, mem_size), "cudaMalloc(d_idata)");
    ok = ok && cudaOk(cudaMalloc((void **)&d_odata, mem_size), "cudaMalloc(d_odata)");

    if (ok)
    {
        for (unsigned int i = 0; i < num_threads; ++i)
        {
            h_idata[i] = (float)i;
        }
        ok = cudaOk(cudaMemcpy(d_idata, h_idata, mem_size, cudaMemcpyHostToDevice),
                    "cudaMemcpy(host to device)");
    }

    if (ok)
    {
        // One block of num_threads threads; the third launch argument sizes the
        // kernel's dynamic shared-memory array (one float per thread).
        testKernel<<<dim3(1, 1, 1), dim3(num_threads, 1, 1), mem_size>>>(d_idata, d_odata);
        // A launch does not return a status itself; query it explicitly.
        ok = cudaOk(cudaGetLastError(), "testKernel launch");
    }

    ok = ok && cudaOk(cudaMemcpy(h_odata, d_odata, mem_size, cudaMemcpyDeviceToHost),
                      "cudaMemcpy(device to host)");
    ok = ok && cudaOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize");

    sdkStopTimer(&timer);
    printf("\n\tProcessing time: %f ms\n", sdkGetTimerValue(&timer));
    sdkDeleteTimer(&timer);

    bool passed = false;
    if (ok)
    {
        // Build the CPU reference in place over the input, then compare with
        // zero tolerance.
        computeGold(h_idata, num_threads);
        passed = compareData(h_idata, h_odata, num_threads, 0.0f, 0.0f);
    }
    printf("\n\tFinish, return %s.\n", passed ? "Passed" : "Failed");

    // free(NULL) and cudaFree(NULL) are no-ops, so this is safe after a
    // partial failure above.
    free(h_idata);
    free(h_odata);
    cudaFree(d_idata);
    cudaFree(d_odata);

    getchar();
    return passed ? 0 : 1;
}
▶ 输出结果:
    Start.

    Processing time: 101.169357 ms

    Finish, return Passed.
▶ 涨姿势:没有
最新文章
- Java集合-Python数据结构比较
- Reactjs-JQuery-Vuejs-Extjs-Angularjs对比
- git 常用操作
- lambda的使用ret = filter(lambda x : x > 22 ,[11,22,33,44])
- css3 盒模型记
- phpcms 调用全站最新发布数据
- new String[0]的作用
- [置顶] fmt日期格式化
- cocos2d-x学习资源汇总(持续更新。。。)
- Override/implements methods 如何添加
- Quick Cocos2dx 调试问题
- Python源码分析
- canvas生成海报
- mydumper安装及使用
- [erlang] mnesia
- HTML5-CSS3-JavaScript(2)
- "1001. A+B Format (20)" 解题报告
- FlashDevelop导入swc库
- SVM之SMO算法(转)
- 502 bad gateway,nginx