35 lines
646 B
Plaintext
35 lines
646 B
Plaintext
#include <stdio.h>
|
|
|
|
/*
 * Prototype of a device function that is defined outside this file; only the
 * declaration is visible here. `extern "C"` gives the symbol C linkage, so the
 * definition must be supplied under the unmangled name `add_u32` when the
 * device code is linked.
 *
 *   out_c  pointer to a ulonglong4 handed to the callee
 *   in_a   first ulonglong4 operand, passed by value
 *   in_b   second ulonglong4 operand, passed by value
 *
 * NOTE(review): the name suggests an addition whose result is stored through
 * out_c -- confirm against the actual definition.
 */
extern "C" __device__ void add_u32(ulonglong4 *out_c,
                                   ulonglong4 in_a,
                                   ulonglong4 in_b);
|
|
|
|
/* NUL-terminated status string in constant memory; copied out by the kernel. */
__constant__ char ok[] = "ok";

/*
 * Second status string in constant memory.
 * NOTE(review): nothing in the visible code references this symbol.
 */
__constant__ char not_ok[] = "not ok";
|
|
|
|
/*
 * Calls the externally defined add_u32() once with fixed operands, then copies
 * the constant string `ok` (terminating NUL included) into `buf`.
 *
 *   buf  destination buffer; must have room for at least sizeof(ok) bytes.
 *
 * Every thread of the launch executes the same statements and writes the same
 * bytes to the same addresses; main() in this file launches it as <<<1, 1>>>.
 *
 * NOTE(review): the value left in `out_c` by add_u32() is never inspected, so
 * `ok` is written unconditionally -- confirm whether a result check was
 * intended here.
 */
__global__ void kernel(char *buf) {
    ulonglong4 in_a  = {0, 1, 2, 3};
    ulonglong4 in_b  = {1, 1, 1, 1};
    ulonglong4 out_c = {1, 2, 3, 4};

    add_u32(&out_c, in_a, in_b);

    /* sizeof(ok) counts the trailing '\0', so the copy is a complete C string. */
    memcpy(buf, ok, sizeof(ok));
}
|
|
|
|
/*
 * Host driver: allocates a 32-byte device buffer, runs `kernel` with a single
 * thread, copies the buffer back and prints it as a C string.
 *
 * Returns 0 on success. If any CUDA runtime call or the kernel launch fails,
 * the error string is written to stderr and 1 is returned.
 */
int main() {
    /* Zero-filled so the buffer is a valid C string whatever happens later. */
    char h_buf[32] = {0};
    char *d_buf = NULL;

    cudaError_t err = cudaMalloc(&d_buf, sizeof(h_buf));
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    /* Clear the device buffer so bytes the kernel does not write are defined. */
    err = cudaMemset(d_buf, 0, sizeof(h_buf));

    if (err == cudaSuccess) {
        kernel<<<1, 1>>>(d_buf);
        /* A launch does not return a status; launch errors are reported here. */
        err = cudaGetLastError();
    }

    if (err == cudaSuccess) {
        /* Errors raised while the kernel executes surface at this sync. */
        err = cudaDeviceSynchronize();
    }

    if (err == cudaSuccess) {
        err = cudaMemcpy(h_buf, d_buf, sizeof(h_buf), cudaMemcpyDeviceToHost);
    }

    /* Release the allocation on every path before deciding the exit status. */
    cudaError_t free_err = cudaFree(d_buf);
    if (err == cudaSuccess) {
        err = free_err;
    }

    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        return 1;
    }

    /* Guarantee termination even if all 32 copied bytes were non-zero. */
    h_buf[sizeof(h_buf) - 1] = '\0';
    printf("%s\n", h_buf);
    return 0;
}