티스토리 뷰

OpenCL을 사용하여 배열 A의 값과 배열 B의 값을 더해서 배열 C에 저장하는 간단한 예제를 작성해 보았다.

일반 C 프로그래밍에 비해 OpenCL은 플랫폼, 디바이스, 컨텍스트, 커맨드 큐 등 설정할 것이 많아서 상대적으로 난이도가 높은 것 같다.

아래는 실행코드이다.


#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

#include <CL/cl.h>

/* Host reference: stores A[i] + B[i] into C[i] for every i in [0, N). */
void vec_add(float *A, float *B, float *C, int N);
/*
 * Compares C1 and C2 element by element; prints each mismatch to stderr and
 * prints "PASSED." to stdout when no element differs.
 */
void verify(float *C1, float *C2, int N);
/* Computes the same element-wise sum as vec_add(), but on an OpenCL device. */
void vec_add_opencl(float *A, float *B, float *C, int N);

/*
 * Entry point.
 *
 * Usage: vec_add [N]
 *   N  number of vector elements (positive integer, default 4096).
 *
 * Fills two input vectors with pseudo-random values (fixed seed, so runs are
 * reproducible), adds them once on an OpenCL device and once on the host,
 * and compares the two results.
 *
 * Returns EXIT_SUCCESS after the comparison has run, or EXIT_FAILURE when N
 * is invalid or the host buffers cannot be allocated.
 */
int main(int argc, char *argv[])
{
    int N = 4096;

    if (argc == 2) {
        /* strtol instead of atoi: atoi cannot report garbage or overflow. */
        char *end = NULL;
        errno = 0;
        long parsed = strtol(argv[1], &end, 10);

        if (end == argv[1] || *end != '\0' || errno == ERANGE ||
            parsed < 1 || parsed > INT_MAX) {
            fprintf(stderr, "Usage: %s [N]  (N must be a positive integer)\n", argv[0]);
            return EXIT_FAILURE;
        }
        N = (int)parsed;
    }

    /* calloc checks the count * size multiplication for overflow. */
    const size_t count = (size_t)N;
    float *A = calloc(count, sizeof *A);
    float *B = calloc(count, sizeof *B);
    float *C1 = calloc(count, sizeof *C1);
    float *C2 = calloc(count, sizeof *C2);

    if (A == NULL || B == NULL || C1 == NULL || C2 == NULL) {
        fprintf(stderr, "[%s:%d] ERROR: out of memory\n", __FILE__, __LINE__);
        free(A);
        free(B);
        free(C1);
        free(C2);
        return EXIT_FAILURE;
    }

    /* Values in [0.00, 9.99]; the A/B call order is kept so the data is stable. */
    srand(0);
    for (int i = 0; i < N; i++) {
        A[i] = (float)(rand() % 1000) / 100.0f;
        B[i] = (float)(rand() % 1000) / 100.0f;
    }

    printf("== Vector Addition ==\n");
    printf("N : %d\n", N);

    vec_add_opencl(A, B, C1, N);   /* device result    */
    vec_add(A, B, C2, N);          /* host reference   */
    verify(C1, C2, N);

    free(A);
    free(B);
    free(C1);
    free(C2);

    return EXIT_SUCCESS;
}

/*
 * Host-side reference implementation of vector addition.
 *
 * Writes A[k] + B[k] into C[k] for every k in [0, N), in ascending order.
 * Nothing is written when N <= 0.
 */
void vec_add(float *A, float *B, float *C, int N)
{
    const float *lhs = A;
    const float *rhs = B;
    float *out = C;

    for (int remaining = N; remaining > 0; remaining--) {
        *out = *lhs + *rhs;
        lhs++;
        rhs++;
        out++;
    }
}

/*
 * Check that two result vectors agree.
 *
 * Every index k in [0, N) where C1[k] != C2[k] is reported on stderr.
 * "PASSED." is printed on stdout only when no such index exists
 * (which includes the N <= 0 case).
 */
void verify(float *C1, float *C2, int N)
{
    int mismatches = 0;

    for (int idx = 0; idx < N; ++idx) {
        if (C1[idx] == C2[idx]) {
            continue;
        }
        fprintf(stderr, "%d: C1=%.2f vs. C2=%.2f\n", idx, C1[idx], C2[idx]);
        ++mismatches;
    }

    if (mismatches == 0) {
        printf("PASSED.\n");
    }
}

/*
 * Terminate the program when an OpenCL call did not return CL_SUCCESS.
 *
 * Prints the source file, the line of the macro invocation and the numeric
 * error code to stderr, then exits with EXIT_FAILURE.
 *
 * The do { } while (0) wrapper makes the macro behave like one statement, so
 * it is safe in an unbraced if/else. The argument is evaluated exactly once.
 */
#define CHECK_ERROR(err)                                                     \
    do {                                                                     \
        const cl_int check_error_code_ = (err);                              \
        if (check_error_code_ != CL_SUCCESS) {                               \
            fprintf(stderr, "[%s:%d] ERROR : %d\n", __FILE__, __LINE__,      \
                    check_error_code_);                                      \
            exit(EXIT_FAILURE);                                              \
        }                                                                    \
    } while (0)

void vec_add_opencl(float *A, float *B, float *C, int N)
{
    cl_platform_id        *platform;
    cl_device_type        dev_type = CL_DEVICE_TYPE_DEFAULT;
    cl_device_id        dev;
    cl_context            context;
    cl_command_queue    cmd_queue;
    cl_program            program;
    cl_kernel            kernel;
    cl_mem                mem_A, mem_B, mem_C;
    cl_int                err;
    cl_uint                num_platforms;
    cl_uint                num_dev = 0;

    int i;
    
    //Get the device type to use from the environmental variable.

    char *dtype = getenv("CL_DEV_TYPE");

    if(dtype)
    {
        if(strcasecmp(dtype, "cpu") == 0)
        {
            dev_type = CL_DEVICE_TYPE_CPU;
        } else if(strcasecmp(dtype, "gpu") == 0)
        {
            dev_type = CL_DEVICE_TYPE_GPU;
        }
    }
    
    // Platform
    err = clGetPlatformIDs(0, NULL, &num_platforms);
    CHECK_ERROR(err);
    if(num_platforms == 0)
    {
        fprintf(stderr, "[%s:%d] ERROR: No OpenCL platform\n", __FILE__, __LINE__);
        exit(EXIT_FAILURE);
    }
    printf("Number of platforms : %u\n", num_platforms);
    platform = (cl_platform_id*) malloc(sizeof(cl_platform_id) * num_platforms);
    err = clGetPlatformIDs(num_platforms, platform, NULL);
    CHECK_ERROR(err);

    // Device
    for( i = 0; i < num_platforms ; i++)
    {
        err = clGetDeviceIDs(platform[i], dev_type, 1, &dev, &num_dev);
        if(err != CL_DEVICE_NOT_FOUND) CHECK_ERROR(err);
        if(num_dev == 1) break;
    }
    if(num_dev < 1)
    {
        fprintf(stderr, "[%s:%d] ERROR: No device\n", __FILE__, __LINE__);
        exit(EXIT_FAILURE);
    }

    // Print the device name.
    size_t name_size;
    clGetDeviceInfo(dev, CL_DEVICE_NAME, 0, NULL, &name_size);
    char *dev_name = (char *)malloc(name_size + 1);
    err = clGetDeviceInfo(dev, CL_DEVICE_NAME, name_size, dev_name, NULL);
    CHECK_ERROR(err);
    printf("Device : %s\n", dev_name);
    free(dev_name);

    // Context
    context = clCreateContext(NULL, 1, &dev, NULL, NULL, &err);
    CHECK_ERROR(err);

    // Command queue
    cmd_queue = clCreateCommandQueue(context, dev, 0, &err);
    CHECK_ERROR(err);

    // Create a program.
    char *source_code = "__kernel void vec_add(__global float *A, __global float *B, __global float *C, int N){\n"
        " int i = get_global_id(0);\n"
        " if (i < N) {\n"
        "   C[i] = A[i] + B[i];\n"
        " }\n"
        "}\n";
    size_t source_len = strlen(source_code);
    program = clCreateProgramWithSource(context, 1, (const char **)&source_code, &source_len, &err);
    CHECK_ERROR(err);

    // Build the program.
    err = clBuildProgram(program, 1, &dev, NULL, NULL, NULL);
    if(err != CL_SUCCESS)
    {
        //Print the build log.
        size_t log_size;
        clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
        char *log = (char*)malloc(log_size + 1);
        clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
        fprintf(stderr, "\n-----------BUILD LOG------------\n%s\n",log);
        fprintf(stderr, "-----------------------------------------\n");
        free(log);

        CHECK_ERROR(err);
    }

    // kernel
    kernel = clCreateKernel(program, "vec_add", &err);
    CHECK_ERROR(err);

    // Buffers
    mem_A = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * N, A, &err);
    
    CHECK_ERROR(err);
    
    mem_B = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * N, B, &err);
    
    CHECK_ERROR(err);
    
    mem_C = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float) * N, NULL, &err);
    
    CHECK_ERROR(err);

    // Set the arguments.
    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &mem_A);
    CHECK_ERROR(err);
    err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &mem_B);
    CHECK_ERROR(err);
    err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &mem_C);
    CHECK_ERROR(err);
    err = clSetKernelArg(kernel, 3, sizeof(int), &N);
    CHECK_ERROR(err);

    // Enqueue the kernel.
    size_t lws[1] = {128};
    size_t gws[1];
    gws[0] = (size_t)ceil((double)N / lws[0]) * lws[0];
    err = clEnqueueNDRangeKernel(cmd_queue, kernel, 1, NULL, gws, lws, 0, NULL, NULL);

    CHECK_ERROR(err);

    // Read the result.
    err = clEnqueueReadBuffer(cmd_queue, mem_C, CL_TRUE, 0, sizeof(float) * N, C, 0, NULL, NULL);

    CHECK_ERROR(err);

    // Release
    clReleaseMemObject(mem_A);
    clReleaseMemObject(mem_B);
    clReleaseMemObject(mem_C);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseCommandQueue(cmd_queue);
    clReleaseContext(context);
    free(platform);
}


// Makefile도 같이 올린다

# Builds the OpenCL vector-addition example (vec_add.c -> vec_add).
TARGET = vec_add
OBJS = vec_add.o

CC = gcc
INCLUDES =
CFLAGS = -O2 -Wall $(INCLUDES)
# Libraries live in LDLIBS so they come AFTER the object files on the link
# line; linkers that resolve symbols left to right (e.g. with --as-needed)
# drop libraries that are listed before the objects that use them.
LDFLAGS =
LDLIBS = -lOpenCL -lm

# These targets do not produce files of the same name.
.PHONY: all clean

all: $(TARGET)

# Recipe lines must start with a TAB character, not spaces.
$(TARGET): $(OBJS)
	$(CC) $(LDFLAGS) $(OBJS) $(LDLIBS) -o $@

.c.o:
	$(CC) $(CFLAGS) -c $< -o $@

clean:
	rm -f $(OBJS) $(TARGET)



공지사항
최근에 올라온 글
최근에 달린 댓글
Total
Today
Yesterday
링크
TAG
more
«   2024/04   »
1 2 3 4 5 6
7 8 9 10 11 12 13
14 15 16 17 18 19 20
21 22 23 24 25 26 27
28 29 30
글 보관함