티스토리 뷰
#include <stdio.h>
#include <sys/time.h>
#include <unistd.h>
#include <stdlib.h>
#include "timers.h"
#define NDIM 1024
#define MIN(a,b) ((a) < (b) ? (a) : (b))
float a[NDIM][NDIM];
float b[NDIM][NDIM];
float c[NDIM][NDIM];
int print_matrix = 0;
int validation = 0;
int tiling = 0;
int ijk = 1;
int kij = 0;
int jki = 0;
void mat_mul_blocked(float c[NDIM][NDIM], float a[NDIM][NDIM], float b[NDIM][NDIM])
{
int i, j, k, ii, jj, kk;
int B = 16;
for(ii = 0; ii < NDIM; ii += B)
{
for(jj = 0; jj < NDIM ; jj += B)
{
for(kk = 0; kk < NDIM ; kk += B)
{
for(i = ii ; i < MIN(ii+B, NDIM) ; i++)
{
for(j = jj ; j < MIN(jj+B, NDIM); j++)
{
for(k = kk ; k < MIN(kk+B, NDIM); k++)
{
c[i][j] += a[i][k] * b[k][j];
}
}
}
}
}
}
}
void mat_mul_ijk(float c[NDIM][NDIM],float a[NDIM][NDIM],float b[NDIM][NDIM])
{
int i, j, k;
float sum;
//C = AB
for(i = 0; i < NDIM ; i++)
{
for(j = 0; j < NDIM ; j++)
{
sum = 0;
for( k = 0; k < NDIM ; k++)
{
sum += a[i][k] * b[k][j];
}
c[i][j] = sum;
}
}
}
void mat_mul_kij(float c[NDIM][NDIM],float a[NDIM][NDIM],float b[NDIM][NDIM])
{
int i, j, k;
float r;
//C = AB
for(k = 0; k < NDIM ; k++)
{
for(i = 0; i < NDIM ; i++)
{
r = a[i][k];
for( j = 0; j < NDIM ; j++)
{
c[i][j] += r * b[k][j];
}
}
}
}
void mat_mul_jki(float c[NDIM][NDIM],float a[NDIM][NDIM],float b[NDIM][NDIM])
{
int i, j, k;
float r;
//C = AB
for(j = 0; j < NDIM ; j++)
{
for(k = 0; k < NDIM ; k++)
{
r = b[k][j];
for( i = 0; i < NDIM ; i++)
{
c[i][j] += a[i][k] * r;
}
}
}
}
void check_mat_mul(float c[NDIM][NDIM], float a[NDIM][NDIM], float b[NDIM][NDIM])
{
int i, j, k;
float sum;
int validated = 1;
printf("Validating the result..\n");
// C = AB
for( i = 0; i < NDIM ; i++)
{
for(j = 0; j < NDIM ; j++)
{
sum = 0;
for(k = 0; k < NDIM ; k++)
{
sum += a[i][k] * b[k][j];
}
if(c[i][j] != sum)
{
printf("c[%d][%d] is differ(value=%lf "
"correct_value=%lf)!!\n",i,j,c[i][j],sum);
validated = 0;
}
}
}
printf("Validation : ");
if(validated) printf("SUCCESSFUL!!\n");
else printf("FAILED...\n");
}
void print_mat(float mat[NDIM][NDIM])
{
int i,j;
for(i = 0; i < NDIM; i++)
{
for(j = 0 ; j < NDIM; j++)
{
printf("%8.2lf ", mat[i][j]);
}
printf("\n");
}
}
void print_help(const char* prog_name)
{
printf("Usage: %s [-pvh]\n", prog_name);
printf("\n");
printf("OPTIONS\n");
printf(" -b : matrix multiplication (ikj) - blocked\n");
printf(" -p : print matrix data.\n");
printf(" -v : validate matrix multiplication.\n");
printf(" -h : print this page.\n");
printf(" -i : matrix multiplication (ikj) "
"(default).\n");
printf(" -k : matrix multiplication (kij).\n");
printf(" -j : matrix multiplication (jki).\n");
}
void parse_opt(int argc, char** argv)
{
int opt;
while( (opt = getopt(argc, argv, "bpvhikjs:")) != -1)
{
switch(opt)
{
case 'b' : tiling = 1;
break;
case 'p' : print_matrix = 1;
break;
case 'v' : validation = 1;
break;
case 'i' : ijk = 1; kij = 0; jki = 0;
break;
case 'k' : kij = 1; ijk = 0; jki = 0;
break;
case 'j' : jki = 1; ijk = 0; kij = 0;
break;
case 'h':
default:
print_help(argv[0]);
exit(0);
break;
}
}
}
int main(int argc, char** argv)
{
int i, j, k = 1;
parse_opt(argc, argv);
for(i=0; i<NDIM; i++)
{
for(j=0; j<NDIM; j++)
{
a[i][j] = k;
b[i][j] = k;
k++;
}
}
printf("tiling %d\n",tiling);
timer_start(1);
if(tiling == 1)
{
printf("Matrix multiply ijk - Tiling...\n");
mat_mul_blocked(c, a, b);
}
else if(ijk == 1)
{
printf("Matrix multiply ijk ... \n");
mat_mul_ijk(c, a, b);
}
else if(kij == 1)
{
printf("Matrix multiply kij ... \n");
mat_mul_kij(c, a, b);
}
else
{
printf("Matrix multiply jki ... \n");
mat_mul_jki(c, a, b);
}
timer_stop(1);
printf("Time elapsed : %lf sec\n", timer_read(1));
if(validation) check_mat_mul(c, a, b);
if(print_matrix)
{
printf("MATRIX A : \n");
print_mat(a);
printf("MATRIX B : \n");
print_mat(b);
printf("MATRIX C : \n");
print_mat(c);
}
return 0;
}
' la fermata, 개발 > Pthread' 카테고리의 다른 글
False sharing 예제 (0) | 2019.02.19 |
---|---|
Thread Affinity 사용해보기 (0) | 2019.02.19 |
큐를 이용한 주사위 확률 구하기 (0) | 2019.02.19 |
병렬 계수 정렬(Parallel counting sort (0) | 2019.02.19 |
fetch and add, test and set(TAS) 사용해보기 (0) | 2019.02.19 |