diff --git a/example/README.md b/example/README.md new file mode 100644 index 0000000..e9241c0 --- /dev/null +++ b/example/README.md @@ -0,0 +1,34 @@ +# Description +Simple SpMV (Sparse Matrix-Vector) based GEMV kernel for cpu-pin tool example. Each ROI has either sparse gather or scatter instructions, or vectorization target for vector instruction. + +# Matrix Input +_.mtx_ file is used as an input from https://sparse.tamu.edu/HB/. Each _.mtx_ has a banner on the top in the form of _MatrixMarket banner1 banner2 banner3 ..._ For this example, only _matrix, coordinate, real_ files are supported. + +# ROI + - spmv_serial: serial gemv kernel (activated w/o -t) + - spmv_omp: omp gemv kernel (activated w/ -t ) + - set_sparse_vector: scatter based sparse operand vector generator + +# Use +## Compile & Run +``` +gcc -g -fopenmp -o maxim_spmv src/*.c +./maxim_spmv -t -f mat/ +``` +## Run PIN +``` +echo > roi_funcs.txt +$PIN_ROOT/pin -t ../pin_tracing/obj-intel64/ImemROIThreads.so -- ./maxim_spmv -t -f mat/ +``` +## Run GS Pattern +``` +gzip roitrace.00..bin +/gs_pattern roitrace.00..bin.gz maxim_spmv +``` + +# Troubleshoot +## PIN +If no ROI instructions are tracked, try fewer or no compiler optimization options. gcc tends to be more reliable for the optimizations for this example. + +## gs_pattern +If empty outputs are generated with the valid trace file, the fastest solutions you may consider would be to try with 1) _SYMBOLS_ONLY_ turned off or 2) larger problem size. From experience, the lower threshold of the number of indices is ~100 for this example. 
\ No newline at end of file diff --git a/example/cpu/mat/bcsstk01.mtx b/example/cpu/mat/bcsstk01.mtx new file mode 100644 index 0000000..728073c --- /dev/null +++ b/example/cpu/mat/bcsstk01.mtx @@ -0,0 +1,238 @@ +%%MatrixMarket matrix coordinate real symmetric +%------------------------------------------------------------------------------- +% UF Sparse Matrix Collection, Tim Davis +% http://www.cise.ufl.edu/research/sparse/matrices/HB/bcsstk01 +% name: HB/bcsstk01 +% [SYMMETRIC STIFFNESS MATRIX SMALL GENERALIZED EIGENVALUE PROBLEM] +% id: 23 +% date: 1982 +% author: J. Lewis +% ed: I. Duff, R. Grimes, J. Lewis +% fields: title A name id date author ed kind +% kind: structural problem +%------------------------------------------------------------------------------- +48 48 224 +1 1 2832268.51852 +5 1 1e6 +6 1 2083333.33333 +7 1 -3333.33333333 +11 1 1e6 +19 1 -2.8e6 +25 1 -28935.1851852 +30 1 2083333.33333 +2 2 1635447.53086 +4 2 -2e6 +6 2 5555555.55555 +8 2 -6666.66666667 +10 2 -2e6 +20 2 -30864.1975309 +24 2 5555555.55555 +26 2 -1597916.66667 +3 3 1724367.28395 +4 3 -2083333.33333 +5 3 -2777777.77778 +9 3 -1.68e6 +21 3 -15432.0987654 +23 3 -2777777.77778 +27 3 -28935.1851852 +28 3 -2083333.33333 +4 4 1003333333.33 +8 4 2e6 +10 4 4e8 +22 4 -3333333.33333 +27 4 2083333.33333 +28 4 1e8 +5 5 1.0675e9 +7 5 -1e6 +11 5 2e8 +21 5 2777777.77778 +23 5 333333333.333 +29 5 -833333.333333 +6 6 1535333333.33 +12 6 -2e6 +20 6 -5555555.55555 +24 6 666666666.667 +25 6 -2083333.33333 +30 6 1e8 +7 7 2832268.51852 +11 7 -1e6 +12 7 2083333.33333 +13 7 -2.8e6 +31 7 -28935.1851852 +36 7 2083333.33333 +8 8 1635447.53086 +10 8 2e6 +12 8 5555555.55555 +14 8 -30864.1975309 +18 8 5555555.55555 +32 8 -1597916.66667 +9 9 1724367.28395 +10 9 -2083333.33333 +11 9 -2777777.77778 +15 9 -15432.0987654 +17 9 -2777777.77778 +33 9 -28935.1851852 +34 9 -2083333.33333 +10 10 1003333333.33 +16 10 -3333333.33333 +33 10 2083333.33333 +34 10 1e8 +11 11 1.0675e9 +15 11 2777777.77778 +17 11 333333333.333 +35 
11 -833333.333333 +12 12 1535333333.33 +14 12 -5555555.55555 +18 12 666666666.667 +31 12 -2083333.33333 +36 12 1e8 +13 13 2836099.4695 +17 13 -2149285.29451 +18 13 2359161.80402 +19 13 -3333.33333333 +23 13 -1e6 +37 13 -28935.1851852 +42 13 2083333.33333 +43 13 -3830.95098171 +47 13 -1149285.29451 +48 13 275828.470683 +14 14 1767410.74446 +15 14 517922.131816 +16 14 4298570.58902 +18 14 -5555555.55555 +20 14 -6666.66666667 +22 14 2e6 +38 14 -1597916.66667 +44 14 -131963.213599 +45 14 -517922.131816 +46 14 2298570.58902 +15 15 3890038.06848 +16 15 -2634990.2747 +17 15 2777777.77778 +21 15 -1.68e6 +39 15 -28935.1851852 +40 15 -2083333.33333 +44 15 -517922.131816 +45 15 -2165670.78453 +46 15 -551656.941367 +16 16 1975720635.31 +20 16 -2e6 +22 16 4e8 +39 16 2083333.33333 +40 16 1e8 +44 16 -2298570.58902 +45 16 551656.941366 +46 16 486193650.99 +17 17 1527346515.47 +18 17 -109779731.332 +19 17 1e6 +23 17 2e8 +41 17 -833333.333333 +43 17 1149285.29451 +47 17 229724661.236 +48 17 -55717351.0779 +18 18 1564111437.11 +24 18 -2e6 +37 18 -2083333.33333 +42 18 1e8 +43 18 -275828.470683 +47 18 -55717351.0779 +48 18 10941196.0038 +19 19 2832268.51852 +23 19 1e6 +24 19 2083333.33333 +43 19 -28935.1851852 +48 19 2083333.33333 +20 20 1635447.53086 +22 20 -2e6 +24 20 -5555555.55555 +44 20 -1597916.66667 +21 21 1724367.28395 +22 21 -2083333.33333 +23 21 2777777.77778 +45 21 -28935.1851852 +46 21 -2083333.33333 +22 22 1003333333.33 +45 22 2083333.33333 +46 22 1e8 +23 23 1.0675e9 +47 23 -833333.333333 +24 24 1535333333.33 +43 24 -2083333.33333 +48 24 1e8 +25 25 60879.6296296 +29 25 1.25e6 +30 25 416666.666667 +31 25 -4166.66666667 +35 25 1.25e6 +26 26 3372916.66667 +28 26 -2.5e6 +32 26 -8333.33333333 +34 26 -2.5e6 +27 27 2411712.96296 +28 27 -416666.666667 +33 27 -2.355e6 +28 28 1.5e9 +32 28 2.5e6 +34 28 5e8 +29 29 501833333.333 +31 29 -1.25e6 +35 29 2.5e8 +30 30 5.025e8 +36 30 -2.5e6 +31 31 3985879.62963 +35 31 -1.25e6 +36 31 416666.666667 +37 31 -3.925e6 +32 32 3411496.91358 +34 32 
2.5e6
+36 32 6944444.44444
+38 32 -38580.2469136
+42 32 6944444.44445
+33 33 2431003.08642
+34 33 -416666.666667
+35 33 -3472222.22222
+39 33 -19290.1234568
+41 33 -3472222.22222
+34 34 1504166666.67
+40 34 -4166666.66667
+35 35 1335166666.67
+39 35 3472222.22222
+41 35 416666666.667
+36 36 2169166666.67
+38 36 -6944444.44444
+42 36 833333333.333
+37 37 3985879.62963
+41 37 -1.25e6
+42 37 416666.666667
+43 37 -4166.66666667
+47 37 -1.25e6
+38 38 3411496.91358
+40 38 2.5e6
+42 38 -6944444.44445
+44 38 -8333.33333333
+46 38 2.5e6
+39 39 2431003.08642
+40 39 -416666.666667
+41 39 3472222.22222
+45 39 -2.355e6
+40 40 1504166666.67
+44 40 -2.5e6
+46 40 5e8
+41 41 1335166666.67
+43 41 1.25e6
+47 41 2.5e8
+42 42 2169166666.67
+48 42 -2.5e6
+43 43 64710.5806113
+47 43 2399285.29451
+48 43 140838.195984
+44 44 3504879.88027
+45 44 517922.131816
+46 44 -4798570.58902
+45 45 4577383.74749
+46 45 134990.2747
+46 46 2472387301.98
+47 47 961679848.804
+48 47 -109779731.332
+48 48 531278103.775
diff --git a/example/cpu/src/maxim_spmv.c b/example/cpu/src/maxim_spmv.c
new file mode 100644
index 0000000..0a40db3
--- /dev/null
+++ b/example/cpu/src/maxim_spmv.c
@@ -0,0 +1,228 @@
+#include
+#include
+#include
+#include
+#include
+#include "mmio.h"
+
+// One nonzero in coordinate (COO) form as stored in the .mtx file:
+// i and j are the 1-based row and column indices, v is the value.
+typedef struct {
+    int i;
+    int j;
+    double v;
+} elem;
+
+// Zero-initialized allocation of count elements of the given size; prints an
+// error and exits on failure. At least one element is always requested so
+// that a NULL result can only mean the allocation failed.
+static void *alloc_or_die(size_t count, size_t size) {
+    void *p = calloc(count > 0 ? count : 1, size);
+    if (p == NULL) {
+        fprintf(stderr, "error in memory allocation\n");
+        exit(1);
+    }
+    return p;
+}
+
+// qsort comparator for elem: ascending by row index i, ties broken by
+// ascending column index j.
+int compare(const void *a, const void *b) {
+    const elem *lhs = (const elem *) a;
+    const elem *rhs = (const elem *) b;
+    if (lhs->i != rhs->i) {
+        return lhs->i - rhs->i;
+    }
+    return lhs->j - rhs->j;
+}
+
+// Reads a real, coordinate-format Matrix Market file and converts it to CSR.
+//
+//   fname     path of the .mtx file
+//   M, N      out: number of rows and columns
+//   row       out: M+1 row offsets; row r owns entries row[r] .. row[r+1]-1
+//   col, val  out: 0-based column index and value of every stored entry
+//
+// Returns the number of stored entries. The three arrays are allocated here
+// and must be released by the caller with free(). Entries are converted as
+// stored in the file; nothing is mirrored for symmetric matrices. Any I/O,
+// format or allocation error prints a message to stderr and exits.
+int read(const char *fname, int *M, int *N, int **row, int **col, double **val) {
+    MM_typecode matcode;
+    FILE *f;
+    int nz;
+
+    if ((f = fopen(fname, "r")) == NULL) {
+        fprintf(stderr, "error in fopen\n");
+        exit(1);
+    }
+    if (mm_read_banner(f, &matcode) != 0) {
+        fprintf(stderr, "error reading matrix banner\n");
+        exit(1);
+    }
+    if (!mm_is_real(matcode) || !mm_is_matrix(matcode) || !mm_is_coordinate(matcode)) {
+        fprintf(stderr, "unsupported ");
+        fprintf(stderr, "Matrix Market type: [%s]\n", mm_typecode_to_str(matcode));
+        exit(1);
+    }
+    if (mm_read_mtx_crd_size(f, M, N, &nz) != 0) {
+        fprintf(stderr, "error reading matrix size\n");
+        exit(1);
+    }
+    if (*M < 0 || *N < 0 || nz < 0) {
+        fprintf(stderr, "invalid matrix size\n");
+        exit(1);
+    }
+
+    // reserve memory for coordinate form
+    elem *coords = (elem *) alloc_or_die(nz, sizeof(elem));
+
+    // read; a malformed or out-of-range entry would be used as an array index
+    // below, so either one is fatal
+    for (int k = 0; k < nz; k++) {
+        if (fscanf(f, "%d %d %lg\n", &coords[k].i, &coords[k].j, &coords[k].v) != 3) {
+            fprintf(stderr, "invalid format at entry %d\n", k+1);
+            exit(1);
+        }
+        if (coords[k].i < 1 || coords[k].i > *M || coords[k].j < 1 || coords[k].j > *N) {
+            fprintf(stderr, "index out of range at entry %d\n", k+1);
+            exit(1);
+        }
+    }
+    fclose(f);
+
+    // sort by (row, column) so that the entries of one row are contiguous
+    qsort(coords, nz, sizeof(elem), compare);
+
+    // reserve memory for CSR; the allocator zero-fills the row counters
+    *row = (int *) alloc_or_die((size_t) *M + 1, sizeof(int));
+    *col = (int *) alloc_or_die(nz, sizeof(int));
+    *val = (double *) alloc_or_die(nz, sizeof(double));
+
+    // convert to CSR: count the entries of 1-based row r in row[r] ...
+    for (int k = 0; k < nz; k++) {
+        (*row)[coords[k].i]++;
+        (*col)[k] = coords[k].j - 1;
+        (*val)[k] = coords[k].v;
+    }
+    // ... then prefix-sum the counts into row offsets
+    for (int r = 0; r < *M; r++) {
+        (*row)[r+1] += (*row)[r];
+    }
+
+    free(coords);
+    return nz;
+}
+
+// Scatter ROI: draws N random indices in [0, N) and stores vec[index[k]] = k.
+// Slots whose index is never drawn keep their previous value, so the caller
+// must pass an initialized (e.g. zeroed) vector of length N.
+void set_sparse_vector(int N, double *vec) {
+    if (N <= 0) {
+        return;
+    }
+
+    int *scatter = (int *) alloc_or_die(N, sizeof(int));
+
+    for (int i = 0; i < N; i++) {
+        scatter[i] = rand() % N;
+    }
+
+    for (int i = 0; i < N; i++) {
+        vec[scatter[i]] = i;
+    }
+
+    free(scatter);
+}
+
+// Gather ROI (serial): res = A * vec for the M-row CSR matrix (row, col, val).
+void spmv_serial(int M, int *row, int *col, double *val, double *vec, double *res) {
+    for (int i = 0; i < M; i++) {
+        res[i] = 0;
+        for (int j = row[i]; j < row[i+1]; j++) {
+            res[i] += val[j] * vec[col[j]];
+        }
+    }
+}
+
+// Gather ROI (OpenMP): same product as spmv_serial with the row loop shared
+// among num_threads threads; every iteration writes only its own res[i].
+void spmv_omp(int M, int *row, int *col, double *val, double *vec, double *res, int num_threads) {
+    #pragma omp parallel for num_threads(num_threads)
+    for (int i = 0; i < M; i++) {
+        res[i] = 0;
+        for (int j = row[i]; j < row[i+1]; j++) {
+            res[i] += val[j] * vec[col[j]];
+        }
+    }
+}
+
+// Returns 1 when a and b agree element-wise within tol over size entries, and
+// 0 at the first entry that differs by more than tol (or compares as NaN).
+int compare_results(double *a, double *b, int size, double tol) {
+    for (int i = 0; i < size; i++) {
+        if (!(fabs(a[i] - b[i]) <= tol)) {
+            return 0; // mismatch
+        }
+    }
+    return 1; // match
+}
+
+// Prints the command-line synopsis to stderr and exits with status 1.
+static void usage(const char *prog) {
+    fprintf(stderr, "usage: %s -f [matrix-market-filename] -t [num_thread]\n", prog);
+    exit(1);
+}
+
+// Loads the matrix named by -f, builds the operand vector with the scatter
+// ROI and runs one SpMV: serial by default, OpenMP when -t is greater than 1.
+int main(int argc, char *argv[]) {
+    int num_threads = 1;
+    const char *fname = NULL;
+
+    // -f and -t both take a value; never read past the end of argv
+    for (int i = 1; i < argc; i++) {
+        int is_file = strcmp(argv[i], "-f") == 0;
+        int is_threads = strcmp(argv[i], "-t") == 0;
+        if (!is_file && !is_threads) {
+            continue; // other arguments are ignored
+        }
+        if (i + 1 >= argc) {
+            usage(argv[0]);
+        }
+        if (is_file) {
+            fname = argv[++i];
+        } else {
+            printf("num_thread: %s\n", argv[i+1]);
+            num_threads = atoi(argv[++i]);
+        }
+    }
+    if (fname == NULL) {
+        usage(argv[0]);
+    }
+
+    int M, N, *row, *col;
+    double *val, *vec, *res;
+
+    read(fname, &M, &N, &row, &col, &val);
+
+    // zero-filled: slots the random scatter never writes must still be defined
+    vec = (double *) alloc_or_die(N, sizeof(double));
+    res = (double *) alloc_or_die(M, sizeof(double));
+
+    // For the evaluation of vectorized g/s instruction
+    set_sparse_vector(N, vec);
+
+    if (num_threads <= 1) {
+        spmv_serial(M, row, col, val, vec, res);
+    } else {
+        spmv_omp(M, row, col, val, vec, res, num_threads);
+    }
+
+    free(row);
+    free(col);
+    free(val);
+    free(vec);
+    free(res);
+    return 0;
+}
diff --git a/example/cpu/src/mmio.c b/example/cpu/src/mmio.c
new file mode 100644
index 0000000..c250ff2
--- /dev/null
+++ b/example/cpu/src/mmio.c
@@ -0,0 +1,511 @@
+/*
+* Matrix Market I/O library for ANSI C
+*
+* See http://math.nist.gov/MatrixMarket for details.
+* +* +*/ + + +#include +#include +#include +#include + +#include "mmio.h" + +int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_, + double **val_, int **I_, int **J_) +{ + FILE *f; + MM_typecode matcode; + int M, N, nz; + int i; + double *val; + int *I, *J; + + if ((f = fopen(fname, "r")) == NULL) + return -1; + + + if (mm_read_banner(f, &matcode) != 0) + { + printf("mm_read_unsymetric: Could not process Matrix Market banner "); + printf(" in file [%s]\n", fname); + return -1; + } + + + + if ( !(mm_is_real(matcode) && mm_is_matrix(matcode) && + mm_is_sparse(matcode))) + { + fprintf(stderr, "Sorry, this application does not support "); + fprintf(stderr, "Market Market type: [%s]\n", + mm_typecode_to_str(matcode)); + return -1; + } + + /* find out size of sparse matrix: M, N, nz .... */ + + if (mm_read_mtx_crd_size(f, &M, &N, &nz) !=0) + { + fprintf(stderr, "read_unsymmetric_sparse(): could not parse matrix size.\n"); + return -1; + } + + *M_ = M; + *N_ = N; + *nz_ = nz; + + /* reseve memory for matrices */ + + I = (int *) malloc(nz * sizeof(int)); + J = (int *) malloc(nz * sizeof(int)); + val = (double *) malloc(nz * sizeof(double)); + + *val_ = val; + *I_ = I; + *J_ = J; + + /* NOTE: when reading in doubles, ANSI C requires the use of the "l" */ + /* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */ + /* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */ + + for (i=0; i