|
ViennaCL - The Vienna Computing Library
1.1.2
|
#ifndef _VIENNACL_MATRIX_ROW_SOURCE_HPP_
#define _VIENNACL_MATRIX_ROW_SOURCE_HPP_
//Automatically generated file from aux-directory, do not edit manually!
//
// OpenCL C source for the single-precision ("float") kernels operating on
// matrices addressed as matrix[row * matrix_internal_cols + col], plus a few
// element-wise helpers that treat a buffer as a flat array of 'size' floats.
// Each constant below holds the complete source text of exactly one kernel.
//
// NOTE(review): the guard macro starts with an underscore followed by an
// upper-case letter, which is a reserved identifier in C++. It is left
// unchanged here because other files may test for it.
//
// NOTE(review): several kernels synchronize with barrier(CLK_GLOBAL_MEM_FENCE).
// OpenCL barriers only synchronize the work-items of one work-group, so these
// kernels presumably have to be enqueued with a single work-group - confirm
// against the host-side launch code.
namespace viennacl
{
 namespace linalg
 {
  namespace kernels
  {

// vec[i] /= *fac for all i < size. The divisor is read once from device memory.
const char * const matrix_row_align1_inplace_divide = 
"__kernel void inplace_divide(\n"
" __global float * vec,\n"
" __global const float * fac, //note: CPU variant is mapped to prod_scalar\n"
" unsigned int size) \n"
"{ \n"
" float factor = *fac;\n"
" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
" vec[i] /= factor;\n"
"}\n"
; //matrix_row_align1_inplace_divide

// In-place forward substitution on 'vector', rows processed in ascending order.
// Work-item 0 divides vector[row] by the diagonal entry; afterwards all
// work-items eliminate entries elim > row using the stored element (row, elim),
// i.e. the coefficient is read from the transposed position.
const char * const matrix_row_align1_trans_lower_triangular_substitute_inplace = 
"__kernel void trans_lower_triangular_substitute_inplace(\n"
" __global const float * matrix,\n"
" unsigned int matrix_rows,\n"
" unsigned int matrix_cols,\n"
" unsigned int matrix_internal_rows,\n"
" unsigned int matrix_internal_cols,\n"
" __global float * vector)\n"
"{\n"
" float temp;\n"
" for (int row = 0; row < matrix_rows; ++row)\n"
" {\n"
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
" if (get_global_id(0) == 0)\n"
" vector[row] /= matrix[row+row*matrix_internal_cols];\n"
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
" temp = vector[row];\n"
" for (int elim = row + get_global_id(0) + 1; elim < matrix_rows; elim += get_global_size(0))\n"
" vector[elim] -= temp * matrix[row * matrix_internal_cols + elim];\n"
" }\n"
"}\n"
; //matrix_row_align1_trans_lower_triangular_substitute_inplace

// In-place backward substitution on 'vector', rows processed in descending
// order, without any division by the diagonal. Entries elim < row are
// eliminated using the stored element (row, elim).
const char * const matrix_row_align1_trans_unit_upper_triangular_substitute_inplace = 
"//transposed lower triangular matrix\n"
"__kernel void trans_unit_upper_triangular_substitute_inplace(\n"
" __global const float * matrix, \n"
" unsigned int matrix_rows,\n"
" unsigned int matrix_cols,\n"
" unsigned int matrix_internal_rows,\n"
" unsigned int matrix_internal_cols,\n"
" __global float * vector) \n"
"{ \n"
" float temp; \n"
" for (int row = matrix_rows-1; row > -1; --row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = vector[row]; \n"
" //eliminate column with index 'row' in parallel: \n"
" for (int elim = get_global_id(0); elim < row; elim += get_global_size(0)) \n"
" vector[elim] -= temp * matrix[row * matrix_internal_cols + elim]; \n"
" } \n"
" \n"
"}\n"
; //matrix_row_align1_trans_unit_upper_triangular_substitute_inplace

// result[i] = vec1[i] - vec2[i] for all i < size.
const char * const matrix_row_align1_sub = 
"__kernel void sub(\n"
" __global const float * vec1,\n"
" __global const float * vec2, \n"
" __global float * result,\n"
" unsigned int size)\n"
"{ \n"
" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
" result[i] = vec1[i] - vec2[i];\n"
"}\n"
; //matrix_row_align1_sub

// matrix(row, col) += val * vector1[row] * vector2[col]; each work-item
// handles whole rows.
const char * const matrix_row_align1_scaled_rank1_update = 
"__kernel void scaled_rank1_update(\n"
" __global float * matrix,\n"
" unsigned int matrix_rows,\n"
" unsigned int matrix_cols,\n"
" unsigned int matrix_internal_rows,\n"
" unsigned int matrix_internal_cols,\n"
" float val,\n"
" __global const float * vector1, \n"
" __global const float * vector2) \n"
"{ \n"
" float tmp;\n"
" unsigned int offset;\n"
" for (unsigned int row = get_global_id(0); row < matrix_rows; row += get_global_size(0))\n"
" {\n"
" tmp = val * vector1[row];\n"
" offset = row * matrix_internal_cols;\n"
" for (unsigned int col = 0; col < matrix_cols; ++col)\n"
" matrix[offset+col] += tmp * vector2[col];\n"
" }\n"
"}\n"
; //matrix_row_align1_scaled_rank1_update

// vec1[i] -= vec2[i] for all i < size.
const char * const matrix_row_align1_inplace_sub = 
"__kernel void inplace_sub(\n"
" __global float * vec1,\n"
" __global const float * vec2,\n"
" unsigned int size) \n"
"{ \n"
" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
" vec1[i] -= vec2[i];\n"
"}\n"
; //matrix_row_align1_inplace_sub

// In-place forward substitution on 'vector', rows processed in ascending order.
// Work-item 0 divides vector[row] by the diagonal entry; afterwards all
// work-items eliminate entries elim > row using the stored element (elim, row).
const char * const matrix_row_align1_lower_triangular_substitute_inplace = 
"__kernel void lower_triangular_substitute_inplace(\n"
" __global const float * matrix,\n"
" unsigned int matrix_rows,\n"
" unsigned int matrix_cols,\n"
" unsigned int matrix_internal_rows,\n"
" unsigned int matrix_internal_cols,\n"
" __global float * vector)\n"
"{\n"
" float temp;\n"
" for (int row = 0; row < matrix_rows; ++row)\n"
" {\n"
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
" if (get_global_id(0) == 0)\n"
" vector[row] /= matrix[row+row*matrix_internal_cols];\n"
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
" temp = vector[row];\n"
" for (int elim = row + get_global_id(0) + 1; elim < matrix_rows; elim += get_global_size(0))\n"
" vector[elim] -= temp * matrix[elim * matrix_internal_cols + row];\n"
" }\n"
"}\n"
; //matrix_row_align1_lower_triangular_substitute_inplace

// Matrix-vector product: result[row] = sum over col of matrix(row, col) * vector[col].
// Each work-item computes complete rows.
const char * const matrix_row_align1_vec_mul = 
"\n"
"\n"
"\n"
"__kernel void vec_mul(\n"
" __global const float * matrix,\n"
" unsigned int matrix_rows,\n"
" unsigned int matrix_cols,\n"
" unsigned int matrix_internal_rows,\n"
" unsigned int matrix_internal_cols,\n"
" __global const float * vector, \n"
" __global float * result) \n"
"{ \n"
" for (unsigned int row = get_global_id(0); row < matrix_rows; row += get_global_size(0))\n"
" {\n"
" float dot_prod = 0.0f;\n"
" for (unsigned int col = 0; col < matrix_cols; ++col)\n"
" dot_prod += matrix[row*matrix_internal_cols + col] * vector[col];\n"
" result[row] = dot_prod;\n"
" }\n"
"}\n"
"\n"
"\n"
; //matrix_row_align1_vec_mul

// In-place LU factorization; no pivoting is performed in the kernel text.
// For every row i >= 1 and every k < i: work-item 0 turns matrix(i, k) into
// the multiplier matrix(i, k) / matrix(k, k); after the barrier all work-items
// subtract multiplier * matrix(k, j) from matrix(i, j) for the columns j > k.
// NOTE(review): the column loop is bounded by matrix_rows rather than
// matrix_cols, so this presumably assumes a square matrix - confirm.
const char * const matrix_row_align1_lu_factorize = 
"__kernel void lu_factorize(\n"
" __global float * matrix,\n"
" unsigned int matrix_rows,\n"
" unsigned int matrix_cols,\n"
" unsigned int matrix_internal_rows,\n"
" unsigned int matrix_internal_cols) \n"
"{ \n"
" float temp;\n"
" unsigned rowi;\n"
" unsigned rowk;\n"
" for (unsigned int i=1; i<matrix_rows; ++i)\n"
" {\n"
" rowi = i * matrix_internal_cols;\n"
" for (unsigned int k=0; k<i; ++k)\n"
" {\n"
" rowk = k * matrix_internal_cols;\n"
" if (get_global_id(0) == 0)\n"
" matrix[rowi + k] /= matrix[rowk + k];\n"
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
" temp = matrix[rowi + k];\n"
" \n"
" //parallel subtraction:\n"
" for (unsigned int j=k+1 + get_global_id(0); j<matrix_rows; j += get_global_size(0))\n"
" matrix[rowi + j] -= temp * matrix[rowk + j];\n"
" }\n"
" }\n"
"} \n"
; //matrix_row_align1_lu_factorize

// matrix(row, col) += vector1[row] * vector2[col]; each work-item handles
// whole rows.
// NOTE(review): the comment embedded in the kernel text says "x * x^T", but
// the code takes two independent vectors.
const char * const matrix_row_align1_rank1_update = 
"//perform a rank-1 update of the matrix, i.e. A += x * x^T\n"
"__kernel void rank1_update(\n"
" __global float * matrix,\n"
" unsigned int matrix_rows,\n"
" unsigned int matrix_cols,\n"
" unsigned int matrix_internal_rows,\n"
" unsigned int matrix_internal_cols,\n"
" __global const float * vector1, \n"
" __global const float * vector2) \n"
"{ \n"
" float tmp;\n"
" unsigned int offset;\n"
" for (unsigned int row = get_global_id(0); row < matrix_rows; row += get_global_size(0))\n"
" {\n"
" tmp = vector1[row];\n"
" offset = row*matrix_internal_cols;\n"
" for (unsigned int col = 0; col < matrix_cols; ++col)\n"
" matrix[offset+col] += tmp * vector2[col];\n"
" }\n"
"}\n"
; //matrix_row_align1_rank1_update

// In-place backward substitution on 'vector', rows processed in descending
// order. Work-item 0 divides vector[row] by the diagonal entry; afterwards all
// work-items eliminate entries elim < row using the stored element (row, elim).
const char * const matrix_row_align1_trans_upper_triangular_substitute_inplace = 
"//transposed lower triangular matrix\n"
"__kernel void trans_upper_triangular_substitute_inplace(\n"
" __global const float * matrix, \n"
" unsigned int matrix_rows,\n"
" unsigned int matrix_cols,\n"
" unsigned int matrix_internal_rows,\n"
" unsigned int matrix_internal_cols,\n"
" __global float * vector) \n"
"{ \n"
" float temp; \n"
" for (int row = matrix_rows-1; row > -1; --row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" if (get_global_id(0) == 0) \n"
" vector[row] /= matrix[row*matrix_internal_cols + row]; \n"
" \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = vector[row]; \n"
" //eliminate column with index 'row' in parallel: \n"
" for (int elim = get_global_id(0); elim < row; elim += get_global_size(0)) \n"
" vector[elim] -= temp * matrix[row * matrix_internal_cols + elim]; \n"
" } \n"
" \n"
"}\n"
; //matrix_row_align1_trans_upper_triangular_substitute_inplace

// In-place forward substitution on 'vector' without any division by the
// diagonal. Entries elim > row are eliminated using the stored element
// (elim, row).
const char * const matrix_row_align1_unit_lower_triangular_substitute_inplace = 
"__kernel void unit_lower_triangular_substitute_inplace(\n"
" __global const float * matrix,\n"
" unsigned int matrix_rows,\n"
" unsigned int matrix_cols,\n"
" unsigned int matrix_internal_rows,\n"
" unsigned int matrix_internal_cols,\n"
" __global float * vector)\n"
"{\n"
" float temp;\n"
" for (int row = 0; row < matrix_rows; ++row)\n"
" {\n"
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
" temp = vector[row];\n"
" for (int elim = row + get_global_id(0) + 1; elim < matrix_rows; elim += get_global_size(0))\n"
" vector[elim] -= temp * matrix[elim * matrix_internal_cols + row];\n"
" }\n"
"}\n"
; //matrix_row_align1_unit_lower_triangular_substitute_inplace

// In-place backward substitution on 'vector', rows processed in descending
// order. Work-item 0 divides vector[row] by the diagonal entry; afterwards all
// work-items eliminate entries elim < row using the stored element (elim, row).
const char * const matrix_row_align1_upper_triangular_substitute_inplace = 
"__kernel void upper_triangular_substitute_inplace( \n"
" __global const float * matrix, \n"
" unsigned int matrix_rows,\n"
" unsigned int matrix_cols,\n"
" unsigned int matrix_internal_rows,\n"
" unsigned int matrix_internal_cols,\n"
" __global float * vector) \n"
"{ \n"
" float temp; \n"
" for (int row = matrix_rows-1; row > -1; --row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" if (get_global_id(0) == 0) \n"
" vector[row] /= matrix[row*matrix_internal_cols + row]; \n"
" \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = vector[row]; \n"
" //eliminate column with index 'row' in parallel: \n"
" for (int elim = get_global_id(0); elim < row; elim += get_global_size(0)) \n"
" vector[elim] -= temp * matrix[elim * matrix_internal_cols + row]; \n"
" } \n"
" \n"
"}\n"
; //matrix_row_align1_upper_triangular_substitute_inplace

// vec1[i] += vec2[i] for all i < size.
const char * const matrix_row_align1_inplace_add = 
"__kernel void inplace_add(\n"
" __global float * vec1,\n"
" __global const float * vec2,\n"
" unsigned int size) \n"
"{ \n"
" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
" vec1[i] += vec2[i];\n"
"}\n"
; //matrix_row_align1_inplace_add

// Product with the transposed matrix:
// result[row] = sum over col of matrix(col, row) * vector[col],
// with row < matrix_cols and col < matrix_rows.
const char * const matrix_row_align1_trans_vec_mul = 
"__kernel void trans_vec_mul(\n"
" __global const float * matrix,\n"
" unsigned int matrix_rows,\n"
" unsigned int matrix_cols,\n"
" unsigned int matrix_internal_rows,\n"
" unsigned int matrix_internal_cols,\n"
" __global const float * vector, \n"
" __global float * result) \n"
"{ \n"
" //row and col indicate indices within transposed matrix\n"
" for (unsigned int row = get_global_id(0); row < matrix_cols; row += get_global_size(0))\n"
" {\n"
" float dot_prod2 = 0.0f;\n"
" for (unsigned int col = 0; col < matrix_rows; ++col)\n"
" dot_prod2 += matrix[row + col*matrix_internal_cols] * vector[col];\n"
" result[row] = dot_prod2;\n"
" }\n"
"}\n"
; //matrix_row_align1_trans_vec_mul

// In-place forward substitution on 'vector' without any division by the
// diagonal. Entries elim > row are eliminated using the stored element
// (row, elim).
const char * const matrix_row_align1_trans_unit_lower_triangular_substitute_inplace = 
"\n"
"__kernel void trans_unit_lower_triangular_substitute_inplace(\n"
" __global const float * matrix,\n"
" unsigned int matrix_rows,\n"
" unsigned int matrix_cols,\n"
" unsigned int matrix_internal_rows,\n"
" unsigned int matrix_internal_cols,\n"
" __global float * vector)\n"
"{\n"
" float temp;\n"
" for (int row = 0; row < matrix_rows; ++row)\n"
" {\n"
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
"\n"
" temp = vector[row];\n"
"\n"
" for (int elim = row + get_global_id(0) + 1; elim < matrix_rows; elim += get_global_size(0))\n"
" vector[elim] -= temp * matrix[row * matrix_internal_cols + elim];\n"
" }\n"
"}\n"
"\n"
"\n"
; //matrix_row_align1_trans_unit_lower_triangular_substitute_inplace

// vec[i] *= factor for all i < size; the factor is passed by value as a
// kernel argument.
const char * const matrix_row_align1_cpu_inplace_mult = 
"__kernel void cpu_inplace_mult(\n"
" __global float * vec,\n"
" float factor, \n"
" unsigned int size) \n"
"{ \n"
" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
" vec[i] *= factor;\n"
"}\n"
; //matrix_row_align1_cpu_inplace_mult

// vec[i] = 0 for all i < size.
const char * const matrix_row_align1_clear = 
"__kernel void clear(\n"
" __global float * vec,\n"
" unsigned int size) \n"
"{ \n"
" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
" vec[i] = 0;\n"
"}\n"
; //matrix_row_align1_clear

// vec[i] *= *fac for all i < size. The factor is read once from device memory.
const char * const matrix_row_align1_inplace_mult = 
"__kernel void inplace_mult(\n"
" __global float * vec,\n"
" __global const float * fac, \n"
" unsigned int size) \n"
"{ \n"
" float factor = *fac;\n"
" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
" vec[i] *= factor;\n"
"}\n"
; //matrix_row_align1_inplace_mult

// In-place backward substitution on 'vector', rows processed in descending
// order, without any division by the diagonal. Entries elim < row are
// eliminated using the stored element (elim, row).
const char * const matrix_row_align1_unit_upper_triangular_substitute_inplace = 
"__kernel void unit_upper_triangular_substitute_inplace( \n"
" __global const float * matrix, \n"
" unsigned int matrix_rows,\n"
" unsigned int matrix_cols,\n"
" unsigned int matrix_internal_rows,\n"
" unsigned int matrix_internal_cols,\n"
" __global float * vector) \n"
"{ \n"
" float temp; \n"
" for (int row = matrix_rows-1; row > -1; --row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = vector[row]; \n"
" //eliminate column with index 'row' in parallel: \n"
" for (int elim = get_global_id(0); elim < row; elim += get_global_size(0)) \n"
" vector[elim] -= temp * matrix[elim * matrix_internal_cols + row]; \n"
" } \n"
" \n"
"}\n"
; //matrix_row_align1_unit_upper_triangular_substitute_inplace

// result[i] = vec1[i] + vec2[i] for all i < size.
const char * const matrix_row_align1_add = 
"__kernel void add(\n"
" __global const float * vec1,\n"
" __global const float * vec2, \n"
" __global float * result,\n"
" unsigned int size) \n"
"{ \n"
" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
" result[i] = vec1[i] + vec2[i];\n"
"}\n"
; //matrix_row_align1_add

  }  //namespace kernels
 }  //namespace linalg
}  //namespace viennacl
#endif
1.7.6.1