|
ViennaCL - The Vienna Computing Library
1.1.2
|
#ifndef _VIENNACL_MATRIX_COL_SOURCE_HPP_
#define _VIENNACL_MATRIX_COL_SOURCE_HPP_
//Automatically generated file from aux-directory, do not edit manually!

/**
 * OpenCL C program sources, stored as string constants, one constant per
 * kernel. Every constant is named matrix_col_align1_<kernel name>.
 *
 * All matrix kernels below address element (row, col) of the untransposed
 * matrix as matrix[row + col * matrix_internal_rows]; matrix_internal_cols
 * is accepted by every matrix kernel but never read in any kernel body.
 *
 * NOTE(review): several kernels rely on barrier(CLK_GLOBAL_MEM_FENCE) to
 * order writes of one work-item against reads of the others. Presumably
 * they are enqueued with a single work-group - confirm against host code.
 */
namespace viennacl
{
namespace linalg
{
namespace kernels
{

/** Kernel "inplace_divide": vec[i] /= *fac for all i < size. */
const char * const matrix_col_align1_inplace_divide =
    "__kernel void inplace_divide(\n"
    " __global float * vec,\n"
    " __global const float * fac, //note: CPU variant is mapped to prod_scalar\n"
    " unsigned int size) \n"
    "{ \n"
    " float factor = *fac;\n"
    " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
    " vec[i] /= factor;\n"
    "}\n";

/**
 * Kernel "trans_lower_triangular_substitute_inplace": walks rows upwards
 * from 0; work-item 0 divides vector[row] by the diagonal entry, then all
 * work-items subtract temp * matrix[elim * matrix_internal_rows + row]
 * from the entries with elim > row.
 */
const char * const matrix_col_align1_trans_lower_triangular_substitute_inplace =
    "__kernel void trans_lower_triangular_substitute_inplace(\n"
    " __global const float * matrix,\n"
    " unsigned int matrix_rows,\n"
    " unsigned int matrix_cols,\n"
    " unsigned int matrix_internal_rows,\n"
    " unsigned int matrix_internal_cols,\n"
    " __global float * vector)\n"
    "{\n"
    " float temp;\n"
    " for (int row = 0; row < matrix_rows; ++row)\n"
    " {\n"
    " barrier(CLK_GLOBAL_MEM_FENCE);\n"
    " if (get_global_id(0) == 0)\n"
    " vector[row] /= matrix[row+row*matrix_internal_rows];\n"
    " barrier(CLK_GLOBAL_MEM_FENCE);\n"
    " temp = vector[row];\n"
    " for (int elim = row + get_global_id(0) + 1; elim < matrix_rows; elim += get_global_size(0))\n"
    " vector[elim] -= temp * matrix[elim * matrix_internal_rows + row];\n"
    " }\n"
    "}\n";

/**
 * Kernel "trans_unit_upper_triangular_substitute_inplace": walks rows
 * downwards from matrix_rows-1 without any division by the diagonal and
 * updates the entries with elim < row using
 * matrix[row + elim * matrix_internal_rows].
 *
 * NOTE(review): the comment embedded in the kernel text says "transposed
 * lower triangular matrix" although the kernel name says upper - confirm.
 */
const char * const matrix_col_align1_trans_unit_upper_triangular_substitute_inplace =
    "//transposed lower triangular matrix\n"
    "__kernel void trans_unit_upper_triangular_substitute_inplace(\n"
    " __global const float * matrix, \n"
    " unsigned int matrix_rows,\n"
    " unsigned int matrix_cols,\n"
    " unsigned int matrix_internal_rows,\n"
    " unsigned int matrix_internal_cols,\n"
    " __global float * vector) \n"
    "{ \n"
    " float temp; \n"
    " for (int row = matrix_rows-1; row > -1; --row) \n"
    " { \n"
    " barrier(CLK_GLOBAL_MEM_FENCE); \n"
    " \n"
    " temp = vector[row]; \n"
    " //eliminate column with index 'row' in parallel: \n"
    " for (int elim = get_global_id(0); elim < row; elim += get_global_size(0)) \n"
    " vector[elim] -= temp * matrix[row + elim * matrix_internal_rows]; \n"
    " } \n"
    " \n"
    "}\n";

/** Kernel "sub": result[i] = vec1[i] - vec2[i] for all i < size. */
const char * const matrix_col_align1_sub =
    "__kernel void sub(\n"
    " __global const float * vec1,\n"
    " __global const float * vec2, \n"
    " __global float * result,\n"
    " unsigned int size)\n"
    "{ \n"
    " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
    " result[i] = vec1[i] - vec2[i];\n"
    "}\n";

/**
 * Kernel "scaled_rank1_update": for every (row, col) adds
 * val * vector1[row] * vector2[col] to the matrix entry.
 */
const char * const matrix_col_align1_scaled_rank1_update =
    "__kernel void scaled_rank1_update(\n"
    " __global float * matrix,\n"
    " unsigned int matrix_rows,\n"
    " unsigned int matrix_cols,\n"
    " unsigned int matrix_internal_rows,\n"
    " unsigned int matrix_internal_cols,\n"
    " float val,\n"
    " __global const float * vector1, \n"
    " __global const float * vector2) \n"
    "{ \n"
    " float tmp;\n"
    " for (unsigned int row = get_global_id(0); row < matrix_rows; row += get_global_size(0))\n"
    " {\n"
    " tmp = val * vector1[row];\n"
    " for (unsigned int col = 0; col < matrix_cols; ++col)\n"
    " matrix[row + col*matrix_internal_rows] += tmp * vector2[col];\n"
    " }\n"
    "}\n";

/** Kernel "inplace_sub": vec1[i] -= vec2[i] for all i < size. */
const char * const matrix_col_align1_inplace_sub =
    "__kernel void inplace_sub(\n"
    " __global float * vec1,\n"
    " __global const float * vec2,\n"
    " unsigned int size) \n"
    "{ \n"
    " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
    " vec1[i] -= vec2[i];\n"
    "}\n";

/**
 * Kernel "lower_triangular_substitute_inplace": same loop structure as the
 * "trans_lower" kernel, but the elimination step reads
 * matrix[row * matrix_internal_rows + elim].
 */
const char * const matrix_col_align1_lower_triangular_substitute_inplace =
    "__kernel void lower_triangular_substitute_inplace(\n"
    " __global const float * matrix,\n"
    " unsigned int matrix_rows,\n"
    " unsigned int matrix_cols,\n"
    " unsigned int matrix_internal_rows,\n"
    " unsigned int matrix_internal_cols,\n"
    " __global float * vector)\n"
    "{\n"
    " float temp;\n"
    " for (int row = 0; row < matrix_rows; ++row)\n"
    " {\n"
    " barrier(CLK_GLOBAL_MEM_FENCE);\n"
    " if (get_global_id(0) == 0)\n"
    " vector[row] /= matrix[row+row*matrix_internal_rows];\n"
    " barrier(CLK_GLOBAL_MEM_FENCE);\n"
    " temp = vector[row];\n"
    " for (int elim = row + get_global_id(0) + 1; elim < matrix_rows; elim += get_global_size(0))\n"
    " vector[elim] -= temp * matrix[row * matrix_internal_rows + elim];\n"
    " }\n"
    "}\n";

/**
 * Kernel "vec_mul": result[row] = sum over col of
 * matrix[row + col*matrix_internal_rows] * vector[col].
 */
const char * const matrix_col_align1_vec_mul =
    "__kernel void vec_mul(\n"
    " __global const float * matrix,\n"
    " unsigned int matrix_rows,\n"
    " unsigned int matrix_cols,\n"
    " unsigned int matrix_internal_rows,\n"
    " unsigned int matrix_internal_cols,\n"
    " __global const float * vector, \n"
    " __global float * result) \n"
    "{ \n"
    " for (unsigned int row = get_global_id(0); row < matrix_rows; row += get_global_size(0))\n"
    " {\n"
    " float dot_prod = 0.0f;\n"
    " for (unsigned int col = 0; col < matrix_cols; ++col)\n"
    " dot_prod += matrix[row + col*matrix_internal_rows] * vector[col];\n"
    " result[row] = dot_prod;\n"
    " }\n"
    "}\n";

/**
 * Kernel "lu_factorize": in-place elimination over the matrix. For each
 * row i >= 1 and each k < i, work-item 0 divides entry (i, k) by the
 * diagonal entry (k, k); after a barrier all work-items subtract
 * temp * entry (k, j) from entry (i, j) for j > k. No pivoting is visible
 * in the kernel text.
 */
const char * const matrix_col_align1_lu_factorize =
    "__kernel void lu_factorize(\n"
    " __global float * matrix,\n"
    " unsigned int matrix_rows,\n"
    " unsigned int matrix_cols,\n"
    " unsigned int matrix_internal_rows,\n"
    " unsigned int matrix_internal_cols) \n"
    "{ \n"
    " float temp;\n"
    " for (unsigned int i=1; i<matrix_rows; ++i)\n"
    " {\n"
    " for (unsigned int k=0; k<i; ++k)\n"
    " {\n"
    " if (get_global_id(0) == 0)\n"
    " matrix[i + k*matrix_internal_rows] /= matrix[k + k*matrix_internal_rows];\n"
    " barrier(CLK_GLOBAL_MEM_FENCE);\n"
    " temp = matrix[i + k*matrix_internal_rows];\n"
    " \n"
    " //parallel subtraction:\n"
    " for (unsigned int j=k+1 + get_global_id(0); j<matrix_cols; j += get_global_size(0))\n"
    " matrix[i + j*matrix_internal_rows] -= temp * matrix[k + j*matrix_internal_rows];\n"
    " }\n"
    " }\n"
    "} \n";

/**
 * Kernel "rank1_update": for every (row, col) adds
 * vector1[row] * vector2[col] to the matrix entry.
 *
 * NOTE(review): the comment embedded in the kernel text reads
 * "A += x * x^T", while the code takes two separate vectors - confirm.
 */
const char * const matrix_col_align1_rank1_update =
    "//perform a rank-1 update of the matrix, i.e. A += x * x^T\n"
    "__kernel void rank1_update(\n"
    " __global float * matrix,\n"
    " unsigned int matrix_rows,\n"
    " unsigned int matrix_cols,\n"
    " unsigned int matrix_internal_rows,\n"
    " unsigned int matrix_internal_cols,\n"
    " __global const float * vector1, \n"
    " __global const float * vector2) \n"
    "{ \n"
    " float tmp;\n"
    " for (unsigned int row= get_global_id(0); row < matrix_rows; row += get_global_size(0))\n"
    " {\n"
    " tmp = vector1[row];\n"
    " for (unsigned int col = 0; col < matrix_cols; ++col)\n"
    " matrix[row + col * matrix_internal_rows] += tmp * vector2[col];\n"
    " }\n"
    "}\n";

/**
 * Kernel "trans_upper_triangular_substitute_inplace": walks rows downwards
 * from matrix_rows-1; work-item 0 divides vector[row] by the diagonal
 * entry, then all work-items update the entries with elim < row using
 * matrix[row + elim * matrix_internal_rows].
 *
 * NOTE(review): the comment embedded in the kernel text says "transposed
 * lower triangular matrix" although the kernel name says upper - confirm.
 */
const char * const matrix_col_align1_trans_upper_triangular_substitute_inplace =
    "//transposed lower triangular matrix\n"
    "__kernel void trans_upper_triangular_substitute_inplace(\n"
    " __global const float * matrix, \n"
    " unsigned int matrix_rows,\n"
    " unsigned int matrix_cols,\n"
    " unsigned int matrix_internal_rows,\n"
    " unsigned int matrix_internal_cols,\n"
    " __global float * vector) \n"
    "{ \n"
    " float temp; \n"
    " for (int row = matrix_rows-1; row > -1; --row) \n"
    " { \n"
    " barrier(CLK_GLOBAL_MEM_FENCE); \n"
    " if (get_global_id(0) == 0) \n"
    " vector[row] /= matrix[row + row*matrix_internal_rows]; \n"
    " \n"
    " barrier(CLK_GLOBAL_MEM_FENCE); \n"
    " temp = vector[row]; \n"
    " //eliminate column with index 'row' in parallel: \n"
    " for (int elim = get_global_id(0); elim < row; elim += get_global_size(0)) \n"
    " vector[elim] -= temp * matrix[row + elim * matrix_internal_rows]; \n"
    " } \n"
    " \n"
    "}\n";

/**
 * Kernel "unit_lower_triangular_substitute_inplace": like
 * "lower_triangular_substitute_inplace" but without the division by the
 * diagonal entry.
 */
const char * const matrix_col_align1_unit_lower_triangular_substitute_inplace =
    "__kernel void unit_lower_triangular_substitute_inplace(\n"
    " __global const float * matrix,\n"
    " unsigned int matrix_rows,\n"
    " unsigned int matrix_cols,\n"
    " unsigned int matrix_internal_rows,\n"
    " unsigned int matrix_internal_cols,\n"
    " __global float * vector)\n"
    "{\n"
    " float temp;\n"
    " for (int row = 0; row < matrix_rows; ++row)\n"
    " {\n"
    " barrier(CLK_GLOBAL_MEM_FENCE);\n"
    " temp = vector[row];\n"
    " for (int elim = row + get_global_id(0) + 1; elim < matrix_rows; elim += get_global_size(0))\n"
    " vector[elim] -= temp * matrix[row * matrix_internal_rows + elim];\n"
    " }\n"
    "}\n";

/**
 * Kernel "upper_triangular_substitute_inplace": walks rows downwards from
 * matrix_rows-1; work-item 0 divides vector[row] by the diagonal entry,
 * then all work-items update the entries with elim < row using
 * matrix[elim + row * matrix_internal_rows].
 */
const char * const matrix_col_align1_upper_triangular_substitute_inplace =
    "__kernel void upper_triangular_substitute_inplace( \n"
    " __global const float * matrix, \n"
    " unsigned int matrix_rows,\n"
    " unsigned int matrix_cols,\n"
    " unsigned int matrix_internal_rows,\n"
    " unsigned int matrix_internal_cols,\n"
    " __global float * vector) \n"
    "{ \n"
    " float temp; \n"
    " for (int row = matrix_rows-1; row > -1; --row) \n"
    " { \n"
    " barrier(CLK_GLOBAL_MEM_FENCE); \n"
    " if (get_global_id(0) == 0) \n"
    " vector[row] /= matrix[row + row*matrix_internal_rows]; \n"
    " \n"
    " barrier(CLK_GLOBAL_MEM_FENCE); \n"
    " temp = vector[row]; \n"
    " //eliminate column with index 'row' in parallel: \n"
    " for (int elim = get_global_id(0); elim < row; elim += get_global_size(0)) \n"
    " vector[elim] -= temp * matrix[elim + row * matrix_internal_rows]; \n"
    " } \n"
    " \n"
    "}\n";

/** Kernel "inplace_add": vec1[i] += vec2[i] for all i < size. */
const char * const matrix_col_align1_inplace_add =
    "__kernel void inplace_add(\n"
    " __global float * vec1,\n"
    " __global const float * vec2,\n"
    " unsigned int size) \n"
    "{ \n"
    " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
    " vec1[i] += vec2[i];\n"
    "}\n";

/**
 * Kernel "trans_vec_mul": result[row] = sum over col of
 * matrix[row * matrix_internal_rows + col] * vector[col], with row running
 * over matrix_cols and col over matrix_rows.
 */
const char * const matrix_col_align1_trans_vec_mul =
    "__kernel void trans_vec_mul(\n"
    " __global const float * matrix,\n"
    " unsigned int matrix_rows,\n"
    " unsigned int matrix_cols,\n"
    " unsigned int matrix_internal_rows,\n"
    " unsigned int matrix_internal_cols,\n"
    " __global const float * vector, \n"
    " __global float * result) \n"
    "{ \n"
    " //row and col indicate indices within transposed matrix\n"
    " for (unsigned int row = get_global_id(0); row < matrix_cols; row += get_global_size(0))\n"
    " {\n"
    " float dot_prod2 = 0.0f;\n"
    " for (unsigned int col = 0; col < matrix_rows; ++col)\n"
    " dot_prod2 += matrix[row * matrix_internal_rows + col] * vector[col];\n"
    " result[row] = dot_prod2;\n"
    " }\n"
    "}\n";

/**
 * Kernel "trans_unit_lower_triangular_substitute_inplace": like
 * "trans_lower_triangular_substitute_inplace" but without the division by
 * the diagonal entry. The source text starts with one empty line and ends
 * with two.
 */
const char * const matrix_col_align1_trans_unit_lower_triangular_substitute_inplace =
    "\n"
    "__kernel void trans_unit_lower_triangular_substitute_inplace(\n"
    " __global const float * matrix,\n"
    " unsigned int matrix_rows,\n"
    " unsigned int matrix_cols,\n"
    " unsigned int matrix_internal_rows,\n"
    " unsigned int matrix_internal_cols,\n"
    " __global float * vector)\n"
    "{\n"
    " float temp;\n"
    " for (int row = 0; row < matrix_rows; ++row)\n"
    " {\n"
    " barrier(CLK_GLOBAL_MEM_FENCE);\n"
    "\n"
    " temp = vector[row];\n"
    "\n"
    " for (int elim = row + get_global_id(0) + 1; elim < matrix_rows; elim += get_global_size(0))\n"
    " vector[elim] -= temp * matrix[elim * matrix_internal_rows + row];\n"
    " }\n"
    "}\n"
    "\n"
    "\n";

/**
 * Kernel "cpu_inplace_mult": vec[i] *= factor for all i < size, with the
 * factor passed by value.
 */
const char * const matrix_col_align1_cpu_inplace_mult =
    "__kernel void cpu_inplace_mult(\n"
    " __global float * vec,\n"
    " float factor, \n"
    " unsigned int size) \n"
    "{ \n"
    " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
    " vec[i] *= factor;\n"
    "}\n";

/** Kernel "clear": vec[i] = 0 for all i < size. */
const char * const matrix_col_align1_clear =
    "__kernel void clear(\n"
    " __global float * vec,\n"
    " unsigned int size) \n"
    "{ \n"
    " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
    " vec[i] = 0;\n"
    "}\n";

/**
 * Kernel "inplace_mult": vec[i] *= *fac for all i < size, with the factor
 * read through a __global pointer.
 */
const char * const matrix_col_align1_inplace_mult =
    "__kernel void inplace_mult(\n"
    " __global float * vec,\n"
    " __global const float * fac, \n"
    " unsigned int size) \n"
    "{ \n"
    " float factor = *fac;\n"
    " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
    " vec[i] *= factor;\n"
    "}\n";

/**
 * Kernel "unit_upper_triangular_substitute_inplace": like
 * "upper_triangular_substitute_inplace" but without the division by the
 * diagonal entry.
 */
const char * const matrix_col_align1_unit_upper_triangular_substitute_inplace =
    "__kernel void unit_upper_triangular_substitute_inplace( \n"
    " __global const float * matrix, \n"
    " unsigned int matrix_rows,\n"
    " unsigned int matrix_cols,\n"
    " unsigned int matrix_internal_rows,\n"
    " unsigned int matrix_internal_cols,\n"
    " __global float * vector) \n"
    "{ \n"
    " float temp; \n"
    " for (int row = matrix_rows-1; row > -1; --row) \n"
    " { \n"
    " barrier(CLK_GLOBAL_MEM_FENCE); \n"
    " \n"
    " temp = vector[row]; \n"
    " //eliminate column with index 'row' in parallel: \n"
    " for (int elim = get_global_id(0); elim < row; elim += get_global_size(0)) \n"
    " vector[elim] -= temp * matrix[elim + row * matrix_internal_rows]; \n"
    " } \n"
    " \n"
    "}\n";

/** Kernel "add": result[i] = vec1[i] + vec2[i] for all i < size. */
const char * const matrix_col_align1_add =
    "__kernel void add(\n"
    " __global const float * vec1,\n"
    " __global const float * vec2, \n"
    " __global float * result,\n"
    " unsigned int size) \n"
    "{ \n"
    " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
    " result[i] = vec1[i] + vec2[i];\n"
    "}\n";

} //namespace kernels
} //namespace linalg
} //namespace viennacl
#endif
1.7.6.1