ViennaCL - The Vienna Computing Library  1.1.2
/build/buildd/viennacl-1.1.2/viennacl/linalg/kernels/matrix_col_source.h
Go to the documentation of this file.
00001 #ifndef _VIENNACL_MATRIX_COL_SOURCE_HPP_
00002 #define _VIENNACL_MATRIX_COL_SOURCE_HPP_
00003 //Automatically generated file from aux-directory, do not edit manually!
00004 namespace viennacl
00005 {
00006  namespace linalg
00007  {
00008   namespace kernels
00009   {
// OpenCL C source text of the kernel `inplace_divide`.
// The kernel reads one scalar from `*fac` and divides vec[i] by it, in place,
// for every i < size. A work-item starts at get_global_id(0) and steps by
// get_global_size(0), so any launch size covers all `size` entries.
00010 const char * const matrix_col_align1_inplace_divide = 
00011 "__kernel void inplace_divide(\n"
00012 "          __global float * vec,\n"
00013 "          __global const float * fac,  //note: CPU variant is mapped to prod_scalar\n"
00014 "          unsigned int size) \n"
00015 "{ \n"
00016 "  float factor = *fac;\n"
00017 "  for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
00018 "    vec[i] /= factor;\n"
00019 "}\n"
00020 ; //matrix_col_align1_inplace_divide
00021 
// OpenCL C source text of the kernel `trans_lower_triangular_substitute_inplace`.
// Overwrites `vector` by a forward sweep over row = 0 .. matrix_rows-1:
//   1. work-item 0 divides vector[row] by the diagonal entry
//      matrix[row + row*matrix_internal_rows];
//   2. every work-item then subtracts vector[row] * matrix[row + elim*matrix_internal_rows]
//      from vector[elim] for its share of the indices elim > row.
// matrix_cols and matrix_internal_cols are accepted but not referenced in the body.
// NOTE(review): barrier() only synchronizes the work-items of one work-group, so the
// row-to-row dependency on vector[] presumably requires a single-work-group launch -
// confirm at the call site.
00022 const char * const matrix_col_align1_trans_lower_triangular_substitute_inplace = 
00023 "__kernel void trans_lower_triangular_substitute_inplace(\n"
00024 "          __global const float * matrix,\n"
00025 "          unsigned int matrix_rows,\n"
00026 "          unsigned int matrix_cols,\n"
00027 "          unsigned int matrix_internal_rows,\n"
00028 "          unsigned int matrix_internal_cols,\n"
00029 "          __global float * vector)\n"
00030 "{\n"
00031 "  float temp;\n"
00032 "  for (int row = 0; row < matrix_rows; ++row)\n"
00033 "  {\n"
00034 "    barrier(CLK_GLOBAL_MEM_FENCE);\n"
00035 "    if (get_global_id(0) == 0)\n"
00036 "      vector[row] /= matrix[row+row*matrix_internal_rows];\n"
00037 "    barrier(CLK_GLOBAL_MEM_FENCE);\n"
00038 "    temp = vector[row];\n"
00039 "    for  (int elim = row + get_global_id(0) + 1; elim < matrix_rows; elim += get_global_size(0))\n"
00040 "      vector[elim] -= temp * matrix[elim * matrix_internal_rows + row];\n"
00041 "  }\n"
00042 "}\n"
00043 ; //matrix_col_align1_trans_lower_triangular_substitute_inplace
00044 
// OpenCL C source text of the kernel `trans_unit_upper_triangular_substitute_inplace`.
// Overwrites `vector` by a backward sweep over row = matrix_rows-1 .. 0. For each row,
// every work-item subtracts vector[row] * matrix[row + elim*matrix_internal_rows] from
// vector[elim] for its share of the indices elim < row. No division by a diagonal
// entry is performed (the diagonal is never read).
// matrix_cols and matrix_internal_cols are accepted but not referenced in the body.
// NOTE(review): barrier() only synchronizes the work-items of one work-group, so the
// row-to-row dependency on vector[] presumably requires a single-work-group launch -
// confirm at the call site.
00045 const char * const matrix_col_align1_trans_unit_upper_triangular_substitute_inplace = 
00046 "//transposed lower triangular matrix\n"
00047 "__kernel void trans_unit_upper_triangular_substitute_inplace(\n"
00048 "          __global const float * matrix, \n"
00049 "          unsigned int matrix_rows,\n"
00050 "          unsigned int matrix_cols,\n"
00051 "          unsigned int matrix_internal_rows,\n"
00052 "          unsigned int matrix_internal_cols,\n"
00053 "          __global float * vector) \n"
00054 "{ \n"
00055 "  float temp; \n"
00056 "  for (int row = matrix_rows-1; row > -1; --row) \n"
00057 "  { \n"
00058 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00059 "    \n"
00060 "    temp = vector[row]; \n"
00061 "    //eliminate column with index 'row' in parallel: \n"
00062 "    for  (int elim = get_global_id(0); elim < row; elim += get_global_size(0)) \n"
00063 "      vector[elim] -= temp * matrix[row + elim  * matrix_internal_rows]; \n"
00064 "  } \n"
00065 "   \n"
00066 "}\n"
00067 ; //matrix_col_align1_trans_unit_upper_triangular_substitute_inplace
00068 
// OpenCL C source text of the kernel `sub`.
// Writes result[i] = vec1[i] - vec2[i] for every i < size; the inputs are read-only.
// A work-item starts at get_global_id(0) and steps by get_global_size(0).
00069 const char * const matrix_col_align1_sub = 
00070 "__kernel void sub(\n"
00071 "          __global const float * vec1,\n"
00072 "          __global const float * vec2, \n"
00073 "          __global float * result,\n"
00074 "          unsigned int size)\n"
00075 "{ \n"
00076 "  for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
00077 "    result[i] = vec1[i] - vec2[i];\n"
00078 "}\n"
00079 ; //matrix_col_align1_sub
00080 
// OpenCL C source text of the kernel `scaled_rank1_update`.
// Adds val * vector1[row] * vector2[col] to the matrix entry stored at
// row + col*matrix_internal_rows, for all row < matrix_rows and col < matrix_cols.
// Rows are distributed over work-items (start get_global_id(0), step
// get_global_size(0)); each work-item walks all columns of its rows.
// matrix_internal_cols is accepted but not referenced in the body.
00081 const char * const matrix_col_align1_scaled_rank1_update = 
00082 "__kernel void scaled_rank1_update(\n"
00083 "          __global float * matrix,\n"
00084 "          unsigned int matrix_rows,\n"
00085 "          unsigned int matrix_cols,\n"
00086 "          unsigned int matrix_internal_rows,\n"
00087 "          unsigned int matrix_internal_cols,\n"
00088 "          float val,\n"
00089 "          __global const float * vector1,  \n"
00090 "          __global const float * vector2) \n"
00091 "{ \n"
00092 "  float tmp;\n"
00093 "  for (unsigned int row = get_global_id(0); row < matrix_rows; row += get_global_size(0))\n"
00094 "  {\n"
00095 "    tmp = val * vector1[row];\n"
00096 "    for (unsigned int col = 0; col < matrix_cols; ++col)\n"
00097 "      matrix[row + col*matrix_internal_rows] += tmp * vector2[col];\n"
00098 "  }\n"
00099 "}\n"
00100 ; //matrix_col_align1_scaled_rank1_update
00101 
// OpenCL C source text of the kernel `inplace_sub`.
// Performs vec1[i] -= vec2[i] for every i < size; vec2 is read-only.
// A work-item starts at get_global_id(0) and steps by get_global_size(0).
00102 const char * const matrix_col_align1_inplace_sub = 
00103 "__kernel void inplace_sub(\n"
00104 "          __global float * vec1,\n"
00105 "          __global const float * vec2,\n"
00106 "          unsigned int size) \n"
00107 "{ \n"
00108 "  for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
00109 "    vec1[i] -= vec2[i];\n"
00110 "}\n"
00111 ; //matrix_col_align1_inplace_sub
00112 
// OpenCL C source text of the kernel `lower_triangular_substitute_inplace`.
// Overwrites `vector` by a forward sweep over row = 0 .. matrix_rows-1:
//   1. work-item 0 divides vector[row] by the diagonal entry
//      matrix[row + row*matrix_internal_rows];
//   2. every work-item then subtracts vector[row] * matrix[elim + row*matrix_internal_rows]
//      from vector[elim] for its share of the indices elim > row.
// matrix_cols and matrix_internal_cols are accepted but not referenced in the body.
// NOTE(review): barrier() only synchronizes the work-items of one work-group, so the
// row-to-row dependency on vector[] presumably requires a single-work-group launch -
// confirm at the call site.
00113 const char * const matrix_col_align1_lower_triangular_substitute_inplace = 
00114 "__kernel void lower_triangular_substitute_inplace(\n"
00115 "          __global const float * matrix,\n"
00116 "          unsigned int matrix_rows,\n"
00117 "          unsigned int matrix_cols,\n"
00118 "          unsigned int matrix_internal_rows,\n"
00119 "          unsigned int matrix_internal_cols,\n"
00120 "          __global float * vector)\n"
00121 "{\n"
00122 "  float temp;\n"
00123 "  for (int row = 0; row < matrix_rows; ++row)\n"
00124 "  {\n"
00125 "    barrier(CLK_GLOBAL_MEM_FENCE);\n"
00126 "    if (get_global_id(0) == 0)\n"
00127 "      vector[row] /= matrix[row+row*matrix_internal_rows];\n"
00128 "    barrier(CLK_GLOBAL_MEM_FENCE);\n"
00129 "    temp = vector[row];\n"
00130 "    for  (int elim = row + get_global_id(0) + 1; elim < matrix_rows; elim += get_global_size(0))\n"
00131 "      vector[elim] -= temp * matrix[row * matrix_internal_rows + elim];\n"
00132 "  }\n"
00133 "}\n"
00134 ; //matrix_col_align1_lower_triangular_substitute_inplace
00135 
// OpenCL C source text of the kernel `vec_mul`.
// For every row < matrix_rows, writes to result[row] the sum over col < matrix_cols of
// matrix[row + col*matrix_internal_rows] * vector[col].
// Rows are distributed over work-items (start get_global_id(0), step
// get_global_size(0)); each work-item accumulates its dot product in a private float.
// matrix_internal_cols is accepted but not referenced in the body.
00136 const char * const matrix_col_align1_vec_mul = 
00137 "__kernel void vec_mul(\n"
00138 "          __global const float * matrix,\n"
00139 "          unsigned int matrix_rows,\n"
00140 "          unsigned int matrix_cols,\n"
00141 "          unsigned int matrix_internal_rows,\n"
00142 "          unsigned int matrix_internal_cols,\n"
00143 "          __global const float * vector,  \n"
00144 "          __global float * result) \n"
00145 "{ \n"
00146 "  for (unsigned int row = get_global_id(0); row < matrix_rows; row += get_global_size(0))\n"
00147 "  {\n"
00148 "    float dot_prod = 0.0f;\n"
00149 "    for (unsigned int col = 0; col < matrix_cols; ++col)\n"
00150 "      dot_prod += matrix[row + col*matrix_internal_rows] * vector[col];\n"
00151 "    result[row] = dot_prod;\n"
00152 "  }\n"
00153 "}\n"
00154 ; //matrix_col_align1_vec_mul
00155 
// OpenCL C source text of the kernel `lu_factorize`.
// Modifies `matrix` in place. For each row i = 1 .. matrix_rows-1 and each k < i:
//   1. work-item 0 divides the entry at (i, k) by the diagonal entry at (k, k);
//   2. after a barrier, every work-item subtracts (entry (i, k)) * (entry (k, j)) from
//      entry (i, j) for its share of the columns j > k, j < matrix_cols.
// Entry (r, c) is addressed as matrix[r + c*matrix_internal_rows]. The kernel contains
// no row exchanges and no check for a zero diagonal entry.
// matrix_internal_cols is accepted but not referenced in the body.
// NOTE(review): barrier() only synchronizes the work-items of one work-group, so this
// presumably requires a single-work-group launch - confirm at the call site.
00156 const char * const matrix_col_align1_lu_factorize = 
00157 "__kernel void lu_factorize(\n"
00158 "          __global float * matrix,\n"
00159 "          unsigned int matrix_rows,\n"
00160 "          unsigned int matrix_cols,\n"
00161 "          unsigned int matrix_internal_rows,\n"
00162 "          unsigned int matrix_internal_cols) \n"
00163 "{ \n"
00164 "  float temp;\n"
00165 "  for (unsigned int i=1; i<matrix_rows; ++i)\n"
00166 "  {\n"
00167 "    for (unsigned int k=0; k<i; ++k)\n"
00168 "    {\n"
00169 "      if (get_global_id(0) == 0)\n"
00170 "        matrix[i + k*matrix_internal_rows] /= matrix[k + k*matrix_internal_rows];\n"
00171 "      barrier(CLK_GLOBAL_MEM_FENCE);\n"
00172 "      temp = matrix[i + k*matrix_internal_rows];\n"
00173 "      \n"
00174 "      //parallel subtraction:\n"
00175 "      for (unsigned int j=k+1 + get_global_id(0); j<matrix_cols; j += get_global_size(0))\n"
00176 "        matrix[i + j*matrix_internal_rows] -= temp * matrix[k + j*matrix_internal_rows];\n"
00177 "    }\n"
00178 "  }\n"
00179 "} \n"
00180 ; //matrix_col_align1_lu_factorize
00181 
// OpenCL C source text of the kernel `rank1_update`.
// Adds vector1[row] * vector2[col] to the matrix entry stored at
// row + col*matrix_internal_rows, for all row < matrix_rows and col < matrix_cols.
// Rows are distributed over work-items (start get_global_id(0), step
// get_global_size(0)); each work-item walks all columns of its rows.
// matrix_internal_cols is accepted but not referenced in the body.
// NOTE(review): the comment embedded in the kernel text reads "A += x * x^T", but the
// code takes two separate vectors (vector1, vector2); the embedded comment presumably
// describes only the special case vector1 == vector2.
00182 const char * const matrix_col_align1_rank1_update = 
00183 "//perform a rank-1 update of the matrix, i.e. A += x * x^T\n"
00184 "__kernel void rank1_update(\n"
00185 "          __global float * matrix,\n"
00186 "          unsigned int matrix_rows,\n"
00187 "          unsigned int matrix_cols,\n"
00188 "          unsigned int matrix_internal_rows,\n"
00189 "          unsigned int matrix_internal_cols,\n"
00190 "          __global const float * vector1,  \n"
00191 "          __global const float * vector2) \n"
00192 "{ \n"
00193 "  float tmp;\n"
00194 "  for (unsigned int row= get_global_id(0); row < matrix_rows; row += get_global_size(0))\n"
00195 "  {\n"
00196 "    tmp = vector1[row];\n"
00197 "    for (unsigned int col = 0; col < matrix_cols; ++col)\n"
00198 "      matrix[row + col * matrix_internal_rows] += tmp * vector2[col];\n"
00199 "  }\n"
00200 "}\n"
00201 ; //matrix_col_align1_rank1_update
00202 
// OpenCL C source text of the kernel `trans_upper_triangular_substitute_inplace`.
// Overwrites `vector` by a backward sweep over row = matrix_rows-1 .. 0:
//   1. work-item 0 divides vector[row] by the diagonal entry
//      matrix[row + row*matrix_internal_rows];
//   2. every work-item then subtracts vector[row] * matrix[row + elim*matrix_internal_rows]
//      from vector[elim] for its share of the indices elim < row.
// matrix_cols and matrix_internal_cols are accepted but not referenced in the body.
// NOTE(review): barrier() only synchronizes the work-items of one work-group, so the
// row-to-row dependency on vector[] presumably requires a single-work-group launch -
// confirm at the call site.
00203 const char * const matrix_col_align1_trans_upper_triangular_substitute_inplace = 
00204 "//transposed lower triangular matrix\n"
00205 "__kernel void trans_upper_triangular_substitute_inplace(\n"
00206 "          __global const float * matrix, \n"
00207 "          unsigned int matrix_rows,\n"
00208 "          unsigned int matrix_cols,\n"
00209 "          unsigned int matrix_internal_rows,\n"
00210 "          unsigned int matrix_internal_cols,\n"
00211 "          __global float * vector) \n"
00212 "{ \n"
00213 "  float temp; \n"
00214 "  for (int row = matrix_rows-1; row > -1; --row) \n"
00215 "  { \n"
00216 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00217 "    if (get_global_id(0) == 0) \n"
00218 "      vector[row] /= matrix[row + row*matrix_internal_rows]; \n"
00219 " \n"
00220 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00221 "    temp = vector[row]; \n"
00222 "    //eliminate column with index 'row' in parallel: \n"
00223 "    for  (int elim = get_global_id(0); elim < row; elim += get_global_size(0)) \n"
00224 "      vector[elim] -= temp * matrix[row + elim * matrix_internal_rows]; \n"
00225 "  } \n"
00226 "   \n"
00227 "}\n"
00228 ; //matrix_col_align1_trans_upper_triangular_substitute_inplace
00229 
// OpenCL C source text of the kernel `unit_lower_triangular_substitute_inplace`.
// Overwrites `vector` by a forward sweep over row = 0 .. matrix_rows-1. For each row,
// every work-item subtracts vector[row] * matrix[elim + row*matrix_internal_rows] from
// vector[elim] for its share of the indices elim > row. No division by a diagonal
// entry is performed (the diagonal is never read).
// matrix_cols and matrix_internal_cols are accepted but not referenced in the body.
// NOTE(review): barrier() only synchronizes the work-items of one work-group, so the
// row-to-row dependency on vector[] presumably requires a single-work-group launch -
// confirm at the call site.
00230 const char * const matrix_col_align1_unit_lower_triangular_substitute_inplace = 
00231 "__kernel void unit_lower_triangular_substitute_inplace(\n"
00232 "          __global const float * matrix,\n"
00233 "          unsigned int matrix_rows,\n"
00234 "          unsigned int matrix_cols,\n"
00235 "          unsigned int matrix_internal_rows,\n"
00236 "          unsigned int matrix_internal_cols,\n"
00237 "          __global float * vector)\n"
00238 "{\n"
00239 "  float temp;\n"
00240 "  for (int row = 0; row < matrix_rows; ++row)\n"
00241 "  {\n"
00242 "    barrier(CLK_GLOBAL_MEM_FENCE);\n"
00243 "    temp = vector[row];\n"
00244 "    for  (int elim = row + get_global_id(0) + 1; elim < matrix_rows; elim += get_global_size(0))\n"
00245 "      vector[elim] -= temp * matrix[row * matrix_internal_rows + elim];\n"
00246 "  }\n"
00247 "}\n"
00248 ; //matrix_col_align1_unit_lower_triangular_substitute_inplace
00249 
// OpenCL C source text of the kernel `upper_triangular_substitute_inplace`.
// Overwrites `vector` by a backward sweep over row = matrix_rows-1 .. 0:
//   1. work-item 0 divides vector[row] by the diagonal entry
//      matrix[row + row*matrix_internal_rows];
//   2. every work-item then subtracts vector[row] * matrix[elim + row*matrix_internal_rows]
//      from vector[elim] for its share of the indices elim < row.
// matrix_cols and matrix_internal_cols are accepted but not referenced in the body.
// NOTE(review): barrier() only synchronizes the work-items of one work-group, so the
// row-to-row dependency on vector[] presumably requires a single-work-group launch -
// confirm at the call site.
00250 const char * const matrix_col_align1_upper_triangular_substitute_inplace = 
00251 "__kernel void upper_triangular_substitute_inplace( \n"
00252 "          __global const float * matrix, \n"
00253 "          unsigned int matrix_rows,\n"
00254 "          unsigned int matrix_cols,\n"
00255 "          unsigned int matrix_internal_rows,\n"
00256 "          unsigned int matrix_internal_cols,\n"
00257 "          __global float * vector) \n"
00258 "{ \n"
00259 "  float temp; \n"
00260 "  for (int row = matrix_rows-1; row > -1; --row) \n"
00261 "  { \n"
00262 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00263 "    if (get_global_id(0) == 0) \n"
00264 "      vector[row] /= matrix[row + row*matrix_internal_rows]; \n"
00265 " \n"
00266 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00267 "    temp = vector[row]; \n"
00268 "    //eliminate column with index 'row' in parallel: \n"
00269 "    for  (int elim = get_global_id(0); elim < row; elim += get_global_size(0)) \n"
00270 "      vector[elim] -= temp * matrix[elim + row  * matrix_internal_rows]; \n"
00271 "  } \n"
00272 "   \n"
00273 "}\n"
00274 ; //matrix_col_align1_upper_triangular_substitute_inplace
00275 
// OpenCL C source text of the kernel `inplace_add`.
// Performs vec1[i] += vec2[i] for every i < size; vec2 is read-only.
// A work-item starts at get_global_id(0) and steps by get_global_size(0).
00276 const char * const matrix_col_align1_inplace_add = 
00277 "__kernel void inplace_add(\n"
00278 "          __global float * vec1,\n"
00279 "          __global const float * vec2,\n"
00280 "          unsigned int size) \n"
00281 "{ \n"
00282 "  for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
00283 "    vec1[i] += vec2[i];\n"
00284 "}\n"
00285 ; //matrix_col_align1_inplace_add
00286 
// OpenCL C source text of the kernel `trans_vec_mul`.
// For every row < matrix_cols, writes to result[row] the sum over col < matrix_rows of
// matrix[col + row*matrix_internal_rows] * vector[col]. As the embedded comment says,
// `row` and `col` are indices of the transposed matrix, which is why the loop bounds
// use matrix_cols for rows and matrix_rows for columns.
// Output entries are distributed over work-items (start get_global_id(0), step
// get_global_size(0)). matrix_internal_cols is accepted but not referenced in the body.
00287 const char * const matrix_col_align1_trans_vec_mul = 
00288 "__kernel void trans_vec_mul(\n"
00289 "          __global const float * matrix,\n"
00290 "          unsigned int matrix_rows,\n"
00291 "          unsigned int matrix_cols,\n"
00292 "          unsigned int matrix_internal_rows,\n"
00293 "          unsigned int matrix_internal_cols,\n"
00294 "          __global const float * vector,  \n"
00295 "          __global float * result) \n"
00296 "{ \n"
00297 "  //row and col indicate indices within transposed matrix\n"
00298 "  for (unsigned int row = get_global_id(0); row < matrix_cols; row += get_global_size(0))\n"
00299 "  {\n"
00300 "    float dot_prod2 = 0.0f;\n"
00301 "    for (unsigned int col = 0; col < matrix_rows; ++col)\n"
00302 "      dot_prod2 += matrix[row * matrix_internal_rows + col] * vector[col];\n"
00303 "    result[row] = dot_prod2;\n"
00304 "  }\n"
00305 "}\n"
00306 ; //matrix_col_align1_trans_vec_mul
00307 
// OpenCL C source text of the kernel `trans_unit_lower_triangular_substitute_inplace`.
// Overwrites `vector` by a forward sweep over row = 0 .. matrix_rows-1. For each row,
// every work-item subtracts vector[row] * matrix[row + elim*matrix_internal_rows] from
// vector[elim] for its share of the indices elim > row. No division by a diagonal
// entry is performed (the diagonal is never read).
// matrix_cols and matrix_internal_cols are accepted but not referenced in the body.
// The literal starts with one blank line and ends with two; they are part of the text.
// NOTE(review): barrier() only synchronizes the work-items of one work-group, so the
// row-to-row dependency on vector[] presumably requires a single-work-group launch -
// confirm at the call site.
00308 const char * const matrix_col_align1_trans_unit_lower_triangular_substitute_inplace = 
00309 "\n"
00310 "__kernel void trans_unit_lower_triangular_substitute_inplace(\n"
00311 "          __global const float * matrix,\n"
00312 "          unsigned int matrix_rows,\n"
00313 "          unsigned int matrix_cols,\n"
00314 "          unsigned int matrix_internal_rows,\n"
00315 "          unsigned int matrix_internal_cols,\n"
00316 "          __global float * vector)\n"
00317 "{\n"
00318 "  float temp;\n"
00319 "  for (int row = 0; row < matrix_rows; ++row)\n"
00320 "  {\n"
00321 "    barrier(CLK_GLOBAL_MEM_FENCE);\n"
00322 "\n"
00323 "    temp = vector[row];\n"
00324 "\n"
00325 "    for  (int elim = row + get_global_id(0) + 1; elim < matrix_rows; elim += get_global_size(0))\n"
00326 "      vector[elim] -= temp * matrix[elim * matrix_internal_rows + row];\n"
00327 "  }\n"
00328 "}\n"
00329 "\n"
00330 "\n"
00331 ; //matrix_col_align1_trans_unit_lower_triangular_substitute_inplace
00332 
// OpenCL C source text of the kernel `cpu_inplace_mult`.
// Multiplies vec[i] by `factor`, in place, for every i < size. The scalar is passed
// by value as a kernel argument rather than through a __global pointer.
// A work-item starts at get_global_id(0) and steps by get_global_size(0).
00333 const char * const matrix_col_align1_cpu_inplace_mult = 
00334 "__kernel void cpu_inplace_mult(\n"
00335 "          __global float * vec,\n"
00336 "          float factor, \n"
00337 "          unsigned int size) \n"
00338 "{ \n"
00339 "  for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
00340 "    vec[i] *= factor;\n"
00341 "}\n"
00342 ; //matrix_col_align1_cpu_inplace_mult
00343 
// OpenCL C source text of the kernel `clear`.
// Sets vec[i] = 0 for every i < size.
// A work-item starts at get_global_id(0) and steps by get_global_size(0).
00344 const char * const matrix_col_align1_clear = 
00345 "__kernel void clear(\n"
00346 "          __global float * vec,\n"
00347 "          unsigned int size) \n"
00348 "{ \n"
00349 "  for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
00350 "    vec[i] = 0;\n"
00351 "}\n"
00352 ; //matrix_col_align1_clear
00353 
// OpenCL C source text of the kernel `inplace_mult`.
// Reads one scalar from `*fac` and multiplies vec[i] by it, in place, for every
// i < size. A work-item starts at get_global_id(0) and steps by get_global_size(0).
00354 const char * const matrix_col_align1_inplace_mult = 
00355 "__kernel void inplace_mult(\n"
00356 "          __global float * vec,\n"
00357 "          __global const float * fac, \n"
00358 "          unsigned int size) \n"
00359 "{ \n"
00360 "  float factor = *fac;\n"
00361 "  for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
00362 "    vec[i] *= factor;\n"
00363 "}\n"
00364 ; //matrix_col_align1_inplace_mult
00365 
// OpenCL C source text of the kernel `unit_upper_triangular_substitute_inplace`.
// Overwrites `vector` by a backward sweep over row = matrix_rows-1 .. 0. For each row,
// every work-item subtracts vector[row] * matrix[elim + row*matrix_internal_rows] from
// vector[elim] for its share of the indices elim < row. No division by a diagonal
// entry is performed (the diagonal is never read).
// matrix_cols and matrix_internal_cols are accepted but not referenced in the body.
// NOTE(review): barrier() only synchronizes the work-items of one work-group, so the
// row-to-row dependency on vector[] presumably requires a single-work-group launch -
// confirm at the call site.
00366 const char * const matrix_col_align1_unit_upper_triangular_substitute_inplace = 
00367 "__kernel void unit_upper_triangular_substitute_inplace( \n"
00368 "          __global const float * matrix, \n"
00369 "          unsigned int matrix_rows,\n"
00370 "          unsigned int matrix_cols,\n"
00371 "          unsigned int matrix_internal_rows,\n"
00372 "          unsigned int matrix_internal_cols,\n"
00373 "          __global float * vector) \n"
00374 "{ \n"
00375 "  float temp; \n"
00376 "  for (int row = matrix_rows-1; row > -1; --row) \n"
00377 "  { \n"
00378 "    barrier(CLK_GLOBAL_MEM_FENCE); \n"
00379 "    \n"
00380 "    temp = vector[row]; \n"
00381 "    //eliminate column with index 'row' in parallel: \n"
00382 "    for  (int elim = get_global_id(0); elim < row; elim += get_global_size(0)) \n"
00383 "      vector[elim] -= temp * matrix[elim + row  * matrix_internal_rows]; \n"
00384 "  } \n"
00385 "   \n"
00386 "}\n"
00387 ; //matrix_col_align1_unit_upper_triangular_substitute_inplace
00388 
// OpenCL C source text of the kernel `add`.
// Writes result[i] = vec1[i] + vec2[i] for every i < size; the inputs are read-only.
// A work-item starts at get_global_id(0) and steps by get_global_size(0).
00389 const char * const matrix_col_align1_add = 
00390 "__kernel void add(\n"
00391 "          __global const float * vec1,\n"
00392 "          __global const float * vec2, \n"
00393 "          __global float * result,\n"
00394 "          unsigned int size) \n"
00395 "{ \n"
00396 "  for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
00397 "    result[i] = vec1[i] + vec2[i];\n"
00398 "}\n"
00399 ; //matrix_col_align1_add
00400 
00401   }  //namespace kernels
00402  }  //namespace linalg
00403 }  //namespace viennacl
00404 #endif