|
ViennaCL - The Vienna Computing Library
1.1.2
|
#ifndef _VIENNACL_MATRIX_SOLVE_ROW_ROW_SOURCE_HPP_
#define _VIENNACL_MATRIX_SOLVE_ROW_ROW_SOURCE_HPP_
//Automatically generated file from aux-directory, do not edit manually!

/*
 * OpenCL C sources for in-place triangular solves where both A and B are
 * stored row-major (see the "matrix layouts" comment inside each kernel).
 * Every constant below holds the complete source text of exactly one kernel
 * operating on float data.
 *
 * Naming, as stated by the comment embedded in each kernel string:
 *   - leading  "trans_"        : the kernel solves with A^T instead of A;
 *   - trailing "_trans_solve"  : the kernel solves for B^T instead of B;
 *   - "unit_"                  : the division by the diagonal entry
 *                                A[row + row*A_internal_cols] is omitted;
 *   - "lower" / "upper"        : rows are visited in ascending order and the
 *                                entries with index > row are updated
 *                                ("lower"), or rows are visited in descending
 *                                order and the entries with index < row are
 *                                updated ("upper").
 *
 * Shared structure of all kernels:
 *   - get_group_id(0) selects the part of B a work-group operates on:
 *     B[i * B_internal_cols + get_group_id(0)] for the "B" kernels and
 *     B[i + get_group_id(0) * B_internal_cols] for the "B^T" kernels.
 *   - Within a group, the elimination loop starts at an offset derived from
 *     get_local_id(0) and advances in steps of get_local_size(0).
 *   - barrier(CLK_GLOBAL_MEM_FENCE) is issued at the top of every row
 *     iteration; the non-unit kernels issue a second one after the work-item
 *     with local id 0 has divided by the diagonal entry.
 *   - Only A_rows, A_internal_cols and B_internal_cols are read; the other
 *     size parameters are part of the signature but unused in the bodies.
 */
namespace viennacl
{
 namespace linalg
 {
  namespace kernels
  {
/** @brief Source of kernel 'lower_trans_solve' (A with B^T, diagonal division, ascending rows). */
const char * const matrix_solve_row_row_align1_lower_trans_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A \\ B^T\n"
"// matrix layouts: A...row_major, B...row_major\n"
"__kernel void lower_trans_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B, \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = 0; row < A_rows; ++row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" if (get_local_id(0) == 0) \n"
" B[row + get_group_id(0) * B_internal_cols] /= A[row + row*A_internal_cols]; \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row + get_group_id(0) * B_internal_cols]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
" B[elim + get_group_id(0) * B_internal_cols] -= temp * A[elim * A_internal_cols + row];\n"
" }\n"
"}\n"
; //matrix_solve_row_row_align1_lower_trans_solve

/** @brief Source of kernel 'unit_lower_solve' (A with B, no diagonal division, ascending rows). */
const char * const matrix_solve_row_row_align1_unit_lower_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A \\ B\n"
"// matrix layouts: A...row_major, B...row_major\n"
"__kernel void unit_lower_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B, \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = 0; row < A_rows; ++row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row * B_internal_cols + get_group_id(0)]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
" B[elim * B_internal_cols + get_group_id(0)] -= temp * A[elim * A_internal_cols + row];\n"
" }\n"
"}\n"
; //matrix_solve_row_row_align1_unit_lower_solve

/** @brief Source of kernel 'trans_unit_upper_trans_solve' (A^T with B^T, no diagonal division, descending rows). */
const char * const matrix_solve_row_row_align1_trans_unit_upper_trans_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A^T \\ B^T\n"
"// matrix layouts: A...row_major, B...row_major\n"
"__kernel void trans_unit_upper_trans_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B, \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = A_rows-1; row > -1; --row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row + get_group_id(0) * B_internal_cols]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
" B[elim + get_group_id(0) * B_internal_cols] -= temp * A[elim + row * A_internal_cols];\n"
" }\n"
"}\n"
; //matrix_solve_row_row_align1_trans_unit_upper_trans_solve

/** @brief Source of kernel 'unit_upper_trans_solve' (A with B^T, no diagonal division, descending rows). */
const char * const matrix_solve_row_row_align1_unit_upper_trans_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A \\ B^T\n"
"// matrix layouts: A...row_major, B...row_major\n"
"__kernel void unit_upper_trans_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B, \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = A_rows-1; row > -1; --row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row + get_group_id(0) * B_internal_cols]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
" B[elim + get_group_id(0) * B_internal_cols] -= temp * A[elim * A_internal_cols + row];\n"
" }\n"
"}\n"
; //matrix_solve_row_row_align1_unit_upper_trans_solve

/** @brief Source of kernel 'unit_lower_trans_solve' (A with B^T, no diagonal division, ascending rows). */
const char * const matrix_solve_row_row_align1_unit_lower_trans_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A \\ B^T\n"
"// matrix layouts: A...row_major, B...row_major\n"
"__kernel void unit_lower_trans_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B, \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = 0; row < A_rows; ++row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row + get_group_id(0) * B_internal_cols]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
" B[elim + get_group_id(0) * B_internal_cols] -= temp * A[elim * A_internal_cols + row];\n"
" }\n"
"}\n"
; //matrix_solve_row_row_align1_unit_lower_trans_solve

/** @brief Source of kernel 'trans_upper_trans_solve' (A^T with B^T, diagonal division, descending rows). */
const char * const matrix_solve_row_row_align1_trans_upper_trans_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A^T \\ B^T\n"
"// matrix layouts: A...row_major, B...row_major\n"
"__kernel void trans_upper_trans_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B, \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = A_rows-1; row > -1; --row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" if (get_local_id(0) == 0) \n"
" B[row + get_group_id(0) * B_internal_cols] /= A[row + row*A_internal_cols]; \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row + get_group_id(0) * B_internal_cols]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
" B[elim + get_group_id(0) * B_internal_cols] -= temp * A[elim + row * A_internal_cols];\n"
" }\n"
"}\n"
; //matrix_solve_row_row_align1_trans_upper_trans_solve

/** @brief Source of kernel 'upper_solve' (A with B, diagonal division, descending rows). */
const char * const matrix_solve_row_row_align1_upper_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A \\ B\n"
"// matrix layouts: A...row_major, B...row_major\n"
"__kernel void upper_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B, \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = A_rows-1; row > -1; --row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" if (get_local_id(0) == 0) \n"
" B[row * B_internal_cols + get_group_id(0)] /= A[row + row*A_internal_cols]; \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row * B_internal_cols + get_group_id(0)]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
" B[elim * B_internal_cols + get_group_id(0)] -= temp * A[elim * A_internal_cols + row];\n"
" }\n"
"}\n"
; //matrix_solve_row_row_align1_upper_solve

/** @brief Source of kernel 'lower_solve' (A with B, diagonal division, ascending rows). */
const char * const matrix_solve_row_row_align1_lower_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A \\ B\n"
"// matrix layouts: A...row_major, B...row_major\n"
"__kernel void lower_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B, \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = 0; row < A_rows; ++row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" if (get_local_id(0) == 0) \n"
" B[row * B_internal_cols + get_group_id(0)] /= A[row + row*A_internal_cols]; \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row * B_internal_cols + get_group_id(0)]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
" B[elim * B_internal_cols + get_group_id(0)] -= temp * A[elim * A_internal_cols + row];\n"
" }\n"
"}\n"
; //matrix_solve_row_row_align1_lower_solve

/** @brief Source of kernel 'trans_unit_lower_solve' (A^T with B, no diagonal division, ascending rows). */
const char * const matrix_solve_row_row_align1_trans_unit_lower_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A^T \\ B\n"
"// matrix layouts: A...row_major, B...row_major\n"
"__kernel void trans_unit_lower_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B, \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = 0; row < A_rows; ++row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row * B_internal_cols + get_group_id(0)]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
" B[elim * B_internal_cols + get_group_id(0)] -= temp * A[elim + row * A_internal_cols];\n"
" }\n"
"}\n"
; //matrix_solve_row_row_align1_trans_unit_lower_solve

/** @brief Source of kernel 'trans_lower_trans_solve' (A^T with B^T, diagonal division, ascending rows). */
const char * const matrix_solve_row_row_align1_trans_lower_trans_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A^T \\ B^T\n"
"// matrix layouts: A...row_major, B...row_major\n"
"__kernel void trans_lower_trans_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B, \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = 0; row < A_rows; ++row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" if (get_local_id(0) == 0) \n"
" B[row + get_group_id(0) * B_internal_cols] /= A[row + row*A_internal_cols]; \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row + get_group_id(0) * B_internal_cols]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
" B[elim + get_group_id(0) * B_internal_cols] -= temp * A[elim + row * A_internal_cols];\n"
" }\n"
"}\n"
; //matrix_solve_row_row_align1_trans_lower_trans_solve

/** @brief Source of kernel 'trans_lower_solve' (A^T with B, diagonal division, ascending rows). */
const char * const matrix_solve_row_row_align1_trans_lower_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A^T \\ B\n"
"// matrix layouts: A...row_major, B...row_major\n"
"__kernel void trans_lower_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B, \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = 0; row < A_rows; ++row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" if (get_local_id(0) == 0) \n"
" B[row * B_internal_cols + get_group_id(0)] /= A[row + row*A_internal_cols]; \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row * B_internal_cols + get_group_id(0)]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
" B[elim * B_internal_cols + get_group_id(0)] -= temp * A[elim + row * A_internal_cols];\n"
" }\n"
"}\n"
; //matrix_solve_row_row_align1_trans_lower_solve

/** @brief Source of kernel 'unit_upper_solve' (A with B, no diagonal division, descending rows). */
const char * const matrix_solve_row_row_align1_unit_upper_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A \\ B\n"
"// matrix layouts: A...row_major, B...row_major\n"
"__kernel void unit_upper_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B, \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = A_rows-1; row > -1; --row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row * B_internal_cols + get_group_id(0)]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
" B[elim * B_internal_cols + get_group_id(0)] -= temp * A[elim * A_internal_cols + row];\n"
" }\n"
"}\n"
; //matrix_solve_row_row_align1_unit_upper_solve

/** @brief Source of kernel 'trans_upper_solve' (A^T with B, diagonal division, descending rows). */
const char * const matrix_solve_row_row_align1_trans_upper_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A^T \\ B\n"
"// matrix layouts: A...row_major, B...row_major\n"
"__kernel void trans_upper_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B, \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = A_rows-1; row > -1; --row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" if (get_local_id(0) == 0) \n"
" B[row * B_internal_cols + get_group_id(0)] /= A[row + row*A_internal_cols]; \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row * B_internal_cols + get_group_id(0)]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
" B[elim * B_internal_cols + get_group_id(0)] -= temp * A[elim + row * A_internal_cols];\n"
" }\n"
"}\n"
; //matrix_solve_row_row_align1_trans_upper_solve

/** @brief Source of kernel 'upper_trans_solve' (A with B^T, diagonal division, descending rows). */
const char * const matrix_solve_row_row_align1_upper_trans_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A \\ B^T\n"
"// matrix layouts: A...row_major, B...row_major\n"
"__kernel void upper_trans_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B, \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = A_rows-1; row > -1; --row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" if (get_local_id(0) == 0) \n"
" B[row + get_group_id(0) * B_internal_cols] /= A[row + row*A_internal_cols]; \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row + get_group_id(0) * B_internal_cols]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
" B[elim + get_group_id(0) * B_internal_cols] -= temp * A[elim * A_internal_cols + row];\n"
" }\n"
"}\n"
; //matrix_solve_row_row_align1_upper_trans_solve

/** @brief Source of kernel 'trans_unit_lower_trans_solve' (A^T with B^T, no diagonal division, ascending rows). */
const char * const matrix_solve_row_row_align1_trans_unit_lower_trans_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A^T \\ B^T\n"
"// matrix layouts: A...row_major, B...row_major\n"
"__kernel void trans_unit_lower_trans_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B, \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = 0; row < A_rows; ++row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row + get_group_id(0) * B_internal_cols]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
" B[elim + get_group_id(0) * B_internal_cols] -= temp * A[elim + row * A_internal_cols];\n"
" }\n"
"}\n"
; //matrix_solve_row_row_align1_trans_unit_lower_trans_solve

/** @brief Source of kernel 'trans_unit_upper_solve' (A^T with B, no diagonal division, descending rows). */
const char * const matrix_solve_row_row_align1_trans_unit_upper_solve =
"// file automatically generated - do not edit!\n"
"// inplace solve A^T \\ B\n"
"// matrix layouts: A...row_major, B...row_major\n"
"__kernel void trans_unit_upper_solve(\n"
" __global const float * A,\n"
" unsigned int A_rows,\n"
" unsigned int A_cols,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global float * B, \n"
" unsigned int B_rows,\n"
" unsigned int B_cols,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols)\n"
"{ \n"
" float temp; \n"
" for (int row = A_rows-1; row > -1; --row) \n"
" { \n"
" barrier(CLK_GLOBAL_MEM_FENCE); \n"
" temp = B[row * B_internal_cols + get_group_id(0)]; \n"
" //eliminate column of op(A) with index 'row' in parallel: \n"
" for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
" B[elim * B_internal_cols + get_group_id(0)] -= temp * A[elim + row * A_internal_cols];\n"
" }\n"
"}\n"
; //matrix_solve_row_row_align1_trans_unit_upper_solve

  }  //namespace kernels
 }  //namespace linalg
}  //namespace viennacl
#endif
1.7.6.1