|
ViennaCL - The Vienna Computing Library
1.1.2
|
00001 #ifndef _VIENNACL_VECTOR_KERNELS_HPP_ 00002 #define _VIENNACL_VECTOR_KERNELS_HPP_ 00003 #include "viennacl/tools/tools.hpp" 00004 #include "viennacl/ocl/kernel.hpp" 00005 #include "viennacl/ocl/platform.hpp" 00006 #include "viennacl/ocl/utils.hpp" 00007 #include "viennacl/linalg/kernels/vector_source.h" 00008 00009 //Automatically generated file from aux-directory, do not edit manually! 00010 namespace viennacl 00011 { 00012 namespace linalg 00013 { 00014 namespace kernels 00015 { 00016 template<class TYPE, unsigned int alignment> 00017 struct vector; 00018 00019 00021 template <> 00022 struct vector<float, 4> 00023 { 00024 static std::string program_name() 00025 { 00026 return "f_vector_4"; 00027 } 00028 static void init() 00029 { 00030 viennacl::ocl::DOUBLE_PRECISION_CHECKER<float>::apply(); 00031 static std::map<cl_context, bool> init_done; 00032 viennacl::ocl::context & context_ = viennacl::ocl::current_context(); 00033 if (!init_done[context_.handle()]) 00034 { 00035 std::string source; 00036 source.append(vector_align1_inplace_divide); 00037 source.append(vector_align4_inplace_div_sub); 00038 source.append(vector_align1_vmax); 00039 source.append(vector_align1_index_norm_inf); 00040 source.append(vector_align1_sub); 00041 source.append(vector_align4_mul_add); 00042 source.append(vector_align1_inplace_sub); 00043 source.append(vector_align1_inner_prod); 00044 source.append(vector_align1_mult); 00045 source.append(vector_align1_diag_precond); 00046 source.append(vector_align4_inplace_mul_add); 00047 source.append(vector_align1_norm_1); 00048 source.append(vector_align1_divide); 00049 source.append(vector_align1_swap); 00050 source.append(vector_align1_norm_inf); 00051 source.append(vector_align4_inplace_div_add); 00052 source.append(vector_align1_sqrt_sum); 00053 source.append(vector_align1_inplace_add); 00054 source.append(vector_align1_mul_sub); 00055 source.append(vector_align1_sum); 00056 source.append(vector_align4_cpu_mul_add); 00057 source.append(vector_align1_cpu_mult); 00058 source.append(vector_align4_cpu_inplace_mul_add); 00059 source.append(vector_align1_cpu_inplace_mult); 00060 source.append(vector_align1_plane_rotation); 00061 source.append(vector_align1_clear); 00062 source.append(vector_align1_inplace_mult); 00063 source.append(vector_align4_inplace_mul_sub); 00064 source.append(vector_align1_norm_2); 00065 source.append(vector_align1_add); 00066 std::string prog_name = program_name(); 00067 #ifdef VIENNACL_BUILD_INFO 00068 std::cout << "Creating program " << prog_name << std::endl; 00069 #endif 00070 context_.add_program(source, prog_name); 00071 viennacl::ocl::program & prog_ = context_.get_program(prog_name); 00072 prog_.add_kernel("inplace_divide"); 00073 prog_.add_kernel("inplace_div_sub"); 00074 prog_.add_kernel("vmax"); 00075 prog_.add_kernel("index_norm_inf"); 00076 prog_.add_kernel("sub"); 00077 prog_.add_kernel("mul_add"); 00078 prog_.add_kernel("inplace_sub"); 00079 prog_.add_kernel("inner_prod"); 00080 prog_.add_kernel("mult"); 00081 prog_.add_kernel("diag_precond"); 00082 prog_.add_kernel("inplace_mul_add"); 00083 prog_.add_kernel("norm_1"); 00084 prog_.add_kernel("divide"); 00085 prog_.add_kernel("swap"); 00086 prog_.add_kernel("norm_inf"); 00087 prog_.add_kernel("inplace_div_add"); 00088 prog_.add_kernel("sqrt_sum"); 00089 prog_.add_kernel("inplace_add"); 00090 prog_.add_kernel("mul_sub"); 00091 prog_.add_kernel("sum"); 00092 prog_.add_kernel("cpu_mul_add"); 00093 prog_.add_kernel("cpu_mult"); 00094 prog_.add_kernel("cpu_inplace_mul_add"); 00095 prog_.add_kernel("cpu_inplace_mult"); 00096 prog_.add_kernel("plane_rotation"); 00097 prog_.add_kernel("clear"); 00098 prog_.add_kernel("inplace_mult"); 00099 prog_.add_kernel("inplace_mul_sub"); 00100 prog_.add_kernel("norm_2"); 00101 prog_.add_kernel("add"); 00102 init_done[context_.handle()] = true; 00103 } //if 00104 } //init 00105 }; // struct 00106 00107 template <> 00108 struct vector<float, 1> 00109 { 00110 static std::string program_name() 00111 { 00112 return "f_vector_1"; 00113 } 00114 static void init() 00115 { 00116 viennacl::ocl::DOUBLE_PRECISION_CHECKER<float>::apply(); 00117 static std::map<cl_context, bool> init_done; 00118 viennacl::ocl::context & context_ = viennacl::ocl::current_context(); 00119 if (!init_done[context_.handle()]) 00120 { 00121 std::string source; 00122 source.append(vector_align1_inplace_divide); 00123 source.append(vector_align1_inplace_div_sub); 00124 source.append(vector_align1_vmax); 00125 source.append(vector_align1_index_norm_inf); 00126 source.append(vector_align1_sub); 00127 source.append(vector_align1_mul_add); 00128 source.append(vector_align1_inplace_sub); 00129 source.append(vector_align1_inner_prod); 00130 source.append(vector_align1_mult); 00131 source.append(vector_align1_diag_precond); 00132 source.append(vector_align1_inplace_mul_add); 00133 source.append(vector_align1_norm_1); 00134 source.append(vector_align1_divide); 00135 source.append(vector_align1_swap); 00136 source.append(vector_align1_norm_inf); 00137 source.append(vector_align1_inplace_div_add); 00138 source.append(vector_align1_sqrt_sum); 00139 source.append(vector_align1_inplace_add); 00140 source.append(vector_align1_mul_sub); 00141 source.append(vector_align1_sum); 00142 source.append(vector_align1_cpu_mul_add); 00143 source.append(vector_align1_cpu_mult); 00144 source.append(vector_align1_cpu_inplace_mul_add); 00145 source.append(vector_align1_cpu_inplace_mult); 00146 source.append(vector_align1_plane_rotation); 00147 source.append(vector_align1_clear); 00148 source.append(vector_align1_inplace_mult); 00149 source.append(vector_align1_inplace_mul_sub); 00150 source.append(vector_align1_norm_2); 00151 source.append(vector_align1_add); 00152 std::string prog_name = program_name(); 00153 #ifdef VIENNACL_BUILD_INFO 00154 std::cout << "Creating program " << prog_name << std::endl; 00155 #endif 00156 context_.add_program(source, prog_name); 00157 viennacl::ocl::program & prog_ = context_.get_program(prog_name); 00158 prog_.add_kernel("inplace_divide"); 00159 prog_.add_kernel("inplace_div_sub"); 00160 prog_.add_kernel("vmax"); 00161 prog_.add_kernel("index_norm_inf"); 00162 prog_.add_kernel("sub"); 00163 prog_.add_kernel("mul_add"); 00164 prog_.add_kernel("inplace_sub"); 00165 prog_.add_kernel("inner_prod"); 00166 prog_.add_kernel("mult"); 00167 prog_.add_kernel("diag_precond"); 00168 prog_.add_kernel("inplace_mul_add"); 00169 prog_.add_kernel("norm_1"); 00170 prog_.add_kernel("divide"); 00171 prog_.add_kernel("swap"); 00172 prog_.add_kernel("norm_inf"); 00173 prog_.add_kernel("inplace_div_add"); 00174 prog_.add_kernel("sqrt_sum"); 00175 prog_.add_kernel("inplace_add"); 00176 prog_.add_kernel("mul_sub"); 00177 prog_.add_kernel("sum"); 00178 prog_.add_kernel("cpu_mul_add"); 00179 prog_.add_kernel("cpu_mult"); 00180 prog_.add_kernel("cpu_inplace_mul_add"); 00181 prog_.add_kernel("cpu_inplace_mult"); 00182 prog_.add_kernel("plane_rotation"); 00183 prog_.add_kernel("clear"); 00184 prog_.add_kernel("inplace_mult"); 00185 prog_.add_kernel("inplace_mul_sub"); 00186 prog_.add_kernel("norm_2"); 00187 prog_.add_kernel("add"); 00188 init_done[context_.handle()] = true; 00189 } //if 00190 } //init 00191 }; // struct 00192 00193 template <> 00194 struct vector<float, 16> 00195 { 00196 static std::string program_name() 00197 { 00198 return "f_vector_16"; 00199 } 00200 static void init() 00201 { 00202 viennacl::ocl::DOUBLE_PRECISION_CHECKER<float>::apply(); 00203 static std::map<cl_context, bool> init_done; 00204 viennacl::ocl::context & context_ = viennacl::ocl::current_context(); 00205 if (!init_done[context_.handle()]) 00206 { 00207 std::string source; 00208 source.append(vector_align16_inplace_divide); 00209 source.append(vector_align4_inplace_div_sub); 00210 source.append(vector_align1_vmax); 00211 source.append(vector_align1_index_norm_inf); 00212 source.append(vector_align16_sub); 00213 source.append(vector_align4_mul_add); 00214 source.append(vector_align16_inplace_sub); 00215 source.append(vector_align1_inner_prod); 00216 source.append(vector_align16_mult); 00217 source.append(vector_align1_diag_precond); 00218 source.append(vector_align4_inplace_mul_add); 00219 source.append(vector_align1_norm_1); 00220 source.append(vector_align16_divide); 00221 source.append(vector_align1_swap); 00222 source.append(vector_align1_norm_inf); 00223 source.append(vector_align4_inplace_div_add); 00224 source.append(vector_align1_sqrt_sum); 00225 source.append(vector_align16_inplace_add); 00226 source.append(vector_align1_mul_sub); 00227 source.append(vector_align1_sum); 00228 source.append(vector_align4_cpu_mul_add); 00229 source.append(vector_align16_cpu_mult); 00230 source.append(vector_align4_cpu_inplace_mul_add); 00231 source.append(vector_align1_cpu_inplace_mult); 00232 source.append(vector_align1_plane_rotation); 00233 source.append(vector_align1_clear); 00234 source.append(vector_align16_inplace_mult); 00235 source.append(vector_align4_inplace_mul_sub); 00236 source.append(vector_align1_norm_2); 00237 source.append(vector_align16_add); 00238 std::string prog_name = program_name(); 00239 #ifdef VIENNACL_BUILD_INFO 00240 std::cout << "Creating program " << prog_name << std::endl; 00241 #endif 00242 context_.add_program(source, prog_name); 00243 viennacl::ocl::program & prog_ = context_.get_program(prog_name); 00244 prog_.add_kernel("inplace_divide"); 00245 prog_.add_kernel("inplace_div_sub"); 00246 prog_.add_kernel("vmax"); 00247 prog_.add_kernel("index_norm_inf"); 00248 prog_.add_kernel("sub"); 00249 prog_.add_kernel("mul_add"); 00250 prog_.add_kernel("inplace_sub"); 00251 prog_.add_kernel("inner_prod"); 00252 prog_.add_kernel("mult"); 00253 prog_.add_kernel("diag_precond"); 00254 prog_.add_kernel("inplace_mul_add"); 00255 prog_.add_kernel("norm_1"); 00256 prog_.add_kernel("divide"); 00257 prog_.add_kernel("swap"); 00258 prog_.add_kernel("norm_inf"); 00259 prog_.add_kernel("inplace_div_add"); 00260 prog_.add_kernel("sqrt_sum"); 00261 prog_.add_kernel("inplace_add"); 00262 prog_.add_kernel("mul_sub"); 00263 prog_.add_kernel("sum"); 00264 prog_.add_kernel("cpu_mul_add"); 00265 prog_.add_kernel("cpu_mult"); 00266 prog_.add_kernel("cpu_inplace_mul_add"); 00267 prog_.add_kernel("cpu_inplace_mult"); 00268 prog_.add_kernel("plane_rotation"); 00269 prog_.add_kernel("clear"); 00270 prog_.add_kernel("inplace_mult"); 00271 prog_.add_kernel("inplace_mul_sub"); 00272 prog_.add_kernel("norm_2"); 00273 prog_.add_kernel("add"); 00274 init_done[context_.handle()] = true; 00275 } //if 00276 } //init 00277 }; // struct 00278 00279 00280 00282 template <> 00283 struct vector<double, 4> 00284 { 00285 static std::string program_name() 00286 { 00287 return "d_vector_4"; 00288 } 00289 static void init() 00290 { 00291 viennacl::ocl::DOUBLE_PRECISION_CHECKER<double>::apply(); 00292 static std::map<cl_context, bool> init_done; 00293 viennacl::ocl::context & context_ = viennacl::ocl::current_context(); 00294 if (!init_done[context_.handle()]) 00295 { 00296 std::string source; 00297 std::string fp64_ext = viennacl::ocl::current_device().double_support_extension(); 00298 source.append(viennacl::tools::make_double_kernel(vector_align1_inplace_divide, fp64_ext)); 00299 source.append(viennacl::tools::make_double_kernel(vector_align4_inplace_div_sub, fp64_ext)); 00300 source.append(viennacl::tools::make_double_kernel(vector_align1_vmax, fp64_ext)); 00301 source.append(viennacl::tools::make_double_kernel(vector_align1_index_norm_inf, fp64_ext)); 00302 source.append(viennacl::tools::make_double_kernel(vector_align1_sub, fp64_ext)); 00303 source.append(viennacl::tools::make_double_kernel(vector_align4_mul_add, fp64_ext)); 00304 source.append(viennacl::tools::make_double_kernel(vector_align1_inplace_sub, fp64_ext)); 00305 source.append(viennacl::tools::make_double_kernel(vector_align1_inner_prod, fp64_ext)); 00306 source.append(viennacl::tools::make_double_kernel(vector_align1_mult, fp64_ext)); 00307 source.append(viennacl::tools::make_double_kernel(vector_align1_diag_precond, fp64_ext)); 00308 source.append(viennacl::tools::make_double_kernel(vector_align4_inplace_mul_add, fp64_ext)); 00309 source.append(viennacl::tools::make_double_kernel(vector_align1_norm_1, fp64_ext)); 00310 source.append(viennacl::tools::make_double_kernel(vector_align1_divide, fp64_ext)); 00311 source.append(viennacl::tools::make_double_kernel(vector_align1_swap, fp64_ext)); 00312 source.append(viennacl::tools::make_double_kernel(vector_align1_norm_inf, fp64_ext)); 00313 source.append(viennacl::tools::make_double_kernel(vector_align4_inplace_div_add, fp64_ext)); 00314 source.append(viennacl::tools::make_double_kernel(vector_align1_sqrt_sum, fp64_ext)); 00315 source.append(viennacl::tools::make_double_kernel(vector_align1_inplace_add, fp64_ext)); 00316 source.append(viennacl::tools::make_double_kernel(vector_align1_mul_sub, fp64_ext)); 00317 source.append(viennacl::tools::make_double_kernel(vector_align1_sum, fp64_ext)); 00318 source.append(viennacl::tools::make_double_kernel(vector_align4_cpu_mul_add, fp64_ext)); 00319 source.append(viennacl::tools::make_double_kernel(vector_align1_cpu_mult, fp64_ext)); 00320 source.append(viennacl::tools::make_double_kernel(vector_align4_cpu_inplace_mul_add, fp64_ext)); 00321 source.append(viennacl::tools::make_double_kernel(vector_align1_cpu_inplace_mult, fp64_ext)); 00322 source.append(viennacl::tools::make_double_kernel(vector_align1_plane_rotation, fp64_ext)); 00323 source.append(viennacl::tools::make_double_kernel(vector_align1_clear, fp64_ext)); 00324 source.append(viennacl::tools::make_double_kernel(vector_align1_inplace_mult, fp64_ext)); 00325 source.append(viennacl::tools::make_double_kernel(vector_align4_inplace_mul_sub, fp64_ext)); 00326 source.append(viennacl::tools::make_double_kernel(vector_align1_norm_2, fp64_ext)); 00327 source.append(viennacl::tools::make_double_kernel(vector_align1_add, fp64_ext)); 00328 std::string prog_name = program_name(); 00329 #ifdef VIENNACL_BUILD_INFO 00330 std::cout << "Creating program " << prog_name << std::endl; 00331 #endif 00332 context_.add_program(source, prog_name); 00333 viennacl::ocl::program & prog_ = context_.get_program(prog_name); 00334 prog_.add_kernel("inplace_divide"); 00335 prog_.add_kernel("inplace_div_sub"); 00336 prog_.add_kernel("vmax"); 00337 prog_.add_kernel("index_norm_inf"); 00338 prog_.add_kernel("sub"); 00339 prog_.add_kernel("mul_add"); 00340 prog_.add_kernel("inplace_sub"); 00341 prog_.add_kernel("inner_prod"); 00342 prog_.add_kernel("mult"); 00343 prog_.add_kernel("diag_precond"); 00344 prog_.add_kernel("inplace_mul_add"); 00345 prog_.add_kernel("norm_1"); 00346 prog_.add_kernel("divide"); 00347 prog_.add_kernel("swap"); 00348 prog_.add_kernel("norm_inf"); 00349 prog_.add_kernel("inplace_div_add"); 00350 prog_.add_kernel("sqrt_sum"); 00351 prog_.add_kernel("inplace_add"); 00352 prog_.add_kernel("mul_sub"); 00353 prog_.add_kernel("sum"); 00354 prog_.add_kernel("cpu_mul_add"); 00355 prog_.add_kernel("cpu_mult"); 00356 prog_.add_kernel("cpu_inplace_mul_add"); 00357 prog_.add_kernel("cpu_inplace_mult"); 00358 prog_.add_kernel("plane_rotation"); 00359 prog_.add_kernel("clear"); 00360 prog_.add_kernel("inplace_mult"); 00361 prog_.add_kernel("inplace_mul_sub"); 00362 prog_.add_kernel("norm_2"); 00363 prog_.add_kernel("add"); 00364 init_done[context_.handle()] = true; 00365 } //if 00366 } //init 00367 }; // struct 00368 00369 template <> 00370 struct vector<double, 1> 00371 { 00372 static std::string program_name() 00373 { 00374 return "d_vector_1"; 00375 } 00376 static void init() 00377 { 00378 viennacl::ocl::DOUBLE_PRECISION_CHECKER<double>::apply(); 00379 static std::map<cl_context, bool> init_done; 00380 viennacl::ocl::context & context_ = viennacl::ocl::current_context(); 00381 if (!init_done[context_.handle()]) 00382 { 00383 std::string source; 00384 std::string fp64_ext = viennacl::ocl::current_device().double_support_extension(); 00385 source.append(viennacl::tools::make_double_kernel(vector_align1_inplace_divide, fp64_ext)); 00386 source.append(viennacl::tools::make_double_kernel(vector_align1_inplace_div_sub, fp64_ext)); 00387 source.append(viennacl::tools::make_double_kernel(vector_align1_vmax, fp64_ext)); 00388 source.append(viennacl::tools::make_double_kernel(vector_align1_index_norm_inf, fp64_ext)); 00389 source.append(viennacl::tools::make_double_kernel(vector_align1_sub, fp64_ext)); 00390 source.append(viennacl::tools::make_double_kernel(vector_align1_mul_add, fp64_ext)); 00391 source.append(viennacl::tools::make_double_kernel(vector_align1_inplace_sub, fp64_ext)); 00392 source.append(viennacl::tools::make_double_kernel(vector_align1_inner_prod, fp64_ext)); 00393 source.append(viennacl::tools::make_double_kernel(vector_align1_mult, fp64_ext)); 00394 source.append(viennacl::tools::make_double_kernel(vector_align1_diag_precond, fp64_ext)); 00395 source.append(viennacl::tools::make_double_kernel(vector_align1_inplace_mul_add, fp64_ext)); 00396 source.append(viennacl::tools::make_double_kernel(vector_align1_norm_1, fp64_ext)); 00397 source.append(viennacl::tools::make_double_kernel(vector_align1_divide, fp64_ext)); 00398 source.append(viennacl::tools::make_double_kernel(vector_align1_swap, fp64_ext)); 00399 source.append(viennacl::tools::make_double_kernel(vector_align1_norm_inf, fp64_ext)); 00400 source.append(viennacl::tools::make_double_kernel(vector_align1_inplace_div_add, fp64_ext)); 00401 source.append(viennacl::tools::make_double_kernel(vector_align1_sqrt_sum, fp64_ext)); 00402 source.append(viennacl::tools::make_double_kernel(vector_align1_inplace_add, fp64_ext)); 00403 source.append(viennacl::tools::make_double_kernel(vector_align1_mul_sub, fp64_ext)); 00404 source.append(viennacl::tools::make_double_kernel(vector_align1_sum, fp64_ext)); 00405 source.append(viennacl::tools::make_double_kernel(vector_align1_cpu_mul_add, fp64_ext)); 00406 source.append(viennacl::tools::make_double_kernel(vector_align1_cpu_mult, fp64_ext)); 00407 source.append(viennacl::tools::make_double_kernel(vector_align1_cpu_inplace_mul_add, fp64_ext)); 00408 source.append(viennacl::tools::make_double_kernel(vector_align1_cpu_inplace_mult, fp64_ext)); 00409 source.append(viennacl::tools::make_double_kernel(vector_align1_plane_rotation, fp64_ext)); 00410 source.append(viennacl::tools::make_double_kernel(vector_align1_clear, fp64_ext)); 00411 source.append(viennacl::tools::make_double_kernel(vector_align1_inplace_mult, fp64_ext)); 00412 source.append(viennacl::tools::make_double_kernel(vector_align1_inplace_mul_sub, fp64_ext)); 00413 source.append(viennacl::tools::make_double_kernel(vector_align1_norm_2, fp64_ext)); 00414 source.append(viennacl::tools::make_double_kernel(vector_align1_add, fp64_ext)); 00415 std::string prog_name = program_name(); 00416 #ifdef VIENNACL_BUILD_INFO 00417 std::cout << "Creating program " << prog_name << std::endl; 00418 #endif 00419 context_.add_program(source, prog_name); 00420 viennacl::ocl::program & prog_ = context_.get_program(prog_name); 00421 prog_.add_kernel("inplace_divide"); 00422 prog_.add_kernel("inplace_div_sub"); 00423 prog_.add_kernel("vmax"); 00424 prog_.add_kernel("index_norm_inf"); 00425 prog_.add_kernel("sub"); 00426 prog_.add_kernel("mul_add"); 00427 prog_.add_kernel("inplace_sub"); 00428 prog_.add_kernel("inner_prod"); 00429 prog_.add_kernel("mult"); 00430 prog_.add_kernel("diag_precond"); 00431 prog_.add_kernel("inplace_mul_add"); 00432 prog_.add_kernel("norm_1"); 00433 prog_.add_kernel("divide"); 00434 prog_.add_kernel("swap"); 00435 prog_.add_kernel("norm_inf"); 00436 prog_.add_kernel("inplace_div_add"); 00437 prog_.add_kernel("sqrt_sum"); 00438 prog_.add_kernel("inplace_add"); 00439 prog_.add_kernel("mul_sub"); 00440 prog_.add_kernel("sum"); 00441 prog_.add_kernel("cpu_mul_add"); 00442 prog_.add_kernel("cpu_mult"); 00443 prog_.add_kernel("cpu_inplace_mul_add"); 00444 prog_.add_kernel("cpu_inplace_mult"); 00445 prog_.add_kernel("plane_rotation"); 00446 prog_.add_kernel("clear"); 00447 prog_.add_kernel("inplace_mult"); 00448 prog_.add_kernel("inplace_mul_sub"); 00449 prog_.add_kernel("norm_2"); 00450 prog_.add_kernel("add"); 00451 init_done[context_.handle()] = true; 00452 } //if 00453 } //init 00454 }; // struct 00455 00456 template <> 00457 struct vector<double, 16> 00458 { 00459 static std::string program_name() 00460 { 00461 return "d_vector_16"; 00462 } 00463 static void init() 00464 { 00465 viennacl::ocl::DOUBLE_PRECISION_CHECKER<double>::apply(); 00466 static std::map<cl_context, bool> init_done; 00467 viennacl::ocl::context & context_ = viennacl::ocl::current_context(); 00468 if (!init_done[context_.handle()]) 00469 { 00470 std::string source; 00471 std::string fp64_ext = viennacl::ocl::current_device().double_support_extension(); 00472 source.append(viennacl::tools::make_double_kernel(vector_align16_inplace_divide, fp64_ext)); 00473 source.append(viennacl::tools::make_double_kernel(vector_align4_inplace_div_sub, fp64_ext)); 00474 source.append(viennacl::tools::make_double_kernel(vector_align1_vmax, fp64_ext)); 00475 source.append(viennacl::tools::make_double_kernel(vector_align1_index_norm_inf, fp64_ext)); 00476 source.append(viennacl::tools::make_double_kernel(vector_align16_sub, fp64_ext)); 00477 source.append(viennacl::tools::make_double_kernel(vector_align4_mul_add, fp64_ext)); 00478 source.append(viennacl::tools::make_double_kernel(vector_align16_inplace_sub, fp64_ext)); 00479 source.append(viennacl::tools::make_double_kernel(vector_align1_inner_prod, fp64_ext)); 00480 source.append(viennacl::tools::make_double_kernel(vector_align16_mult, fp64_ext)); 00481 source.append(viennacl::tools::make_double_kernel(vector_align1_diag_precond, fp64_ext)); 00482 source.append(viennacl::tools::make_double_kernel(vector_align4_inplace_mul_add, fp64_ext)); 00483 source.append(viennacl::tools::make_double_kernel(vector_align1_norm_1, fp64_ext)); 00484 source.append(viennacl::tools::make_double_kernel(vector_align16_divide, fp64_ext)); 00485 source.append(viennacl::tools::make_double_kernel(vector_align1_swap, fp64_ext)); 00486 source.append(viennacl::tools::make_double_kernel(vector_align1_norm_inf, fp64_ext)); 00487 source.append(viennacl::tools::make_double_kernel(vector_align4_inplace_div_add, fp64_ext)); 00488 source.append(viennacl::tools::make_double_kernel(vector_align1_sqrt_sum, fp64_ext)); 00489 source.append(viennacl::tools::make_double_kernel(vector_align16_inplace_add, fp64_ext)); 00490 source.append(viennacl::tools::make_double_kernel(vector_align1_mul_sub, fp64_ext)); 00491 source.append(viennacl::tools::make_double_kernel(vector_align1_sum, fp64_ext)); 00492 source.append(viennacl::tools::make_double_kernel(vector_align4_cpu_mul_add, fp64_ext)); 00493 source.append(viennacl::tools::make_double_kernel(vector_align16_cpu_mult, fp64_ext)); 00494 source.append(viennacl::tools::make_double_kernel(vector_align4_cpu_inplace_mul_add, fp64_ext)); 00495 source.append(viennacl::tools::make_double_kernel(vector_align1_cpu_inplace_mult, fp64_ext)); 00496 source.append(viennacl::tools::make_double_kernel(vector_align1_plane_rotation, fp64_ext)); 00497 source.append(viennacl::tools::make_double_kernel(vector_align1_clear, fp64_ext)); 00498 source.append(viennacl::tools::make_double_kernel(vector_align16_inplace_mult, fp64_ext)); 00499 source.append(viennacl::tools::make_double_kernel(vector_align4_inplace_mul_sub, fp64_ext)); 00500 source.append(viennacl::tools::make_double_kernel(vector_align1_norm_2, fp64_ext)); 00501 source.append(viennacl::tools::make_double_kernel(vector_align16_add, fp64_ext)); 00502 std::string prog_name = program_name(); 00503 #ifdef VIENNACL_BUILD_INFO 00504 std::cout << "Creating program " << prog_name << std::endl; 00505 #endif 00506 context_.add_program(source, prog_name); 00507 viennacl::ocl::program & prog_ = context_.get_program(prog_name); 00508 prog_.add_kernel("inplace_divide"); 00509 prog_.add_kernel("inplace_div_sub"); 00510 prog_.add_kernel("vmax"); 00511 prog_.add_kernel("index_norm_inf"); 00512 prog_.add_kernel("sub"); 00513 prog_.add_kernel("mul_add"); 00514 prog_.add_kernel("inplace_sub"); 00515 prog_.add_kernel("inner_prod"); 00516 prog_.add_kernel("mult"); 00517 prog_.add_kernel("diag_precond"); 00518 prog_.add_kernel("inplace_mul_add"); 00519 prog_.add_kernel("norm_1"); 00520 prog_.add_kernel("divide"); 00521 prog_.add_kernel("swap"); 00522 prog_.add_kernel("norm_inf"); 00523 prog_.add_kernel("inplace_div_add"); 00524 prog_.add_kernel("sqrt_sum"); 00525 prog_.add_kernel("inplace_add"); 00526 prog_.add_kernel("mul_sub"); 00527 prog_.add_kernel("sum"); 00528 prog_.add_kernel("cpu_mul_add"); 00529 prog_.add_kernel("cpu_mult"); 00530 prog_.add_kernel("cpu_inplace_mul_add"); 00531 prog_.add_kernel("cpu_inplace_mult"); 00532 prog_.add_kernel("plane_rotation"); 00533 prog_.add_kernel("clear"); 00534 prog_.add_kernel("inplace_mult"); 00535 prog_.add_kernel("inplace_mul_sub"); 00536 prog_.add_kernel("norm_2"); 00537 prog_.add_kernel("add"); 00538 init_done[context_.handle()] = true; 00539 } //if 00540 } //init 00541 }; // struct 00542 00543 00544 } //namespace kernels 00545 } //namespace linalg 00546 } //namespace viennacl 00547 #endif
1.7.6.1