ViennaCL - The Vienna Computing Library, version 1.1.2
00001 /* ======================================================================= 00002 Copyright (c) 2010, Institute for Microelectronics, TU Vienna. 00003 http://www.iue.tuwien.ac.at 00004 ----------------- 00005 ViennaCL - The Vienna Computing Library 00006 ----------------- 00007 00008 authors: Karl Rupp rupp@iue.tuwien.ac.at 00009 Florian Rudolf flo.rudy+viennacl@gmail.com 00010 Josef Weinbub weinbub@iue.tuwien.ac.at 00011 00012 license: MIT (X11), see file LICENSE in the ViennaCL base directory 00013 ======================================================================= */ 00014 00015 #ifndef _VIENNACL_MATRIX_OPERATIONS_HPP_ 00016 #define _VIENNACL_MATRIX_OPERATIONS_HPP_ 00017 00022 #include "viennacl/forwards.h" 00023 #include "viennacl/ocl/device.hpp" 00024 #include "viennacl/ocl/handle.hpp" 00025 #include "viennacl/ocl/kernel.hpp" 00026 #include "viennacl/scalar.hpp" 00027 #include "viennacl/vector.hpp" 00028 #include "viennacl/tools/tools.hpp" 00029 #include "viennacl/tools/matrix_kernel_class_deducer.hpp" 00030 #include "viennacl/tools/matrix_prod_kernel_class_deducer.hpp" 00031 #include "viennacl/linalg/kernels/vector_kernels.h" 00032 #include "viennacl/linalg/kernels/matrix_row_kernels.h" 00033 #include "viennacl/linalg/kernels/matrix_col_kernels.h" 00034 00035 #include "viennacl/linalg/kernels/matrix_prod_col_col_col_kernels.h" 00036 #include "viennacl/linalg/kernels/matrix_prod_col_col_row_kernels.h" 00037 #include "viennacl/linalg/kernels/matrix_prod_col_row_col_kernels.h" 00038 #include "viennacl/linalg/kernels/matrix_prod_col_row_row_kernels.h" 00039 00040 #include "viennacl/linalg/kernels/matrix_prod_row_col_col_kernels.h" 00041 #include "viennacl/linalg/kernels/matrix_prod_row_col_row_kernels.h" 00042 #include "viennacl/linalg/kernels/matrix_prod_row_row_col_kernels.h" 00043 #include "viennacl/linalg/kernels/matrix_prod_row_row_row_kernels.h" 00044 00045 namespace viennacl 00046 { 00047 namespace linalg 00048 { 00049 00058 template<class TYPE, 
typename F, unsigned int ALIGNMENT> 00059 void add(const viennacl::matrix<TYPE, F, ALIGNMENT> & mat1, 00060 const viennacl::matrix<TYPE, F, ALIGNMENT> & mat2, 00061 viennacl::matrix<TYPE, F, ALIGNMENT> & result) 00062 { 00063 assert(result.size1() == mat1.size1()); 00064 assert(result.size2() == mat1.size2()); 00065 assert(result.size1() == mat2.size1()); 00066 assert(result.size2() == mat2.size2()); 00067 00068 typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<TYPE, F, ALIGNMENT> >::ResultType KernelClass; 00069 00070 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "add"); 00071 assert( (mat1.internal_size() == mat2.internal_size()) 00072 && "Operands must have same dimension and memory layout in this version of ViennaCL!"); 00073 unsigned int size = std::min(mat1.internal_size(), mat2.internal_size()); 00074 00075 viennacl::ocl::enqueue(k(mat1, mat2, result, size)); 00076 } 00077 00086 template<class TYPE, typename F, unsigned int ALIGNMENT> 00087 void inplace_add(viennacl::matrix<TYPE, F, ALIGNMENT> & result, 00088 const viennacl::matrix<TYPE, F, ALIGNMENT> & mat2) 00089 { 00090 assert(result.size1() == mat2.size1()); 00091 assert(result.size2() == mat2.size2()); 00092 00093 typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<TYPE, F, ALIGNMENT> >::ResultType KernelClass; 00094 00095 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "inplace_add"); 00096 assert( (result.internal_size() == mat2.internal_size()) 00097 && "Operands must have same dimension and memory layout in this version of ViennaCL!"); 00098 unsigned int size = std::min(result.internal_size(), mat2.internal_size()); 00099 00100 viennacl::ocl::enqueue(k(result, mat2, size)); 00101 } 00102 00103 00112 template<class TYPE, typename F, unsigned int ALIGNMENT> 00113 void sub(const viennacl::matrix<TYPE, F, ALIGNMENT> & mat1, 00114 const viennacl::matrix<TYPE, F, ALIGNMENT> & mat2, 00115 
viennacl::matrix<TYPE, F, ALIGNMENT> & result) 00116 { 00117 assert(result.size1() == mat1.size1()); 00118 assert(result.size2() == mat1.size2()); 00119 assert(result.size1() == mat2.size1()); 00120 assert(result.size2() == mat2.size2()); 00121 00122 typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<TYPE, F, ALIGNMENT> >::ResultType KernelClass; 00123 00124 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "sub"); 00125 assert( (mat1.internal_size() == mat2.internal_size()) 00126 && "Operands must have same dimension and memory layout in this version of ViennaCL!"); 00127 unsigned int size = std::min(mat1.internal_size(), mat2.internal_size()); 00128 00129 viennacl::ocl::enqueue(k(mat1, mat2, result, size)); 00130 } 00131 00140 template<class TYPE, typename F, unsigned int ALIGNMENT> 00141 void inplace_sub(viennacl::matrix<TYPE, F, ALIGNMENT> & result, 00142 const viennacl::matrix<TYPE, F, ALIGNMENT> & mat2) 00143 { 00144 assert(result.size1() == mat2.size1()); 00145 assert(result.size2() == mat2.size2()); 00146 00147 typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<TYPE, F, ALIGNMENT> >::ResultType KernelClass; 00148 00149 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "inplace_sub"); 00150 assert( (result.internal_size() == mat2.internal_size()) 00151 && "Operands must have same dimension and memory layout in this version of ViennaCL!"); 00152 unsigned int size = std::min(result.internal_size(), mat2.internal_size()); 00153 00154 viennacl::ocl::enqueue(k(result, mat2, size)); 00155 } 00156 00165 template<class SCALARTYPE, typename F, unsigned int ALIGNMENT> 00166 void inplace_mult(viennacl::matrix<SCALARTYPE, F, ALIGNMENT> & result, 00167 SCALARTYPE val) 00168 { 00169 typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType KernelClass; 00170 00171 viennacl::ocl::kernel & k = 
viennacl::ocl::get_kernel(KernelClass::program_name(), "cpu_inplace_mult"); 00172 unsigned int size = result.internal_size(); 00173 00174 viennacl::ocl::enqueue(k(result, val, size)); 00175 } 00176 00177 00186 template<class SCALARTYPE, typename F, unsigned int ALIGNMENT> 00187 void inplace_mult(viennacl::matrix<SCALARTYPE, F, ALIGNMENT> & result, 00188 viennacl::scalar<SCALARTYPE> const & val) 00189 { 00190 typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType KernelClass; 00191 00192 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "inplace_mult"); 00193 unsigned int size = result.internal_size(); 00194 00195 viennacl::ocl::enqueue(k(result, val, size)); 00196 } 00197 00198 00199 00208 template<class SCALARTYPE, typename F, unsigned int ALIGNMENT> 00209 void inplace_divide(viennacl::matrix<SCALARTYPE, F, ALIGNMENT> & result, 00210 viennacl::scalar<SCALARTYPE> const & val) 00211 { 00212 typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType KernelClass; 00213 00214 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "inplace_divide"); 00215 unsigned int size = result.internal_size(); 00216 00217 viennacl::ocl::enqueue(k(result, val, size)); 00218 } 00219 00220 // A * x 00228 template<class SCALARTYPE, typename F, unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT> 00229 viennacl::vector_expression<const viennacl::matrix<SCALARTYPE, F, ALIGNMENT>, 00230 const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>, 00231 op_prod > prod_impl(const viennacl::matrix<SCALARTYPE, F, ALIGNMENT> & mat, 00232 const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT> & vec) 00233 { 00234 return viennacl::vector_expression<const viennacl::matrix<SCALARTYPE, F, ALIGNMENT>, 00235 const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>, 00236 op_prod >(mat, vec); 00237 } 00238 00247 template<class TYPE, typename F, 
unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT> 00248 void prod_impl(const viennacl::matrix<TYPE, F, ALIGNMENT> & mat, 00249 const viennacl::vector<TYPE, VECTOR_ALIGNMENT> & vec, 00250 viennacl::vector<TYPE, VECTOR_ALIGNMENT> & result) 00251 { 00252 assert(mat.size2() == vec.size()); 00253 // Inplace matrix-vector products like x = prod(A, x) are currently illegal: Introduce a temporary like y = prod(A, x); x = y; instead 00254 assert(vec.handle() != result.handle() && "No direct inplace matrix-vector product possible. Introduce a temporary!"); 00255 result.resize(mat.size1()); 00256 00257 typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<TYPE, F, ALIGNMENT> >::ResultType KernelClass; 00258 00259 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "vec_mul"); 00260 viennacl::ocl::enqueue( 00261 k(mat, mat.size1(), mat.size2(), mat.internal_size1(), mat.internal_size2(), vec, result)); 00262 } 00263 00264 00265 00266 // trans(A) * x 00274 template<class SCALARTYPE, typename F, unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT> 00275 viennacl::vector_expression<const viennacl::matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>, 00276 const matrix<SCALARTYPE, F, ALIGNMENT>, 00277 op_trans>, 00278 const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>, 00279 op_prod > prod_impl(const viennacl::matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>, 00280 const matrix<SCALARTYPE, F, ALIGNMENT>, 00281 op_trans> & proxy, 00282 const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT> & vec) 00283 { 00284 return viennacl::vector_expression<const viennacl::matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>, 00285 const matrix<SCALARTYPE, F, ALIGNMENT>, 00286 op_trans>, 00287 const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>, 00288 op_prod >(proxy, vec); 00289 } 00290 00293 template<class SCALARTYPE, typename F, unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT> 00294 void prod_impl(const 
viennacl::matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>, 00295 const matrix<SCALARTYPE, F, ALIGNMENT>, 00296 op_trans> & mat, 00297 const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT> & vec, 00298 viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT> & result) 00299 { 00300 trans_prod_impl(mat.lhs(), vec, result); 00301 } 00302 00311 template<class SCALARTYPE, typename F, unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT> 00312 void trans_prod_impl(const matrix<SCALARTYPE, F, ALIGNMENT> & mat, 00313 const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT> & vec, 00314 viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT> & result) 00315 { 00316 assert(mat.size1() == vec.size()); //remember: mat is transposed! 00317 // Inplace matrix-vector products like x = prod(A, x) are currently illegal: Introduce a temporary like y = prod(A, x); x = y; instead 00318 assert(vec.handle() != result.handle() && "No direct inplace matrix-vector product possible. Introduce a temporary!"); 00319 result.resize(mat.size2()); 00320 00321 typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType KernelClass; 00322 00323 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "trans_vec_mul"); 00324 00325 viennacl::ocl::enqueue(k(mat, mat.size1(), mat.size2(), mat.internal_size1(), mat.internal_size2(), vec, result)); 00326 } 00327 00328 00329 00335 template<class TYPE, typename F1, typename F2, typename F3, unsigned int ALIGNMENT> 00336 void prod_impl(const viennacl::matrix<TYPE, F1, ALIGNMENT> & A, 00337 const viennacl::matrix<TYPE, F2, ALIGNMENT> & B, 00338 viennacl::matrix<TYPE, F3, ALIGNMENT> & C, 00339 int block_size = 15) // [JW] added ability to set block size from outside .. 
00340 { 00341 assert(A.size1() == C.size1()); 00342 assert(A.size2() == B.size1()); 00343 assert(B.size2() == C.size2()); 00344 // Inplace matrix-vector products like B = prod(A, B) are currently illegal: Introduce a temporary like C = prod(A, B); B = C; instead 00345 assert(C.handle() != A.handle() 00346 && C.handle() != B.handle() 00347 && "No direct inplace matrix-matrix product possible. Introduce a temporary!"); 00348 00349 typedef typename viennacl::tools::MATRIX_PROD_KERNEL_CLASS_DEDUCER< viennacl::matrix<TYPE, F1, ALIGNMENT>, 00350 viennacl::matrix<TYPE, F2, ALIGNMENT>, 00351 viennacl::matrix<TYPE, F3, ALIGNMENT> >::ResultType KernelClass; 00352 KernelClass::init(); 00353 00354 //std::cout << "KernelClass::program_name() : " << KernelClass::program_name() << std::endl; 00355 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "prod_AA"); 00356 00357 /*k.global_work_size(0, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size1() / 2, block_size / 2)); 00358 k.global_work_size(1, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size2() / 2, block_size / 2)); 00359 k.local_work_size(0, block_size / 2); 00360 k.local_work_size(1, block_size / 2);*/ 00361 00362 k.global_work_size(0, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size1(), block_size)); 00363 k.global_work_size(1, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size2(), block_size)); 00364 k.local_work_size(0, block_size); 00365 k.local_work_size(1, block_size); 00366 00367 viennacl::ocl::enqueue( 00368 k(A, A.size1(), A.size2(), A.internal_size1(), A.internal_size2(), 00369 B, B.size1(), B.size2(), B.internal_size1(), B.internal_size2(), 00370 C, C.size1(), C.size2(), C.internal_size1(), C.internal_size2(), 00371 viennacl::ocl::local_mem(sizeof(TYPE) * block_size * block_size), 00372 viennacl::ocl::local_mem(sizeof(TYPE) * block_size * block_size) )); 00373 } 00374 00375 00381 template<class TYPE, typename F1, typename F2, typename F3, 
unsigned int ALIGNMENT> 00382 void prod_impl(const viennacl::matrix_expression< const matrix<TYPE, F1, ALIGNMENT>, 00383 const matrix<TYPE, F1, ALIGNMENT>, 00384 op_trans> & A, 00385 const viennacl::matrix<TYPE, F2, ALIGNMENT> & B, 00386 viennacl::matrix<TYPE, F3, ALIGNMENT> & C) 00387 { 00388 assert(A.size2() == C.size1()); 00389 assert(A.size1() == B.size1()); 00390 assert(B.size2() == C.size2()); 00391 // Inplace matrix-vector products like B = prod(A, B) are currently illegal: Introduce a temporary like C = prod(A, B); B = C; instead 00392 assert(C.handle() != A.lhs().handle() 00393 && C.handle() != B.handle() 00394 && "No direct inplace matrix-matrix product possible. Introduce a temporary!"); 00395 00396 int block_size = 15; 00397 00398 typedef typename viennacl::tools::MATRIX_PROD_KERNEL_CLASS_DEDUCER< viennacl::matrix<TYPE, F1, ALIGNMENT>, 00399 viennacl::matrix<TYPE, F2, ALIGNMENT>, 00400 viennacl::matrix<TYPE, F3, ALIGNMENT> >::ResultType KernelClass; 00401 KernelClass::init(); 00402 00403 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "prod_TA"); 00404 00405 k.global_work_size(0, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size1(), block_size)); 00406 k.global_work_size(1, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size2(), block_size)); 00407 00408 k.local_work_size(0, block_size); 00409 k.local_work_size(1, block_size); 00410 viennacl::ocl::enqueue( 00411 k(A.lhs(), A.lhs().size1(), A.lhs().size2(), A.lhs().internal_size1(), A.lhs().internal_size2(), 00412 B, B.size1(), B.size2(), B.internal_size1(), B.internal_size2(), 00413 C, C.size1(), C.size2(), C.internal_size1(), C.internal_size2(), 00414 viennacl::ocl::local_mem(sizeof(TYPE) * block_size * block_size), 00415 viennacl::ocl::local_mem(sizeof(TYPE) * block_size * block_size) )); 00416 } 00417 00418 00424 template<class TYPE, typename F1, typename F2, typename F3, unsigned int ALIGNMENT> 00425 void prod_impl(const viennacl::matrix<TYPE, F1, 
ALIGNMENT> & A, 00426 const viennacl::matrix_expression< const matrix<TYPE, F2, ALIGNMENT>, 00427 const matrix<TYPE, F2, ALIGNMENT>, 00428 op_trans> & B, 00429 viennacl::matrix<TYPE, F3, ALIGNMENT> & C) 00430 { 00431 assert(A.size1() == C.size1()); 00432 assert(A.size2() == B.size2()); 00433 assert(B.size1() == C.size2()); 00434 // Inplace matrix-vector products like B = prod(A, B) are currently illegal: Introduce a temporary like C = prod(A, B); B = C; instead 00435 assert(C.handle() != A.handle() 00436 && C.handle() != B.lhs().handle() 00437 && "No direct inplace matrix-matrix product possible. Introduce a temporary!"); 00438 00439 int block_size = 15; 00440 00441 typedef typename viennacl::tools::MATRIX_PROD_KERNEL_CLASS_DEDUCER< viennacl::matrix<TYPE, F1, ALIGNMENT>, 00442 viennacl::matrix<TYPE, F2, ALIGNMENT>, 00443 viennacl::matrix<TYPE, F3, ALIGNMENT> >::ResultType KernelClass; 00444 KernelClass::init(); 00445 00446 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "prod_AT"); 00447 00448 k.global_work_size(0, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size1(), block_size)); 00449 k.global_work_size(1, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size2(), block_size)); 00450 00451 k.local_work_size(0, block_size); 00452 k.local_work_size(1, block_size); 00453 viennacl::ocl::enqueue( 00454 k(A, A.size1(), A.size2(), A.internal_size1(), A.internal_size2(), 00455 B.lhs(), B.lhs().size1(), B.lhs().size2(), B.lhs().internal_size1(), B.lhs().internal_size2(), 00456 C, C.size1(), C.size2(), C.internal_size1(), C.internal_size2(), 00457 viennacl::ocl::local_mem(sizeof(TYPE) * block_size * block_size), 00458 viennacl::ocl::local_mem(sizeof(TYPE) * block_size * block_size) )); 00459 } 00460 00461 00462 00468 template<class TYPE, typename F1, typename F2, typename F3, unsigned int ALIGNMENT> 00469 void prod_impl(const viennacl::matrix_expression< const matrix<TYPE, F1, ALIGNMENT>, 00470 const matrix<TYPE, F1, 
ALIGNMENT>, 00471 op_trans> & A, 00472 const viennacl::matrix_expression< const matrix<TYPE, F2, ALIGNMENT>, 00473 const matrix<TYPE, F2, ALIGNMENT>, 00474 op_trans> & B, 00475 viennacl::matrix<TYPE, F3, ALIGNMENT> & C) 00476 { 00477 assert(A.size2() == C.size1()); 00478 assert(A.size1() == B.size2()); 00479 assert(B.size1() == C.size2()); 00480 // Inplace matrix-vector products like B = prod(A, B) are currently illegal: Introduce a temporary like C = prod(A, B); B = C; instead 00481 assert(C.handle() != A.lhs().handle() 00482 && C.handle() != B.lhs().handle() 00483 && "No direct inplace matrix-matrix product possible. Introduce a temporary!"); 00484 00485 int block_size = 15; 00486 00487 typedef typename viennacl::tools::MATRIX_PROD_KERNEL_CLASS_DEDUCER< viennacl::matrix<TYPE, F1, ALIGNMENT>, 00488 viennacl::matrix<TYPE, F2, ALIGNMENT>, 00489 viennacl::matrix<TYPE, F3, ALIGNMENT> >::ResultType KernelClass; 00490 KernelClass::init(); 00491 00492 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "prod_TT"); 00493 00494 k.global_work_size(0, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size1(), block_size)); 00495 k.global_work_size(1, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size2(), block_size)); 00496 00497 k.local_work_size(0, block_size); 00498 k.local_work_size(1, block_size); 00499 viennacl::ocl::enqueue( 00500 k(A.lhs(), A.lhs().size1(), A.lhs().size2(), A.lhs().internal_size1(), A.lhs().internal_size2(), 00501 B.lhs(), B.lhs().size1(), B.lhs().size2(), B.lhs().internal_size1(), B.lhs().internal_size2(), 00502 C, C.size1(), C.size2(), C.internal_size1(), C.internal_size2(), 00503 viennacl::ocl::local_mem(sizeof(TYPE) * block_size * block_size), 00504 viennacl::ocl::local_mem(sizeof(TYPE) * block_size * block_size) )); 00505 } 00506 00507 00508 00509 00510 00511 00512 00513 00514 00515 00516 00522 template<class SCALARTYPE, unsigned int VA1, unsigned int VA2> 00523 viennacl::matrix_expression< const 
viennacl::vector<SCALARTYPE, VA1>, 00524 const viennacl::vector<SCALARTYPE, VA2>, 00525 op_prod> outer_prod(const viennacl::vector<SCALARTYPE, VA1> & vec1, 00526 const viennacl::vector<SCALARTYPE, VA2> & vec2) 00527 { 00528 return viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VA1>, 00529 const viennacl::vector<SCALARTYPE, VA2>, 00530 op_prod>(vec1, vec2); 00531 } 00532 00533 00534 00543 template<class SCALARTYPE, typename F, unsigned int ALIGNMENT> 00544 void rank_1_update(viennacl::matrix<SCALARTYPE, F, ALIGNMENT> & mat1, 00545 const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec1, 00546 const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec2) 00547 { 00548 assert(mat1.size1() == vec1.size()); 00549 assert(mat1.size2() == vec2.size()); 00550 00551 typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType KernelClass; 00552 00553 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "rank1_update"); 00554 00555 viennacl::ocl::enqueue(k(mat1, mat1.size1(), mat1.size2(), mat1.internal_size1(), mat1.internal_size2(), vec1, vec2)); 00556 } 00557 00558 00568 template<class SCALARTYPE, typename F, unsigned int ALIGNMENT> 00569 void scaled_rank_1_update(viennacl::matrix<SCALARTYPE, F, ALIGNMENT> & mat1, 00570 SCALARTYPE val, 00571 const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec1, 00572 const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec2) 00573 { 00574 assert(mat1.size1() == vec1.size()); 00575 assert(mat1.size2() == vec2.size()); 00576 00577 typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType KernelClass; 00578 00579 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "scaled_rank1_update"); 00580 00581 viennacl::ocl::enqueue(k(mat1, mat1.size1(), mat1.size2(), mat1.internal_size1(), mat1.internal_size2(), 00582 val, vec1, vec2)); 00583 } 00584 00585 } //namespace linalg 00586 00587 
00588 //v = A * x 00593 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00594 template <typename F, unsigned int MAT_ALIGNMENT> 00595 viennacl::vector<SCALARTYPE, ALIGNMENT> & 00596 viennacl::vector<SCALARTYPE, ALIGNMENT>::operator=(const viennacl::vector_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>, 00597 const viennacl::vector<SCALARTYPE, ALIGNMENT>, 00598 viennacl::op_prod> & proxy) 00599 { 00600 // check for the special case x = A * x 00601 if (proxy.rhs().handle() == this->handle()) 00602 { 00603 viennacl::vector<SCALARTYPE, ALIGNMENT> result(proxy.rhs().size()); 00604 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 00605 *this = result; 00606 return *this; 00607 } 00608 else 00609 { 00610 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this); 00611 return *this; 00612 } 00613 return *this; 00614 } 00615 00616 //v += A * x 00621 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00622 template <typename F, unsigned int MAT_ALIGNMENT> 00623 viennacl::vector<SCALARTYPE, ALIGNMENT> & 00624 viennacl::vector<SCALARTYPE, ALIGNMENT>::operator+=(const vector_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>, 00625 const vector<SCALARTYPE, ALIGNMENT>, 00626 op_prod> & proxy) 00627 { 00628 vector<SCALARTYPE, ALIGNMENT> result(proxy.lhs().size1()); 00629 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 00630 *this += result; 00631 return *this; 00632 } 00633 00638 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00639 template <typename F, unsigned int MAT_ALIGNMENT> 00640 viennacl::vector<SCALARTYPE, ALIGNMENT> & 00641 viennacl::vector<SCALARTYPE, ALIGNMENT>::operator-=(const vector_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>, 00642 const vector<SCALARTYPE, ALIGNMENT>, 00643 op_prod> & proxy) 00644 { 00645 vector<SCALARTYPE, ALIGNMENT> result(proxy.lhs().size1()); 00646 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 00647 *this -= result; 00648 return *this; 00649 } 00650 
00651 00652 //free functions: 00657 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00658 template <typename F, unsigned int MAT_ALIGNMENT> 00659 viennacl::vector<SCALARTYPE, ALIGNMENT> 00660 viennacl::vector<SCALARTYPE, ALIGNMENT>::operator+(const vector_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>, 00661 const vector<SCALARTYPE, ALIGNMENT>, 00662 op_prod> & proxy) 00663 { 00664 assert(proxy.lhs().size1() == size()); 00665 vector<SCALARTYPE, ALIGNMENT> result(size()); 00666 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 00667 result += *this; 00668 return result; 00669 } 00670 00675 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00676 template <typename F, unsigned int MAT_ALIGNMENT> 00677 viennacl::vector<SCALARTYPE, ALIGNMENT> 00678 viennacl::vector<SCALARTYPE, ALIGNMENT>::operator-(const vector_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>, 00679 const vector<SCALARTYPE, ALIGNMENT>, 00680 op_prod> & proxy) 00681 { 00682 assert(proxy.lhs().size1() == size()); 00683 vector<SCALARTYPE, ALIGNMENT> result(size()); 00684 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 00685 result = *this - result; 00686 return result; 00687 } 00688 00689 00691 00692 00693 //v = trans(A) * x 00698 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00699 template <typename F, unsigned int MAT_ALIGNMENT> 00700 viennacl::vector<SCALARTYPE, ALIGNMENT> & 00701 viennacl::vector<SCALARTYPE, ALIGNMENT>::operator=(const viennacl::vector_expression< const matrix_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>, 00702 const matrix<SCALARTYPE, F, MAT_ALIGNMENT>, 00703 op_trans>, 00704 const viennacl::vector<SCALARTYPE, ALIGNMENT>, 00705 viennacl::op_prod> & proxy) 00706 { 00707 // check for the special case x = trans(A) * x 00708 if (proxy.rhs().handle() == this->handle()) 00709 { 00710 viennacl::vector<SCALARTYPE, ALIGNMENT> result(proxy.rhs().size()); 00711 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 
00712 *this = result; 00713 return *this; 00714 } 00715 else 00716 { 00717 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this); 00718 return *this; 00719 } 00720 return *this; 00721 } 00722 00723 //v += A * x 00728 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00729 template <typename F, unsigned int MAT_ALIGNMENT> 00730 viennacl::vector<SCALARTYPE, ALIGNMENT> & 00731 viennacl::vector<SCALARTYPE, ALIGNMENT>::operator+=(const vector_expression< const matrix_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>, 00732 const matrix<SCALARTYPE, F, MAT_ALIGNMENT>, 00733 op_trans>, 00734 const vector<SCALARTYPE, ALIGNMENT>, 00735 op_prod> & proxy) 00736 { 00737 vector<SCALARTYPE, ALIGNMENT> result(proxy.lhs().size1()); 00738 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 00739 *this += result; 00740 return *this; 00741 } 00742 00747 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00748 template <typename F, unsigned int MAT_ALIGNMENT> 00749 viennacl::vector<SCALARTYPE, ALIGNMENT> & 00750 viennacl::vector<SCALARTYPE, ALIGNMENT>::operator-=(const vector_expression< const matrix_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>, 00751 const matrix<SCALARTYPE, F, MAT_ALIGNMENT>, 00752 op_trans>, 00753 const vector<SCALARTYPE, ALIGNMENT>, 00754 op_prod> & proxy) 00755 { 00756 vector<SCALARTYPE, ALIGNMENT> result(proxy.lhs().size1()); 00757 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 00758 *this -= result; 00759 return *this; 00760 } 00761 00762 00763 //free functions: 00768 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00769 template <typename F, unsigned int MAT_ALIGNMENT> 00770 viennacl::vector<SCALARTYPE, ALIGNMENT> 00771 viennacl::vector<SCALARTYPE, ALIGNMENT>::operator+(const vector_expression< const matrix_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>, 00772 const matrix<SCALARTYPE, F, MAT_ALIGNMENT>, 00773 op_trans>, 00774 const vector<SCALARTYPE, ALIGNMENT>, 00775 op_prod> & proxy) 
00776 { 00777 assert(proxy.lhs().size1() == size()); 00778 vector<SCALARTYPE, ALIGNMENT> result(size()); 00779 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 00780 result += *this; 00781 return result; 00782 } 00783 00788 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00789 template <typename F, unsigned int MAT_ALIGNMENT> 00790 viennacl::vector<SCALARTYPE, ALIGNMENT> 00791 viennacl::vector<SCALARTYPE, ALIGNMENT>::operator-(const vector_expression< const matrix_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>, 00792 const matrix<SCALARTYPE, F, MAT_ALIGNMENT>, 00793 op_trans>, 00794 const vector<SCALARTYPE, ALIGNMENT>, 00795 op_prod> & proxy) 00796 { 00797 assert(proxy.lhs().size1() == size()); 00798 vector<SCALARTYPE, ALIGNMENT> result(size()); 00799 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 00800 result = *this - result; 00801 return result; 00802 } 00803 00804 00805 } //namespace viennacl 00806 00807 00808 #endif
1.7.6.1