// ViennaCL - The Vienna Computing Library, version 1.1.2
00001 /* ======================================================================= 00002 Copyright (c) 2010, Institute for Microelectronics, TU Vienna. 00003 http://www.iue.tuwien.ac.at 00004 ----------------- 00005 ViennaCL - The Vienna Computing Library 00006 ----------------- 00007 00008 authors: Karl Rupp rupp@iue.tuwien.ac.at 00009 Florian Rudolf flo.rudy+viennacl@gmail.com 00010 Josef Weinbub weinbub@iue.tuwien.ac.at 00011 00012 license: MIT (X11), see file LICENSE in the ViennaCL base directory 00013 ======================================================================= */ 00014 00015 #ifndef _VIENNACL_MATRIX_HPP_ 00016 #define _VIENNACL_MATRIX_HPP_ 00017 00022 #include "viennacl/forwards.h" 00023 #include "viennacl/ocl/backend.hpp" 00024 #include "viennacl/scalar.hpp" 00025 #include "viennacl/vector.hpp" 00026 #include "viennacl/linalg/matrix_operations.hpp" 00027 #include "viennacl/tools/tools.hpp" 00028 #include "viennacl/tools/matrix_size_deducer.hpp" 00029 #include "viennacl/tools/matrix_kernel_class_deducer.hpp" 00030 00031 namespace viennacl 00032 { 00034 struct row_major 00035 { 00043 static unsigned int mem_index(unsigned int i, unsigned int j, unsigned int num_rows, unsigned int num_cols) 00044 { 00045 return i * num_cols + j; 00046 } 00047 00048 static unsigned int internal_size1(unsigned int rows, unsigned int alignment) 00049 { 00050 return viennacl::tools::roundUpToNextMultiple<unsigned int>(rows, alignment);; 00051 } 00052 00053 static unsigned int internal_size2(unsigned int cols, unsigned int alignment) 00054 { 00055 return viennacl::tools::roundUpToNextMultiple<unsigned int>(cols, alignment); 00056 } 00057 }; 00058 00059 struct column_major 00060 { 00068 static unsigned int mem_index(unsigned int i, unsigned int j, unsigned int num_rows, unsigned int num_cols) 00069 { 00070 return i + j * num_rows; 00071 } 00072 00073 static unsigned int internal_size1(unsigned int rows, unsigned int alignment) 00074 { 00075 return 
viennacl::tools::roundUpToNextMultiple<unsigned int>(rows, alignment); 00076 } 00077 00078 static unsigned int internal_size2(unsigned int cols, unsigned int alignment) 00079 { 00080 return viennacl::tools::roundUpToNextMultiple<unsigned int>(cols, alignment); 00081 } 00082 }; 00083 00084 00085 template <typename LHS, typename RHS, typename OP> 00086 class matrix_expression 00087 { 00088 public: 00090 //*/ 00091 //typedef typename viennacl::tools::VECTOR_EXTRACTOR<LHS, RHS>::ResultType VectorType; 00092 00093 matrix_expression(LHS & lhs, RHS & rhs) : _lhs(lhs), _rhs(rhs) {} 00094 00097 LHS & lhs() const { return _lhs; } 00100 RHS & rhs() const { return _rhs; } 00101 00103 unsigned int size1() const { return viennacl::tools::MATRIX_SIZE_DEDUCER<LHS, RHS, OP>::size1(_lhs, _rhs); } 00104 unsigned int size2() const { return viennacl::tools::MATRIX_SIZE_DEDUCER<LHS, RHS, OP>::size2(_lhs, _rhs); } 00105 00106 private: 00108 LHS & _lhs; 00110 RHS & _rhs; 00111 }; 00112 00113 00115 struct row_iteration {}; 00116 00118 struct col_iteration {}; 00119 00120 //STL-like iterator. TODO: STL-compliance... 
00121 template <typename ROWCOL, typename MATRIXTYPE> 00122 class matrix_iterator 00123 { 00124 typedef matrix_iterator<ROWCOL, MATRIXTYPE> self_type; 00125 public: 00126 typedef typename MATRIXTYPE::value_type value_type; 00127 00128 matrix_iterator(MATRIXTYPE & mat, unsigned int start_row, unsigned int start_col) : _mat(mat), _row(start_row), _col(start_col) {}; 00129 00130 value_type operator*(void) { return _mat(_row, _col); } 00131 self_type & operator++(void) { viennacl::tools::MATRIX_ITERATOR_INCREMENTER<ROWCOL, MATRIXTYPE>::apply(_mat, _row, _col); return *this; } 00132 self_type & operator++(int) { self_type tmp = *this; ++(*this); return tmp; } 00133 00134 bool operator==(self_type const & other) { return (_row == other._row) && (_col == other._col); } 00135 bool operator!=(self_type const & other) { return !(*this == other); } 00136 00137 unsigned int index1() { return _row; } 00138 unsigned int index2() { return _col; } 00139 00140 MATRIXTYPE & operator()(void) const { return _mat; } 00141 00142 private: 00143 MATRIXTYPE & _mat; 00144 unsigned int _row; 00145 unsigned int _col; 00146 }; 00147 00154 template <class SCALARTYPE, typename F, unsigned int ALIGNMENT> 00155 class matrix 00156 { 00157 00158 public: 00159 00160 typedef matrix_iterator<row_iteration, matrix<SCALARTYPE, F, ALIGNMENT> > iterator1; 00161 typedef matrix_iterator<col_iteration, matrix<SCALARTYPE, F, ALIGNMENT> > iterator2; 00162 typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<SCALARTYPE>::ResultType> value_type; 00163 00165 matrix() : _rows(0), _columns(0) 00166 { 00167 typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType KernelClass; 00168 KernelClass::init(); 00169 }; 00170 00176 explicit matrix(unsigned int rows, unsigned int columns) : 00177 _rows(rows), _columns(columns) 00178 { 00179 typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType 
KernelClass; 00180 KernelClass::init(); 00181 _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE)*internal_size()); 00182 } 00183 00184 explicit matrix(cl_mem mem, unsigned int rows, unsigned int columns) : 00185 _rows(rows), _columns(columns) 00186 { 00187 typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType KernelClass; 00188 KernelClass::init(); 00189 _elements = mem; 00190 _elements.inc(); //prevents that the user-provided memory is deleted once the matrix object is destroyed. 00191 } 00192 00193 template <typename LHS, typename RHS, typename OP> 00194 matrix(matrix_expression< LHS, RHS, OP> const & proxy) : _rows(proxy.size1()), _columns(proxy.size2()) 00195 { 00196 typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType KernelClass; 00197 KernelClass::init(); 00198 _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE)*internal_size()); 00199 00200 *this = proxy; 00201 } 00202 00203 00204 //copy constructor: 00205 matrix(const matrix<SCALARTYPE, F, ALIGNMENT> & mat) : 00206 _rows(mat.size1()), _columns(mat.size2()), 00207 _elements(viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE)*internal_size())) 00208 { 00209 cl_int err; 00210 err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), mat.handle(), handle(), 0, 0, sizeof(SCALARTYPE)*internal_size(), 0, NULL, NULL); 00211 VIENNACL_ERR_CHECK(err); 00212 } 00213 00214 matrix<SCALARTYPE, F, ALIGNMENT> & operator=(const matrix<SCALARTYPE, F, ALIGNMENT> & mat) 00215 { 00216 resize(mat.size1(), mat.size2(), false); 00217 cl_int err; 00218 err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), mat.handle(), handle(), 0, 0, sizeof(SCALARTYPE)*internal_size(), 0, NULL, NULL); 00219 VIENNACL_ERR_CHECK(err); 00220 return *this; 00221 } 00222 00223 matrix<SCALARTYPE, F, 
ALIGNMENT> & operator=(const matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>, 00224 const matrix<SCALARTYPE, F, ALIGNMENT>, 00225 op_trans> & proxy) 00226 { 00227 assert(handle() != proxy.lhs().handle() && "Self-assignment of matrix transpose not implemented"); 00228 assert(proxy.lhs().size1() == size2() && "Matrix dimensions do not match!"); 00229 assert(proxy.lhs().size2() == size1() && "Matrix dimensions do not match!"); 00230 00231 resize(proxy.lhs().size2(), proxy.lhs().size1(), false); 00232 00233 std::vector<SCALARTYPE> temp(proxy.lhs().internal_size()); 00234 00235 cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), 00236 proxy.lhs().handle(), CL_TRUE, 0, 00237 sizeof(SCALARTYPE)*proxy.lhs().internal_size(), 00238 &(temp[0]), 0, NULL, NULL); 00239 VIENNACL_ERR_CHECK(err); 00240 viennacl::ocl::get_queue().finish(); 00241 00242 /* 00243 for (size_t i=0; i<proxy.lhs().size1(); ++i) 00244 { 00245 for (size_t j=0; j<proxy.lhs().size2(); ++j) 00246 std::cout << temp[F::mem_index(i,j, proxy.lhs().internal_size1(), proxy.lhs().internal_size2())] << ", "; 00247 }*/ 00248 00249 std::vector<SCALARTYPE> temp_trans(internal_size()); 00250 00251 for (size_t i=0; i<proxy.lhs().size1(); ++i) 00252 for (size_t j=0; j<proxy.lhs().size2(); ++j) 00253 temp_trans[F::mem_index(j,i, internal_size1(), internal_size2())] 00254 = temp[F::mem_index(i,j, proxy.lhs().internal_size1(), proxy.lhs().internal_size2())]; 00255 00256 /* 00257 for (size_t i=0; i<proxy.lhs().size1(); ++i) 00258 { 00259 for (size_t j=0; j<proxy.lhs().size2(); ++j) 00260 std::cout << temp_trans[F::mem_index(i,j, proxy.lhs().internal_size1(), proxy.lhs().internal_size2())] << ", "; 00261 }*/ 00262 00263 _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, 00264 sizeof(SCALARTYPE)*internal_size(), 00265 &(temp_trans[0])); 00266 00267 return *this; 00268 } 00269 00270 00271 00279 void resize(unsigned int rows, unsigned int columns, bool preserve = true) 00280 { 
00281 assert(rows > 0 && columns > 0); 00282 if (preserve) 00283 { 00284 //get old entries: 00285 std::vector< SCALARTYPE > old_entries(internal_size()); 00286 cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), //src 00287 handle(), //dest 00288 CL_TRUE, //blocking 00289 0, //offset 00290 sizeof(SCALARTYPE)*internal_size(), //size 00291 &(old_entries[0]), //destination 00292 0, NULL, NULL); 00293 VIENNACL_ERR_CHECK(err); 00294 00295 //set up entries of new matrix: 00296 std::vector< SCALARTYPE > new_entries(F::internal_size1(rows, ALIGNMENT) * F::internal_size2(columns, ALIGNMENT)); 00297 for (unsigned int i=0; i<rows; ++i) 00298 { 00299 if (i >= _rows) 00300 continue; 00301 00302 for (unsigned int j=0; j<columns; ++j) 00303 { 00304 if (j >= _columns) 00305 continue; 00306 new_entries[F::mem_index(i, j, F::internal_size1(rows, ALIGNMENT), F::internal_size2(columns, ALIGNMENT))] 00307 = old_entries[F::mem_index(i, j, internal_size1(), internal_size2())]; 00308 } 00309 } 00310 00311 //copy new entries to GPU: 00312 _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, new_entries); 00313 _rows = rows; 00314 _columns = columns; 00315 } 00316 else //discard old entries: 00317 { 00318 _rows = rows; 00319 _columns = columns; 00320 00321 std::vector< SCALARTYPE > new_entries(F::internal_size1(rows, ALIGNMENT) * F::internal_size2(columns, ALIGNMENT)); 00322 _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, new_entries); 00323 } 00324 } 00325 00326 00327 //read-write access to an element of the vector 00330 entry_proxy<SCALARTYPE> operator()(unsigned int row_index, unsigned int col_index) 00331 { 00332 return entry_proxy<SCALARTYPE>(F::mem_index(row_index, col_index, internal_size1(), internal_size2()), _elements); 00333 } 00334 00337 scalar<SCALARTYPE> operator()(unsigned int row_index, unsigned int col_index) const 00338 { 00339 scalar<SCALARTYPE> tmp; 00340 cl_int err; 00341 err = 
clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), 00342 _elements, 00343 tmp.handle(), 00344 sizeof(SCALARTYPE) * F::mem_index(row_index, col_index, internal_size1(), internal_size2()), 00345 0, 00346 sizeof(SCALARTYPE), 00347 0, 00348 NULL, 00349 NULL); 00350 //assert(err == CL_SUCCESS); 00351 VIENNACL_ERR_CHECK(err); 00352 return tmp; 00353 } 00354 00355 00356 matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>, 00357 const matrix<SCALARTYPE, F, ALIGNMENT>, 00358 op_add > 00359 operator + (const matrix< SCALARTYPE, F, ALIGNMENT> & other) 00360 { 00361 return matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>, 00362 const matrix<SCALARTYPE, F, ALIGNMENT>, 00363 op_add > (*this, other); 00364 } 00365 00366 00367 matrix<SCALARTYPE, F, ALIGNMENT> & operator += (const matrix< SCALARTYPE, F, ALIGNMENT> & other) 00368 { 00369 viennacl::linalg::inplace_add(*this, other); 00370 return *this; 00371 } 00372 00373 matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>, 00374 const matrix<SCALARTYPE, F, ALIGNMENT>, 00375 op_sub > 00376 operator - (const matrix< SCALARTYPE, F, ALIGNMENT> & other) 00377 { 00378 return matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>, 00379 const matrix<SCALARTYPE, F, ALIGNMENT>, 00380 op_sub > (*this, other); 00381 } 00382 00383 matrix<SCALARTYPE, F, ALIGNMENT> & operator -= (const matrix< SCALARTYPE, F, ALIGNMENT> & other) 00384 { 00385 viennacl::linalg::inplace_sub(*this, other); 00386 return *this; 00387 } 00388 00389 template <unsigned int A1, unsigned int A2> 00390 matrix<SCALARTYPE, F, ALIGNMENT> & operator += (const matrix_expression< const vector<SCALARTYPE, A1>, 00391 const vector<SCALARTYPE, A2>, 00392 op_prod > & proxy) 00393 { 00394 viennacl::linalg::rank_1_update(*this, proxy.lhs(), proxy.rhs()); 00395 return *this; 00396 } 00397 00398 template <unsigned int A1, unsigned int A2> 00399 matrix<SCALARTYPE, F, ALIGNMENT> & operator -= (const matrix_expression< const vector<SCALARTYPE, A1>, 00400 const 
vector<SCALARTYPE, A2>, 00401 op_prod > & proxy) 00402 { 00403 viennacl::linalg::scaled_rank_1_update(*this, static_cast<SCALARTYPE>(-1.0), proxy.lhs(), proxy.rhs()); 00404 return *this; 00405 } 00406 00407 template <unsigned int A1, unsigned int A2> 00408 matrix<SCALARTYPE, F, ALIGNMENT> & operator += (const matrix_expression< const matrix_expression< const vector<SCALARTYPE, A1>, 00409 const vector<SCALARTYPE, A2>, 00410 op_prod >, 00411 const SCALARTYPE, 00412 op_prod > & proxy) 00413 { 00414 viennacl::linalg::scaled_rank_1_update(*this, proxy.rhs(), proxy.lhs().lhs(), proxy.lhs().rhs()); 00415 return *this; 00416 } 00417 00418 template <unsigned int A1, unsigned int A2> 00419 matrix<SCALARTYPE, F, ALIGNMENT> & operator -= (const matrix_expression< const matrix_expression< const vector<SCALARTYPE, A1>, 00420 const vector<SCALARTYPE, A2>, 00421 op_prod >, 00422 const SCALARTYPE, 00423 op_prod > & proxy) 00424 { 00425 viennacl::linalg::scaled_rank_1_update(*this, static_cast<SCALARTYPE>(-1.0) * proxy.rhs(), proxy.lhs().lhs(), proxy.lhs().rhs()); 00426 return *this; 00427 } 00428 00429 00430 matrix<SCALARTYPE, F, ALIGNMENT> & operator *= (SCALARTYPE val) 00431 { 00432 viennacl::linalg::inplace_mult(*this, val); 00433 return *this; 00434 } 00435 00436 matrix<SCALARTYPE, F, ALIGNMENT> & operator *= (scalar<SCALARTYPE> const & val) 00437 { 00438 viennacl::linalg::inplace_mult(*this, val); 00439 return *this; 00440 } 00441 00442 matrix<SCALARTYPE, F, ALIGNMENT> & operator /= (SCALARTYPE val) 00443 { 00444 viennacl::linalg::inplace_mult(*this, SCALARTYPE(1.0) / val); 00445 return *this; 00446 } 00447 00448 matrix<SCALARTYPE, F, ALIGNMENT> & operator /= (scalar<SCALARTYPE> const & val) 00449 { 00450 viennacl::linalg::inplace_divide(*this, val); 00451 return *this; 00452 } 00453 00454 00455 //this = A * B and related (with trans()) 00456 template <typename MatrixType1, typename MatrixType2> 00457 matrix<SCALARTYPE, F, ALIGNMENT> & operator = (const matrix_expression< 
MatrixType1, 00458 MatrixType2, 00459 op_prod > & proxy) 00460 { 00461 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this); 00462 return *this; 00463 } 00464 00465 //this = A + B 00466 matrix<SCALARTYPE, F, ALIGNMENT> & operator = (const matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>, 00467 const matrix<SCALARTYPE, F, ALIGNMENT>, 00468 op_add > & proxy) 00469 { 00470 viennacl::linalg::add(proxy.lhs(), proxy.rhs(), *this); 00471 return *this; 00472 } 00473 00474 //this = A - B 00475 matrix<SCALARTYPE, F, ALIGNMENT> & operator = (const matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>, 00476 const matrix<SCALARTYPE, F, ALIGNMENT>, 00477 op_sub > & proxy) 00478 { 00479 viennacl::linalg::sub(proxy.lhs(), proxy.rhs(), *this); 00480 return *this; 00481 } 00482 00483 00485 const unsigned int & size1() const { return _rows;} 00487 const unsigned int & size2() const { return _columns; } 00488 00490 void clear() 00491 { 00492 unsigned int internal_size = internal_size1() * internal_size2(); 00493 00494 typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType KernelClass; 00495 00496 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "clear"); 00497 viennacl::ocl::enqueue(k(_elements, internal_size)); 00498 } 00499 00500 00501 //const unsigned int row_stride() const { return roundUpToNextMultiple<unsigned int>(columns(), ALIGNMENT); } 00503 const unsigned int internal_size1() const { return F::internal_size1(size1(), ALIGNMENT); } 00505 const unsigned int internal_size2() const { return F::internal_size2(size2(), ALIGNMENT); } 00507 const unsigned int internal_size() const { return internal_size1() * internal_size2(); } 00508 00510 const viennacl::ocl::handle<cl_mem> & handle() const { return _elements; } 00511 00512 #if defined(_MSC_VER) && _MSC_VER < 1500 //Visual Studio 2005 needs special treatment 00513 template <typename CPU_MATRIX> 00514 friend void 
copy(const CPU_MATRIX & cpu_matrix, 00515 matrix & gpu_matrix ); 00516 00517 template <typename SCALARTYPE2, typename A1, typename A2> 00518 friend void copy(const std::vector< std::vector<SCALARTYPE2, A1>, A2> & cpu_matrix, 00519 matrix & gpu_matrix ); 00520 00521 template <typename SCALARTYPE2> 00522 friend void fast_copy(SCALARTYPE2 * cpu_matrix_begin, 00523 SCALARTYPE2 * cpu_matrix_end, 00524 matrix & gpu_matrix); 00525 00526 #ifdef VIENNACL_HAVE_EIGEN 00527 friend void copy(const Eigen::MatrixXf & cpu_matrix, 00528 matrix & gpu_matrix); 00529 00530 friend void copy(const Eigen::MatrixXd & cpu_matrix, 00531 matrix & gpu_matrix); 00532 #endif 00533 00534 #ifdef VIENNACL_HAVE_MTL4 00535 template <typename SCALARTYPE2, typename T> 00536 friend void copy(const mtl::dense2D<SCALARTYPE2, T>& cpu_matrix, 00537 matrix & gpu_matrix); 00538 #endif 00539 #else 00540 template <typename CPU_MATRIX, typename SCALARTYPE2, typename F2, unsigned int ALIGNMENT2> 00541 friend void copy(const CPU_MATRIX & cpu_matrix, 00542 matrix<SCALARTYPE2, F2, ALIGNMENT2> & gpu_matrix ); 00543 00544 template <typename SCALARTYPE2, typename A1, typename A2, typename F2, unsigned int ALIGNMENT2> 00545 friend void copy(const std::vector< std::vector<SCALARTYPE2, A1>, A2> & cpu_matrix, 00546 matrix<SCALARTYPE2, F2, ALIGNMENT2> & gpu_matrix ); 00547 00548 template <typename SCALARTYPE2, typename F2, unsigned int ALIGNMENT2> 00549 friend void fast_copy(SCALARTYPE2 * cpu_matrix_begin, 00550 SCALARTYPE2 * cpu_matrix_end, 00551 matrix<SCALARTYPE2, F2, ALIGNMENT2> & gpu_matrix); 00552 00553 #ifdef VIENNACL_HAVE_EIGEN 00554 template <typename F2, unsigned int ALIGNMENT2> 00555 friend void copy(const Eigen::MatrixXf & cpu_matrix, 00556 matrix<float, F2, ALIGNMENT2> & gpu_matrix); 00557 00558 template <typename F2, unsigned int ALIGNMENT2> 00559 friend void copy(const Eigen::MatrixXd & cpu_matrix, 00560 matrix<double, F2, ALIGNMENT2> & gpu_matrix); 00561 #endif 00562 00563 #ifdef VIENNACL_HAVE_MTL4 00564 
template <typename SCALARTYPE2, typename T, typename F2, unsigned int ALIGNMENT2> 00565 friend void copy(const mtl::dense2D<SCALARTYPE2, T>& cpu_matrix, 00566 matrix<SCALARTYPE2, F2, ALIGNMENT2> & gpu_matrix); 00567 #endif 00568 #endif 00569 00570 private: 00571 unsigned int _rows; 00572 unsigned int _columns; 00573 viennacl::ocl::handle<cl_mem> _elements; 00574 }; //matrix 00575 00581 template<class SCALARTYPE, typename F, unsigned int ALIGNMENT> 00582 std::ostream & operator<<(std::ostream & s, const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix) 00583 { 00584 std::vector<SCALARTYPE> tmp(gpu_matrix.internal_size()); 00585 cl_int err; 00586 err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE) * gpu_matrix.internal_size(), &tmp[0], 0, NULL, NULL); 00587 VIENNACL_ERR_CHECK(err); 00588 viennacl::ocl::get_queue().finish(); 00589 00590 s << "[" << gpu_matrix.size1() << "," << gpu_matrix.size2() << "]"; 00591 00592 s << "("; 00593 for (unsigned int i = 0; i < gpu_matrix.size1(); ++i) 00594 { 00595 s << "("; 00596 for (unsigned int j = 0; j < gpu_matrix.size2(); ++j) 00597 { 00598 s << tmp[ i * gpu_matrix.internal_size2() + j ]; 00599 if (j < gpu_matrix.size2() - 1) 00600 s << ","; 00601 } 00602 s << ")"; 00603 if (i < gpu_matrix.size1() - 1) 00604 s << ","; 00605 } 00606 s << ")"; 00607 return s; 00608 } 00609 00615 template<typename LHS, typename RHS, typename OP> 00616 std::ostream & operator<<(std::ostream & s, const matrix_expression<LHS, RHS, OP> & expr) 00617 { 00618 typedef typename viennacl::tools::CPU_SCALAR_TYPE_DEDUCER< typename tools::CONST_REMOVER<LHS>::ResultType >::ResultType ScalarType; 00619 00620 matrix<ScalarType> temp = expr; 00621 s << temp; 00622 return s; 00623 } 00624 00626 template<class SCALARTYPE, typename F, unsigned int ALIGNMENT> 00627 matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>, 00628 const matrix<SCALARTYPE, F, ALIGNMENT>, 00629 op_trans> trans(const 
matrix<SCALARTYPE, F, ALIGNMENT> & mat) 00630 { 00631 return matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>, 00632 const matrix<SCALARTYPE, F, ALIGNMENT>, 00633 op_trans>(mat, mat); 00634 } 00635 00636 00638 00639 // 00640 //cpu to gpu, generic type: 00641 // 00647 template <typename CPU_MATRIX, typename SCALARTYPE, typename F, unsigned int ALIGNMENT> 00648 void copy(const CPU_MATRIX & cpu_matrix, 00649 matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix ) 00650 { 00651 gpu_matrix.resize(static_cast<unsigned int>(cpu_matrix.size1()), 00652 static_cast<unsigned int>(cpu_matrix.size2()), false); 00653 00654 std::vector<SCALARTYPE> data(gpu_matrix.internal_size()); 00655 for (unsigned int i = 0; i < gpu_matrix.size1(); ++i) 00656 { 00657 for (unsigned int j = 0; j < gpu_matrix.size2(); ++j) 00658 data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j); 00659 } 00660 00661 gpu_matrix._elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data); 00662 } 00663 00664 // 00665 //cpu to gpu, STL type: 00666 // 00672 template <typename SCALARTYPE, typename A1, typename A2, typename F, unsigned int ALIGNMENT> 00673 void copy(const std::vector< std::vector<SCALARTYPE, A1>, A2> & cpu_matrix, 00674 matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix ) 00675 { 00676 gpu_matrix.resize(static_cast<unsigned int>(cpu_matrix.size()), 00677 static_cast<unsigned int>(cpu_matrix[0].size()), 00678 false); 00679 00680 std::vector<SCALARTYPE> data(gpu_matrix.internal_size()); 00681 for (unsigned int i = 0; i < gpu_matrix.size1(); ++i) 00682 { 00683 for (unsigned int j = 0; j < gpu_matrix.size2(); ++j) 00684 data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j]; 00685 } 00686 00687 gpu_matrix._elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data); 00688 } 00689 00690 00691 // 00692 //cpu to gpu, another STL type: 00693 // 00700 template <typename 
SCALARTYPE, typename F, unsigned int ALIGNMENT> 00701 void fast_copy(SCALARTYPE * cpu_matrix_begin, 00702 SCALARTYPE * cpu_matrix_end, 00703 matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix) 00704 { 00705 gpu_matrix._elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, 00706 sizeof(SCALARTYPE) * (cpu_matrix_end - cpu_matrix_begin), 00707 cpu_matrix_begin); 00708 } 00709 00710 00711 #ifdef VIENNACL_HAVE_EIGEN 00712 00717 template <typename F, unsigned int ALIGNMENT> 00718 void copy(const Eigen::MatrixXf & cpu_matrix, 00719 matrix<float, F, ALIGNMENT> & gpu_matrix) 00720 { 00721 gpu_matrix.resize(static_cast<unsigned int>(cpu_matrix.rows()), 00722 static_cast<unsigned int>(cpu_matrix.cols()), 00723 false); 00724 00725 std::vector<float> data(gpu_matrix.internal_size()); 00726 for (unsigned int i = 0; i < gpu_matrix.size1(); ++i) 00727 { 00728 for (unsigned int j = 0; j < gpu_matrix.size2(); ++j) 00729 data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j); 00730 } 00731 00732 gpu_matrix._elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data); 00733 } 00734 00740 template <typename F, unsigned int ALIGNMENT> 00741 void copy(const Eigen::MatrixXd & cpu_matrix, 00742 matrix<double, F, ALIGNMENT> & gpu_matrix) 00743 { 00744 gpu_matrix.resize(static_cast<unsigned int>(cpu_matrix.rows()), 00745 static_cast<unsigned int>(cpu_matrix.cols()), 00746 false); 00747 00748 std::vector<double> data(gpu_matrix.internal_size()); 00749 for (unsigned int i = 0; i < gpu_matrix.size1(); ++i) 00750 { 00751 for (unsigned int j = 0; j < gpu_matrix.size2(); ++j) 00752 data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j); 00753 } 00754 00755 gpu_matrix._elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data); 00756 } 00757 #endif 00758 00759 #ifdef VIENNACL_HAVE_MTL4 00760 00765 template <typename SCALARTYPE, typename T, 
typename F, unsigned int ALIGNMENT> 00766 void copy(const mtl::dense2D<SCALARTYPE, T>& cpu_matrix, 00767 matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix) 00768 { 00769 gpu_matrix.resize(static_cast<unsigned int>(cpu_matrix.num_rows()), 00770 static_cast<unsigned int>(cpu_matrix.num_cols()), 00771 false); 00772 00773 std::vector<SCALARTYPE> data(gpu_matrix.internal_size()); 00774 for (unsigned int i = 0; i < gpu_matrix.size1(); ++i) 00775 { 00776 for (unsigned int j = 0; j < gpu_matrix.size2(); ++j) 00777 data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j]; 00778 } 00779 00780 gpu_matrix._elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data); 00781 } 00782 #endif 00783 00784 00785 00786 00787 // 00788 //gpu to cpu, generic type 00789 // 00795 template <typename CPU_MATRIX, typename SCALARTYPE, typename F, unsigned int ALIGNMENT> 00796 void copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix, 00797 CPU_MATRIX & cpu_matrix ) 00798 { 00799 if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) ) 00800 { 00801 std::vector<SCALARTYPE> temp_buffer(gpu_matrix.internal_size()); 00802 cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.internal_size(), &(temp_buffer[0]), 0, NULL, NULL); 00803 VIENNACL_ERR_CHECK(err); 00804 00805 //now copy entries to cpu_matrix: 00806 for (unsigned int i = 0; i < gpu_matrix.size1(); ++i) 00807 for (unsigned int j = 0; j < gpu_matrix.size2(); ++j) 00808 cpu_matrix(i,j) = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())]; 00809 } 00810 } 00811 00812 //gpu to cpu, STL type 00818 template <typename SCALARTYPE, typename A1, typename A2, typename F, unsigned int ALIGNMENT> 00819 void copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix, 00820 std::vector< std::vector<SCALARTYPE, A1>, A2> & cpu_matrix) 00821 { 00822 if ( (gpu_matrix.size1() > 
0) && (gpu_matrix.size2() > 0) 00823 && (cpu_matrix.size() >= gpu_matrix.size1()) && (cpu_matrix[0].size() >= gpu_matrix.size2())) 00824 { 00825 std::vector<SCALARTYPE> temp_buffer(gpu_matrix.internal_size()); 00826 cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.internal_size(), &(temp_buffer[0]), 0, NULL, NULL); 00827 VIENNACL_ERR_CHECK(err); 00828 00829 //now copy entries to cpu_matrix: 00830 for (unsigned int i = 0; i < gpu_matrix.size1(); ++i) 00831 for (unsigned int j = 0; j < gpu_matrix.size2(); ++j) 00832 cpu_matrix[i][j] = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())]; 00833 } 00834 } 00835 00836 //gpu to cpu, STL type 00842 template <typename SCALARTYPE, typename F, unsigned int ALIGNMENT> 00843 void fast_copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix, 00844 SCALARTYPE * cpu_matrix_begin) 00845 { 00846 cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), 00847 gpu_matrix.handle(), 00848 CL_TRUE, 0, 00849 sizeof(SCALARTYPE)*gpu_matrix.internal_size(), 00850 cpu_matrix_begin, 0, NULL, NULL); 00851 VIENNACL_ERR_CHECK(err); 00852 } 00853 00854 00855 00856 00857 00858 00859 00860 00861 00862 // outer_prod(v1, v2) * val; 00863 template<typename CPU_SCALAR, typename SCALARTYPE,unsigned int VECTOR_ALIGNMENT> 00864 viennacl::matrix_expression< const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>, 00865 const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>, 00866 op_prod>, 00867 const SCALARTYPE, 00868 op_prod> operator*(const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>, 00869 const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>, 00870 op_prod> & proxy, 00871 CPU_SCALAR val) 00872 { 00873 return viennacl::matrix_expression< const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>, 00874 const 
viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>, 00875 op_prod>, 00876 const SCALARTYPE, 00877 op_prod>(proxy, static_cast<SCALARTYPE>(val)); 00878 } 00879 00880 // val * outer_prod(v1, v2); 00881 template <typename CPU_SCALAR, typename SCALARTYPE, unsigned int VA1, unsigned int VA2> 00882 viennacl::matrix_expression< const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VA1>, 00883 const viennacl::vector<SCALARTYPE, VA2>, 00884 op_prod>, 00885 const SCALARTYPE, 00886 op_prod> operator*(CPU_SCALAR val, 00887 viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VA1>, 00888 const viennacl::vector<SCALARTYPE, VA2>, 00889 op_prod> const & proxy) 00890 { 00891 return viennacl::matrix_expression< const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VA1>, 00892 const viennacl::vector<SCALARTYPE, VA2>, 00893 op_prod>, 00894 const SCALARTYPE, 00895 op_prod>(proxy, static_cast<SCALARTYPE>(val)); 00896 } 00897 00898 00899 00900 } //namespace viennacl 00901 00902 #endif
// (documentation originally generated by Doxygen 1.7.6.1)