ViennaCL - The Vienna Computing Library 1.1.2
/* =======================================================================
   Copyright (c) 2010, Institute for Microelectronics, TU Vienna.
   http://www.iue.tuwien.ac.at
                             -----------------
               ViennaCL - The Vienna Computing Library
                             -----------------

   authors:    Karl Rupp                          rupp@iue.tuwien.ac.at
               Florian Rudolf                     flo.rudy+viennacl@gmail.com
               Josef Weinbub                      weinbub@iue.tuwien.ac.at

   license:    MIT (X11), see file LICENSE in the ViennaCL base directory
======================================================================= */

#ifndef _VIENNACL_COMPRESSED_MATRIX_HPP_
#define _VIENNACL_COMPRESSED_MATRIX_HPP_

#include <vector>
#include <list>
#include <map>
#include "viennacl/forwards.h"
#include "viennacl/ocl/backend.hpp"
#include "viennacl/vector.hpp"

#include "viennacl/linalg/compressed_matrix_operations.hpp"

#include "viennacl/tools/tools.hpp"

namespace viennacl
{

  //provide copy-operation:
  template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const CPU_MATRIX & cpu_matrix,
            compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix )
  {
    //std::cout << "copy for (" << cpu_matrix.size1() << ", " << cpu_matrix.size2() << ", " << cpu_matrix.nnz() << ")" << std::endl;

    if ( cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0 )
    {
      //determine nonzeros:
      long num_entries = 0;
      for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
           row_it != cpu_matrix.end1();
           ++row_it)
      {
        unsigned int entries_per_row = 0;
        for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
             col_it != row_it.end();
             ++col_it)
        {
          ++entries_per_row;
        }
        num_entries += viennacl::tools::roundUpToNextMultiple<unsigned int>(entries_per_row, ALIGNMENT);
      }

      if (num_entries == 0) //we copy an empty matrix
      {
        num_entries = 1;
      }

      //set up matrix entries:
      std::vector<unsigned int> row_buffer(cpu_matrix.size1() + 1);
      std::vector<unsigned int> col_buffer(num_entries);
      std::vector<SCALARTYPE> elements(num_entries);

      unsigned int row_index = 0;
      unsigned int data_index = 0;

      for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
           row_it != cpu_matrix.end1();
           ++row_it)
      {
        row_buffer[row_index] = data_index;
        ++row_index;

        for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
             col_it != row_it.end();
             ++col_it)
        {
          col_buffer[data_index] = static_cast<unsigned int>(col_it.index2());
          elements[data_index] = *col_it;
          ++data_index;
        }
        data_index = viennacl::tools::roundUpToNextMultiple<unsigned int>(data_index, ALIGNMENT); //take care of alignment
      }
      row_buffer[row_index] = data_index;

      gpu_matrix.set(&row_buffer[0], &col_buffer[0], &elements[0], static_cast<unsigned int>(cpu_matrix.size1()), num_entries);
    }
  }


  //adapted for std::vector< std::map < > > argument:
  template <typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix,
            compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix )
  {
    copy(tools::const_sparse_matrix_adapter<SCALARTYPE>(cpu_matrix), gpu_matrix);
  }
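A minimal usage sketch for the copy operations above (my illustration, not part of this header; it assumes an initialized ViennaCL OpenCL context and double-precision support on the device):

// --- usage sketch, not part of compressed_matrix.hpp ---
#include <map>
#include <vector>
#include "viennacl/compressed_matrix.hpp"

void example_cpu_to_gpu()
{
  // assemble a 4x4 sparse matrix on the host, one std::map per row
  std::vector< std::map<unsigned int, double> > stl_matrix(4);
  stl_matrix[0][0] = 2.0;
  stl_matrix[1][1] = 2.0;  stl_matrix[1][2] = -1.0;
  stl_matrix[2][1] = -1.0; stl_matrix[2][2] = 2.0;
  stl_matrix[3][3] = 2.0;

  // transfer to the device: the overload above wraps the STL structure in
  // const_sparse_matrix_adapter, converts to CSR, and uploads via set()
  viennacl::compressed_matrix<double> gpu_matrix(4, 4);
  viennacl::copy(stl_matrix, gpu_matrix);
}
// --- end of sketch ---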

#ifdef VIENNACL_HAVE_EIGEN
  template <typename SCALARTYPE, int flags, unsigned int ALIGNMENT>
  void copy(const Eigen::SparseMatrix<SCALARTYPE, flags> & eigen_matrix,
            compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix)
  {
    std::vector< std::map<unsigned int, SCALARTYPE> > stl_matrix(eigen_matrix.rows());

    for (int k=0; k < eigen_matrix.outerSize(); ++k)
      for (typename Eigen::SparseMatrix<SCALARTYPE, flags>::InnerIterator it(eigen_matrix, k); it; ++it)
        stl_matrix[it.row()][it.col()] = it.value();

    copy(tools::const_sparse_matrix_adapter<SCALARTYPE>(stl_matrix), gpu_matrix);
  }
#endif


#ifdef VIENNACL_HAVE_MTL4
  template <typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const mtl::compressed2D<SCALARTYPE> & cpu_matrix,
            compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix)
  {
    typedef mtl::compressed2D<SCALARTYPE>  MatrixType;

    std::vector< std::map<unsigned int, SCALARTYPE> > stl_matrix(cpu_matrix.num_rows());

    using mtl::traits::range_generator;
    using mtl::traits::range::min;

    // Choose between row and column traversal
    typedef typename min<range_generator<mtl::tag::row, MatrixType>,
                         range_generator<mtl::tag::col, MatrixType> >::type   range_type;
    range_type my_range;

    // Type of outer cursor
    typedef typename range_type::type c_type;
    // Type of inner cursor
    typedef typename mtl::traits::range_generator<mtl::tag::nz, c_type>::type ic_type;

    // Define the property maps
    typename mtl::traits::row<MatrixType>::type   row(cpu_matrix);
    typename mtl::traits::col<MatrixType>::type   col(cpu_matrix);
    typename mtl::traits::const_value<MatrixType>::type   value(cpu_matrix);

    // Now iterate over the matrix
    for (c_type cursor(my_range.begin(cpu_matrix)), cend(my_range.end(cpu_matrix)); cursor != cend; ++cursor)
      for (ic_type icursor(mtl::begin<mtl::tag::nz>(cursor)), icend(mtl::end<mtl::tag::nz>(cursor)); icursor != icend; ++icursor)
        stl_matrix[row(*icursor)][col(*icursor)] = value(*icursor);

    copy(tools::const_sparse_matrix_adapter<SCALARTYPE>(stl_matrix), gpu_matrix);
  }
#endif


  //
  // gpu to cpu:
  //
  template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
            CPU_MATRIX & cpu_matrix )
  {
    if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
    {
      cpu_matrix.resize(gpu_matrix.size1(), gpu_matrix.size2(), false);

      //get raw data from memory:
      std::vector<unsigned int> row_buffer(gpu_matrix.size1() + 1);
      std::vector<unsigned int> col_buffer(gpu_matrix.nnz());
      std::vector<SCALARTYPE> elements(gpu_matrix.nnz());

      //std::cout << "GPU->CPU, nonzeros: " << gpu_matrix.nnz() << std::endl;

      cl_int err;
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle1(),
                                CL_TRUE, 0, sizeof(unsigned int)*(gpu_matrix.size1() + 1), &(row_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle2(),
                                CL_TRUE, 0, sizeof(unsigned int)*gpu_matrix.nnz(), &(col_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(),
                                CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.nnz(), &(elements[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      viennacl::ocl::get_queue().finish();

      //fill the cpu_matrix:
      unsigned int data_index = 0;
      for (unsigned int row = 1; row <= gpu_matrix.size1(); ++row)
      {
        while (data_index < row_buffer[row])
        {
          if (col_buffer[data_index] >= gpu_matrix.size2())
          {
            std::cerr << "ViennaCL encountered invalid data at col_buffer[" << data_index << "]: " << col_buffer[data_index] << std::endl;
            return;
          }

          if (elements[data_index] != static_cast<SCALARTYPE>(0.0))
            cpu_matrix(row-1, col_buffer[data_index]) = elements[data_index];
          ++data_index;
        }
      }
    }
  }


  template <typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
            std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix)
  {
    tools::sparse_matrix_adapter<SCALARTYPE> temp(cpu_matrix);
    copy(gpu_matrix, temp);
  }
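The reverse direction as a sketch (again my illustration, same assumptions as the previous example):

// --- usage sketch, not part of compressed_matrix.hpp ---
void example_gpu_to_cpu(const viennacl::compressed_matrix<double> & gpu_matrix)
{
  // the adapter-based overload above reads the CSR arrays back
  // (blocking OpenCL reads) and fills one std::map per row
  std::vector< std::map<unsigned int, double> > stl_matrix(gpu_matrix.size1());
  viennacl::copy(gpu_matrix, stl_matrix);
}
// --- end of sketch ---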

#ifdef VIENNACL_HAVE_EIGEN
  template <typename SCALARTYPE, int flags, unsigned int ALIGNMENT>
  void copy(compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
            Eigen::SparseMatrix<SCALARTYPE, flags> & eigen_matrix)
  {
    if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
    {
      assert(static_cast<unsigned int>(eigen_matrix.rows()) >= gpu_matrix.size1()
             && static_cast<unsigned int>(eigen_matrix.cols()) >= gpu_matrix.size2()
             && "Provided Eigen compressed matrix is too small!");

      //get raw data from memory:
      std::vector<unsigned int> row_buffer(gpu_matrix.size1() + 1);
      std::vector<unsigned int> col_buffer(gpu_matrix.nnz());
      std::vector<SCALARTYPE> elements(gpu_matrix.nnz());

      cl_int err;
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle1(),
                                CL_TRUE, 0, sizeof(unsigned int)*(gpu_matrix.size1() + 1), &(row_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle2(),
                                CL_TRUE, 0, sizeof(unsigned int)*gpu_matrix.nnz(), &(col_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(),
                                CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.nnz(), &(elements[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      viennacl::ocl::get_queue().finish();

      eigen_matrix.setZero();
      eigen_matrix.startFill();
      unsigned int data_index = 0;
      for (unsigned int row = 1; row <= gpu_matrix.size1(); ++row)
      {
        while (data_index < row_buffer[row])
        {
          assert(col_buffer[data_index] < gpu_matrix.size2() && "ViennaCL encountered invalid data at col_buffer");
          if (elements[data_index] != static_cast<SCALARTYPE>(0.0))
            eigen_matrix.fill(row-1, col_buffer[data_index]) = elements[data_index];
          ++data_index;
        }
      }
      eigen_matrix.endFill();
    }
  }
#endif


#ifdef VIENNACL_HAVE_MTL4
  template <typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
            mtl::compressed2D<SCALARTYPE> & mtl4_matrix)
  {
    if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
    {
      assert(mtl4_matrix.num_rows() >= gpu_matrix.size1()
             && mtl4_matrix.num_cols() >= gpu_matrix.size2()
             && "Provided MTL4 compressed matrix is too small!");

      //get raw data from memory:
      std::vector<unsigned int> row_buffer(gpu_matrix.size1() + 1);
      std::vector<unsigned int> col_buffer(gpu_matrix.nnz());
      std::vector<SCALARTYPE> elements(gpu_matrix.nnz());

      cl_int err;
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle1(),
                                CL_TRUE, 0, sizeof(unsigned int)*(gpu_matrix.size1() + 1), &(row_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle2(),
                                CL_TRUE, 0, sizeof(unsigned int)*gpu_matrix.nnz(), &(col_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(),
                                CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.nnz(), &(elements[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      viennacl::ocl::get_queue().finish();

      //set_to_zero(mtl4_matrix);
      //mtl4_matrix.change_dim(gpu_matrix.size1(), gpu_matrix.size2());

      mtl::matrix::inserter< mtl::compressed2D<SCALARTYPE> > ins(mtl4_matrix);
      unsigned int data_index = 0;
      for (unsigned int row = 1; row <= gpu_matrix.size1(); ++row)
      {
        while (data_index < row_buffer[row])
        {
          assert(col_buffer[data_index] < gpu_matrix.size2() && "ViennaCL encountered invalid data at col_buffer");
          if (elements[data_index] != static_cast<SCALARTYPE>(0.0))
            ins(row-1, col_buffer[data_index]) << typename mtl::Collection< mtl::compressed2D<SCALARTYPE> >::value_type(elements[data_index]);
          ++data_index;
        }
      }
    }
  }
#endif
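For orientation before the class definition: compressed_matrix holds its data in the standard compressed sparse row (CSR) format that the copy routines above assemble. A worked example (my illustration, not from the header):

// The 3x3 matrix
//
//   [ 1  0  2 ]
//   [ 0  3  0 ]
//   [ 4  0  5 ]
//
// is stored with ALIGNMENT == 1 (no per-row padding) as
//
//   row_jumper = { 0, 2, 3, 5 };     // handle1(): row i occupies [row_jumper[i], row_jumper[i+1])
//   col_buffer = { 0, 2, 1, 0, 2 };  // handle2(): column index of each entry
//   elements   = { 1, 2, 3, 4, 5 };  // handle():  the nonzero values
//
// For ALIGNMENT > 1, each row is zero-padded to a multiple of ALIGNMENT
// entries (cf. roundUpToNextMultiple in the copy routines above).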

  template<class SCALARTYPE, unsigned int ALIGNMENT /* see forwards.h */>
  class compressed_matrix
  {
  public:
    typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<SCALARTYPE>::ResultType>   value_type;

    compressed_matrix() : _rows(0), _cols(0), _nonzeros(0) { viennacl::linalg::kernels::compressed_matrix<SCALARTYPE, ALIGNMENT>::init(); }

    explicit compressed_matrix(unsigned int rows, unsigned int cols, unsigned int nonzeros = 0) :
      _rows(rows), _cols(cols), _nonzeros(nonzeros)
    {
      viennacl::linalg::kernels::compressed_matrix<SCALARTYPE, ALIGNMENT>::init();

      if (rows > 0)
        _row_buffer = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(unsigned int) * rows);
      if (nonzeros > 0)
      {
        _col_buffer = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(unsigned int) * nonzeros);
        _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * nonzeros);
      }
    }

    explicit compressed_matrix(cl_mem mem_row_buffer, cl_mem mem_col_buffer, cl_mem mem_elements, unsigned int rows, unsigned int cols, unsigned int nonzeros) :
      _rows(rows), _cols(cols), _nonzeros(nonzeros)
    {
      _row_buffer = mem_row_buffer;
      _row_buffer.inc();  //prevents the user-provided memory from being deleted once the matrix object is destroyed.
      _col_buffer = mem_col_buffer;
      _col_buffer.inc();  //prevents the user-provided memory from being deleted once the matrix object is destroyed.
      _elements = mem_elements;
      _elements.inc();    //prevents the user-provided memory from being deleted once the matrix object is destroyed.
    }

    void set(unsigned int * row_jumper, unsigned int * col_buffer, SCALARTYPE * elements, unsigned int cols, unsigned int nonzeros)
    {
      assert(cols > 0);
      assert(nonzeros > 0);
      //std::cout << "Setting memory: " << cols + 1 << ", " << nonzeros << std::endl;
      _row_buffer = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(unsigned int) * (cols + 1), row_jumper);
      _col_buffer = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(unsigned int) * nonzeros, col_buffer);
      _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * nonzeros, elements);
      _nonzeros = nonzeros;
      _rows = cols;   //note: set() assumes a square matrix (row and column count both taken from 'cols')
      _cols = cols;
    }

    void reserve(unsigned int new_nonzeros)
    {
      if (new_nonzeros > _nonzeros)
      {
        viennacl::ocl::handle<cl_mem> _col_buffer_old = _col_buffer;
        viennacl::ocl::handle<cl_mem> _elements_old = _elements;
        _col_buffer = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(unsigned int) * new_nonzeros);
        _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * new_nonzeros);

        cl_int err;
        err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), _col_buffer_old, _col_buffer, 0, 0, sizeof(unsigned int)*_nonzeros, 0, NULL, NULL);
        VIENNACL_ERR_CHECK(err);
        err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), _elements_old, _elements, 0, 0, sizeof(SCALARTYPE)*_nonzeros, 0, NULL, NULL);
        VIENNACL_ERR_CHECK(err);

        _nonzeros = new_nonzeros;
      }
    }

    void resize(unsigned int new_size1, unsigned int new_size2, bool preserve = true)
    {
      assert(new_size1 > 0 && new_size2 > 0);
      //std::cout << "Resizing from (" << _rows << ", " << _cols << ") to (" << new_size1 << ", " << new_size2 << ")" << std::endl;

      if (new_size1 != _rows || new_size2 != _cols)
      {
        std::vector<std::map<unsigned int, SCALARTYPE> > stl_sparse_matrix;
        if (_rows > 0)
          stl_sparse_matrix.resize(_rows);

        if (preserve && _rows > 0)
          viennacl::copy(*this, stl_sparse_matrix);

        stl_sparse_matrix.resize(new_size1);

        //discard entries with column index at or beyond new_size2
        if (new_size2 < _cols && _rows > 0)
        {
          for (size_t i=0; i<stl_sparse_matrix.size(); ++i)
          {
            std::list<unsigned int> to_delete;
            for (typename std::map<unsigned int, SCALARTYPE>::iterator it = stl_sparse_matrix[i].begin();
                 it != stl_sparse_matrix[i].end();
                 ++it)
            {
              if (it->first >= new_size2)
                to_delete.push_back(it->first);
            }

            for (std::list<unsigned int>::iterator it = to_delete.begin(); it != to_delete.end(); ++it)
              stl_sparse_matrix[i].erase(*it);
          }
        }

        copy(stl_sparse_matrix, *this);

        _rows = new_size1;
        _cols = new_size2;
      }
    }

    const unsigned int & size1() const { return _rows; }
    const unsigned int & size2() const { return _cols; }
    const unsigned int & nnz() const { return _nonzeros; }

    const viennacl::ocl::handle<cl_mem> & handle1() const { return _row_buffer; }
    const viennacl::ocl::handle<cl_mem> & handle2() const { return _col_buffer; }
    const viennacl::ocl::handle<cl_mem> & handle() const { return _elements; }

  private:
    //copy construction and assignment are not supported:
    compressed_matrix(compressed_matrix const &);
    compressed_matrix & operator=(compressed_matrix const &);

    unsigned int _rows;
    unsigned int _cols;
    unsigned int _nonzeros;
    viennacl::ocl::handle<cl_mem> _row_buffer;
    viennacl::ocl::handle<cl_mem> _col_buffer;
    viennacl::ocl::handle<cl_mem> _elements;
  };

}

#endif
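Finally, an end-to-end sketch of typical use (my illustration; the include "viennacl/linalg/prod.hpp" and double-precision device support are assumptions here):

// --- usage sketch, not part of compressed_matrix.hpp ---
#include <iostream>
#include <map>
#include <vector>
#include "viennacl/compressed_matrix.hpp"
#include "viennacl/vector.hpp"
#include "viennacl/linalg/prod.hpp"   //assumed location of viennacl::linalg::prod

int main()
{
  // host-side assembly of a 3x3 sparse matrix
  std::vector< std::map<unsigned int, double> > stl_matrix(3);
  stl_matrix[0][0] = 1.0; stl_matrix[0][2] = 2.0;
  stl_matrix[1][1] = 3.0;
  stl_matrix[2][0] = 4.0; stl_matrix[2][2] = 5.0;

  std::vector<double> stl_vec(3, 1.0);

  // transfer both operands to the device
  viennacl::compressed_matrix<double> gpu_matrix(3, 3);
  viennacl::vector<double> gpu_vec(3);
  viennacl::copy(stl_matrix, gpu_matrix);
  viennacl::copy(stl_vec.begin(), stl_vec.end(), gpu_vec.begin());

  // sparse matrix-vector product on the device
  viennacl::vector<double> gpu_result(3);
  gpu_result = viennacl::linalg::prod(gpu_matrix, gpu_vec);

  // read back and print; expected output: 3, 3, 9
  std::vector<double> stl_result(3);
  viennacl::copy(gpu_result.begin(), gpu_result.end(), stl_result.begin());
  for (std::size_t i = 0; i < stl_result.size(); ++i)
    std::cout << stl_result[i] << std::endl;

  return 0;
}
// --- end of sketch ---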