// ViennaCL - The Vienna Computing Library, version 1.1.2
// (banner restored from a Doxygen-generated source listing)
00001 /* ======================================================================= 00002 Copyright (c) 2010, Institute for Microelectronics, TU Vienna. 00003 http://www.iue.tuwien.ac.at 00004 ----------------- 00005 ViennaCL - The Vienna Computing Library 00006 ----------------- 00007 00008 authors: Karl Rupp rupp@iue.tuwien.ac.at 00009 Florian Rudolf flo.rudy+viennacl@gmail.com 00010 Josef Weinbub weinbub@iue.tuwien.ac.at 00011 00012 license: MIT (X11), see file LICENSE in the ViennaCL base directory 00013 ======================================================================= */ 00014 00015 #ifndef _VIENNACL_COORDINATE_MATRIX_HPP_ 00016 #define _VIENNACL_COORDINATE_MATRIX_HPP_ 00017 00022 #include <map> 00023 #include <vector> 00024 #include <list> 00025 00026 #include "viennacl/forwards.h" 00027 #include "viennacl/ocl/backend.hpp" 00028 #include "viennacl/vector.hpp" 00029 00030 #include "viennacl/linalg/coordinate_matrix_operations.hpp" 00031 00032 namespace viennacl 00033 { 00034 00035 00036 //provide copy-operation: 00044 template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT> 00045 void copy(const CPU_MATRIX & cpu_matrix, 00046 coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix ) 00047 { 00048 size_t group_num = 64; 00049 00050 // Step 1: Determine nonzeros: 00051 if ( cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0 ) 00052 { 00053 unsigned int num_entries = 0; 00054 for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); 00055 row_it != cpu_matrix.end1(); 00056 ++row_it) 00057 { 00058 for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); 00059 col_it != row_it.end(); 00060 ++col_it) 00061 { 00062 ++num_entries; 00063 } 00064 } 00065 00066 // Step 2: Set up matrix data: 00067 std::cout << "Number of entries: " << num_entries << std::endl; 00068 gpu_matrix.nonzeros_ = num_entries; 00069 gpu_matrix.rows_ = cpu_matrix.size1(); 00070 gpu_matrix.cols_ = cpu_matrix.size2(); 00071 00072 std::vector<cl_uint> 
coord_buffer(2*gpu_matrix.internal_nnz()); 00073 std::vector<cl_uint> group_boundaries(group_num + 1); 00074 std::vector<SCALARTYPE> elements(gpu_matrix.internal_nnz()); 00075 00076 unsigned int data_index = 0; 00077 unsigned int current_fraction = 0; 00078 00079 for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); 00080 row_it != cpu_matrix.end1(); 00081 ++row_it) 00082 { 00083 for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); 00084 col_it != row_it.end(); 00085 ++col_it) 00086 { 00087 coord_buffer[2*data_index] = static_cast<unsigned int>(col_it.index1()); 00088 coord_buffer[2*data_index + 1] = static_cast<unsigned int>(col_it.index2()); 00089 elements[data_index] = *col_it; 00090 ++data_index; 00091 } 00092 00093 if (data_index > (current_fraction + 1) / static_cast<double>(group_num) * num_entries) //split data equally over 64 groups 00094 group_boundaries[++current_fraction] = data_index; 00095 } 00096 00097 //write end of last group: 00098 group_boundaries[group_num] = data_index; 00099 //group_boundaries[1] = data_index; //for one compute unit 00100 00101 /*std::cout << "Group boundaries: " << std::endl; 00102 for (size_t i=0; i<group_boundaries.size(); ++i) 00103 std::cout << group_boundaries[i] << std::endl;*/ 00104 00105 gpu_matrix.coord_buffer_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, coord_buffer); 00106 gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, elements); 00107 gpu_matrix.group_boundaries_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, group_boundaries); 00108 } 00109 } 00110 00116 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00117 void copy(const std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix, 00118 coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix ) 00119 { 00120 copy(tools::const_sparse_matrix_adapter<SCALARTYPE>(cpu_matrix), gpu_matrix); 00121 } 00122 00123 //gpu to cpu: 00133 template 
<typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT> 00134 void copy(const coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix, 00135 CPU_MATRIX & cpu_matrix ) 00136 { 00137 if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 ) 00138 { 00139 cpu_matrix.resize(gpu_matrix.size1(), gpu_matrix.size2(), false); 00140 00141 //get raw data from memory: 00142 std::vector<unsigned int> coord_buffer(2*gpu_matrix.nnz()); 00143 std::vector<SCALARTYPE> elements(gpu_matrix.nnz()); 00144 00145 //std::cout << "GPU nonzeros: " << gpu_matrix.nnz() << std::endl; 00146 00147 cl_int err; 00148 err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle12(), CL_TRUE, 0, sizeof(unsigned int)* 2 *gpu_matrix.nnz(), &(coord_buffer[0]), 0, NULL, NULL); 00149 VIENNACL_ERR_CHECK(err); 00150 err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.nnz(), &(elements[0]), 0, NULL, NULL); 00151 VIENNACL_ERR_CHECK(err); 00152 viennacl::ocl::get_queue().finish(); 00153 00154 //fill the cpu_matrix: 00155 for (unsigned int index = 0; index < gpu_matrix.nnz(); ++index) 00156 { 00157 cpu_matrix(coord_buffer[2*index], coord_buffer[2*index+1]) = elements[index]; 00158 } 00159 } 00160 } 00161 00167 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00168 void copy(const coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix, 00169 std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix) 00170 { 00171 tools::sparse_matrix_adapter<SCALARTYPE> temp(cpu_matrix); 00172 copy(gpu_matrix, temp); 00173 } 00174 00175 00177 00184 template<class SCALARTYPE, unsigned int ALIGNMENT /* see VCLForwards.h */ > 00185 class coordinate_matrix 00186 { 00187 public: 00188 typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<SCALARTYPE>::ResultType> value_type; 00189 00191 coordinate_matrix() : rows_(0), cols_(0), nonzeros_(0) { viennacl::linalg::kernels::coordinate_matrix<SCALARTYPE, 
ALIGNMENT>::init(); } 00192 00199 coordinate_matrix(unsigned int rows, unsigned int cols, unsigned int nonzeros = 0) : 00200 rows_(rows), cols_(cols), nonzeros_(nonzeros) 00201 { 00202 viennacl::linalg::kernels::coordinate_matrix<SCALARTYPE, ALIGNMENT>::init(); 00203 if (nonzeros > 0) 00204 { 00205 coord_buffer_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * 2 * internal_nnz()); 00206 elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * internal_nnz()); 00207 group_boundaries_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * (rows + 1)); 00208 } 00209 } 00210 00212 void reserve(unsigned int new_nonzeros) 00213 { 00214 if (new_nonzeros > nonzeros_) 00215 { 00216 viennacl::ocl::handle<cl_mem> coord_buffer_old = coord_buffer_; 00217 viennacl::ocl::handle<cl_mem> elements_old = elements_; 00218 coord_buffer_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * 2 * internal_nnz()); 00219 elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * internal_nnz()); 00220 00221 cl_int err; 00222 err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), coord_buffer_old, coord_buffer_, 0, 0, sizeof(cl_uint) * 2 * nonzeros_, 0, NULL, NULL); 00223 VIENNACL_ERR_CHECK(err); 00224 err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), elements_old, elements_, 0, 0, sizeof(SCALARTYPE)*nonzeros_, 0, NULL, NULL); 00225 VIENNACL_ERR_CHECK(err); 00226 00227 //new memory must be padded with zeros: 00228 std::vector<long> temp(internal_nnz() - nonzeros_); 00229 err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), coord_buffer_old, coord_buffer_, 0, nonzeros_, sizeof(cl_uint) * 2 * temp.size(), 0, NULL, NULL); 00230 VIENNACL_ERR_CHECK(err); 00231 err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), elements_old, elements_, 0, nonzeros_, 
sizeof(SCALARTYPE)*temp.size(), 0, NULL, NULL); 00232 VIENNACL_ERR_CHECK(err); 00233 } 00234 } 00235 00242 void resize(unsigned int new_size1, unsigned int new_size2, bool preserve = true) 00243 { 00244 assert (new_size1 > 0 && new_size2 > 0); 00245 00246 if (new_size1 < rows_ || new_size2 < cols_) //enlarge buffer 00247 { 00248 std::vector<std::map<unsigned int, SCALARTYPE> > stl_sparse_matrix; 00249 if (rows_ > 0) 00250 stl_sparse_matrix.resize(rows_); 00251 00252 if (preserve && rows_ > 0) 00253 viennacl::copy(*this, stl_sparse_matrix); 00254 00255 stl_sparse_matrix.resize(new_size1); 00256 00257 std::cout << "Cropping STL matrix of size " << stl_sparse_matrix.size() << std::endl; 00258 if (new_size2 < cols_ && rows_ > 0) 00259 { 00260 for (size_t i=0; i<stl_sparse_matrix.size(); ++i) 00261 { 00262 std::list<unsigned int> to_delete; 00263 for (typename std::map<unsigned int, SCALARTYPE>::iterator it = stl_sparse_matrix[i].begin(); 00264 it != stl_sparse_matrix[i].end(); 00265 ++it) 00266 { 00267 if (it->first >= new_size2) 00268 to_delete.push_back(it->first); 00269 } 00270 00271 for (std::list<unsigned int>::iterator it = to_delete.begin(); it != to_delete.end(); ++it) 00272 stl_sparse_matrix[i].erase(*it); 00273 } 00274 std::cout << "Cropping done..." 
<< std::endl; 00275 } 00276 00277 rows_ = new_size1; 00278 cols_ = new_size2; 00279 viennacl::copy(stl_sparse_matrix, *this); 00280 } 00281 00282 rows_ = new_size1; 00283 cols_ = new_size2; 00284 } 00285 00286 00288 unsigned int size1() const { return rows_; } 00290 unsigned int size2() const { return cols_; } 00292 unsigned int nnz() const { return nonzeros_; } 00294 unsigned int internal_nnz() const { return viennacl::tools::roundUpToNextMultiple<unsigned int>(nonzeros_, ALIGNMENT);; } 00295 00297 const viennacl::ocl::handle<cl_mem> & handle12() const { return coord_buffer_; } 00299 const viennacl::ocl::handle<cl_mem> & handle() const { return elements_; } 00301 const viennacl::ocl::handle<cl_mem> & handle3() const { return group_boundaries_; } 00302 00303 #if defined(_MSC_VER) && _MSC_VER < 1500 //Visual Studio 2005 needs special treatment 00304 template <typename CPU_MATRIX> 00305 friend void copy(const CPU_MATRIX & cpu_matrix, coordinate_matrix & gpu_matrix ); 00306 #else 00307 template <typename CPU_MATRIX, typename SCALARTYPE2, unsigned int ALIGNMENT2> 00308 friend void copy(const CPU_MATRIX & cpu_matrix, coordinate_matrix<SCALARTYPE2, ALIGNMENT2> & gpu_matrix ); 00309 #endif 00310 00311 private: 00313 coordinate_matrix(coordinate_matrix const &); 00314 00316 coordinate_matrix & operator=(coordinate_matrix const &); 00317 00318 00319 unsigned int rows_; 00320 unsigned int cols_; 00321 unsigned int nonzeros_; 00322 viennacl::ocl::handle<cl_mem> coord_buffer_; 00323 viennacl::ocl::handle<cl_mem> elements_; 00324 viennacl::ocl::handle<cl_mem> group_boundaries_; 00325 }; 00326 00327 00328 } 00329 00330 #endif
// (listing generated by Doxygen 1.7.6.1)