|
ViennaCL - The Vienna Computing Library
1.1.2
|
00001 /* ======================================================================= 00002 Copyright (c) 2010, Institute for Microelectronics, TU Vienna. 00003 http://www.iue.tuwien.ac.at 00004 ----------------- 00005 ViennaCL - The Vienna Computing Library 00006 ----------------- 00007 00008 authors: Karl Rupp rupp@iue.tuwien.ac.at 00009 Florian Rudolf flo.rudy+viennacl@gmail.com 00010 Josef Weinbub weinbub@iue.tuwien.ac.at 00011 00012 license: MIT (X11), see file LICENSE in the ViennaCL base directory 00013 ======================================================================= */ 00014 00015 #ifndef _VIENNACL_COORDINATE_MATRIX_OPERATIONS_HPP_ 00016 #define _VIENNACL_COORDINATE_MATRIX_OPERATIONS_HPP_ 00017 00022 #include "viennacl/forwards.h" 00023 #include "viennacl/ocl/device.hpp" 00024 #include "viennacl/ocl/handle.hpp" 00025 #include "viennacl/ocl/kernel.hpp" 00026 #include "viennacl/scalar.hpp" 00027 #include "viennacl/vector.hpp" 00028 #include "viennacl/tools/tools.hpp" 00029 #include "viennacl/linalg/kernels/coordinate_matrix_kernels.h" 00030 00031 namespace viennacl 00032 { 00033 namespace linalg 00034 { 00035 00036 00037 // A * x 00045 template<class SCALARTYPE, unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT> 00046 vector_expression<const coordinate_matrix<SCALARTYPE, ALIGNMENT>, 00047 const vector<SCALARTYPE, VECTOR_ALIGNMENT>, 00048 op_prod > prod_impl(const coordinate_matrix<SCALARTYPE, ALIGNMENT> & mat, 00049 const vector<SCALARTYPE, VECTOR_ALIGNMENT> & vec) 00050 { 00051 return vector_expression<const coordinate_matrix<SCALARTYPE, ALIGNMENT>, 00052 const vector<SCALARTYPE, VECTOR_ALIGNMENT>, 00053 op_prod >(mat, vec); 00054 } 00055 00056 // A * x 00065 template<class SCALARTYPE, unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT> 00066 viennacl::vector_expression<const viennacl::coordinate_matrix<SCALARTYPE, ALIGNMENT>, 00067 const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>, 00068 viennacl::op_prod > prod_impl(const viennacl::coordinate_matrix<SCALARTYPE, ALIGNMENT> & mat, 00069 const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT> & vec, 00070 size_t NUM_THREADS) 00071 { 00072 return viennacl::vector_expression<const viennacl::coordinate_matrix<SCALARTYPE, ALIGNMENT>, 00073 const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>, 00074 viennacl::op_prod >(mat, vec); 00075 } 00076 00077 //namespace { 00086 template<class TYPE, unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT> 00087 void prod_impl(const viennacl::coordinate_matrix<TYPE, ALIGNMENT> & mat, 00088 const viennacl::vector<TYPE, VECTOR_ALIGNMENT> & vec, 00089 viennacl::vector<TYPE, VECTOR_ALIGNMENT> & result) 00090 { 00091 assert(mat.size1() == result.size()); 00092 assert(mat.size2() == vec.size()); 00093 result.clear(); 00094 00095 //std::cout << "prod(coordinate_matrix" << ALIGNMENT << ", vector) called with internal_nnz=" << mat.internal_nnz() << std::endl; 00096 00097 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::coordinate_matrix<TYPE, ALIGNMENT>::program_name(), "vec_mul"); 00098 unsigned int thread_num = 256; //k.local_work_size(0); 00099 00100 k.local_work_size(0, thread_num); 00101 00102 k.global_work_size(0, 64 * thread_num); //64 work groups are hard-coded for now. Gives reasonable performance in most cases 00103 //k.global_work_size(0, thread_num); //Only one work group 00104 viennacl::ocl::enqueue(k(mat.handle12(), mat, mat.handle3(), 00105 vec, 00106 result, 00107 viennacl::ocl::local_mem(sizeof(cl_uint)*thread_num), 00108 viennacl::ocl::local_mem(sizeof(TYPE)*thread_num)) ); 00109 00110 } 00111 //}; 00112 00113 } //namespace linalg 00114 00115 00116 00121 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00122 template <unsigned int MAT_ALIGNMENT> 00123 viennacl::vector<SCALARTYPE, ALIGNMENT> & 00124 viennacl::vector<SCALARTYPE, ALIGNMENT>::operator=(const viennacl::vector_expression< const coordinate_matrix<SCALARTYPE, MAT_ALIGNMENT>, 00125 const viennacl::vector<SCALARTYPE, ALIGNMENT>, 00126 viennacl::op_prod> & proxy) 00127 { 00128 // check for the special case x = A * x 00129 if (proxy.rhs().handle() == this->handle()) 00130 { 00131 viennacl::vector<SCALARTYPE, ALIGNMENT> result(proxy.rhs().size()); 00132 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 00133 *this = result; 00134 return *this; 00135 } 00136 else 00137 { 00138 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this); 00139 return *this; 00140 } 00141 return *this; 00142 } 00143 00144 //v += A * x 00149 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00150 template <unsigned int MAT_ALIGNMENT> 00151 viennacl::vector<SCALARTYPE, ALIGNMENT> & 00152 viennacl::vector<SCALARTYPE, ALIGNMENT>::operator+=(const vector_expression< const coordinate_matrix<SCALARTYPE, MAT_ALIGNMENT>, 00153 const vector<SCALARTYPE, ALIGNMENT>, 00154 op_prod> & proxy) 00155 { 00156 vector<SCALARTYPE, ALIGNMENT> result(proxy.lhs().size1()); 00157 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 00158 *this += result; 00159 return *this; 00160 } 00161 00166 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00167 template <unsigned int MAT_ALIGNMENT> 00168 viennacl::vector<SCALARTYPE, ALIGNMENT> & 00169 viennacl::vector<SCALARTYPE, ALIGNMENT>::operator-=(const vector_expression< const coordinate_matrix<SCALARTYPE, MAT_ALIGNMENT>, 00170 const vector<SCALARTYPE, ALIGNMENT>, 00171 op_prod> & proxy) 00172 { 00173 vector<SCALARTYPE, ALIGNMENT> result(proxy.get_lhs().size1()); 00174 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 00175 *this -= result; 00176 return *this; 00177 } 00178 00179 00180 //free functions: 00185 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00186 template <unsigned int MAT_ALIGNMENT> 00187 viennacl::vector<SCALARTYPE, ALIGNMENT> 00188 viennacl::vector<SCALARTYPE, ALIGNMENT>::operator+(const vector_expression< const coordinate_matrix<SCALARTYPE, MAT_ALIGNMENT>, 00189 const vector<SCALARTYPE, ALIGNMENT>, 00190 op_prod> & proxy) 00191 { 00192 assert(proxy.get_lhs().size1() == size()); 00193 vector<SCALARTYPE, ALIGNMENT> result(size()); 00194 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 00195 result += *this; 00196 return result; 00197 } 00198 00203 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00204 template <unsigned int MAT_ALIGNMENT> 00205 viennacl::vector<SCALARTYPE, ALIGNMENT> 00206 viennacl::vector<SCALARTYPE, ALIGNMENT>::operator-(const vector_expression< const coordinate_matrix<SCALARTYPE, MAT_ALIGNMENT>, 00207 const vector<SCALARTYPE, ALIGNMENT>, 00208 op_prod> & proxy) 00209 { 00210 assert(proxy.get_lhs().size1() == size()); 00211 vector<SCALARTYPE, ALIGNMENT> result(size()); 00212 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 00213 result = *this - result; 00214 return result; 00215 } 00216 00217 } //namespace viennacl 00218 00219 00220 #endif
1.7.6.1