|
ViennaCL - The Vienna Computing Library
1.1.2
|
00001 /* ======================================================================= 00002 Copyright (c) 2010, Institute for Microelectronics, TU Vienna. 00003 http://www.iue.tuwien.ac.at 00004 ----------------- 00005 ViennaCL - The Vienna Computing Library 00006 ----------------- 00007 00008 authors: Karl Rupp rupp@iue.tuwien.ac.at 00009 Florian Rudolf flo.rudy+viennacl@gmail.com 00010 Josef Weinbub weinbub@iue.tuwien.ac.at 00011 00012 license: MIT (X11), see file LICENSE in the ViennaCL base directory 00013 ======================================================================= */ 00014 00015 #ifndef _VIENNACL_COMPRESSED_MATRIX_OPERATIONS_HPP_ 00016 #define _VIENNACL_COMPRESSED_MATRIX_OPERATIONS_HPP_ 00017 00022 #include "viennacl/forwards.h" 00023 #include "viennacl/ocl/device.hpp" 00024 #include "viennacl/ocl/handle.hpp" 00025 #include "viennacl/ocl/kernel.hpp" 00026 #include "viennacl/scalar.hpp" 00027 #include "viennacl/vector.hpp" 00028 #include "viennacl/tools/tools.hpp" 00029 #include "viennacl/linalg/kernels/compressed_matrix_kernels.h" 00030 00031 namespace viennacl 00032 { 00033 namespace linalg 00034 { 00035 // A * x 00043 template<class SCALARTYPE, unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT> 00044 vector_expression<const compressed_matrix<SCALARTYPE, ALIGNMENT>, 00045 const vector<SCALARTYPE, VECTOR_ALIGNMENT>, 00046 op_prod > prod_impl(const compressed_matrix<SCALARTYPE, ALIGNMENT> & mat, 00047 const vector<SCALARTYPE, VECTOR_ALIGNMENT> & vec) 00048 { 00049 return vector_expression<const compressed_matrix<SCALARTYPE, ALIGNMENT>, 00050 const vector<SCALARTYPE, VECTOR_ALIGNMENT>, 00051 op_prod >(mat, vec); 00052 } 00053 00063 template<class TYPE, unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT> 00064 void prod_impl(const viennacl::compressed_matrix<TYPE, ALIGNMENT> & mat, 00065 const viennacl::vector<TYPE, VECTOR_ALIGNMENT> & vec, 00066 viennacl::vector<TYPE, VECTOR_ALIGNMENT> & result, 00067 size_t NUM_THREADS = 0) 00068 { 00069 assert(mat.size1() == result.size()); 00070 assert(mat.size2() == vec.size()); 00071 00072 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::compressed_matrix<TYPE, ALIGNMENT>::program_name(), "vec_mul"); 00073 00074 viennacl::ocl::enqueue(k(mat.handle1(), mat.handle2(), mat.handle(), vec, result, mat.size1())); 00075 } 00076 00082 template<typename SCALARTYPE, unsigned int MAT_ALIGNMENT, unsigned int VEC_ALIGNMENT> 00083 void inplace_solve(compressed_matrix<SCALARTYPE, MAT_ALIGNMENT> const & L, vector<SCALARTYPE, VEC_ALIGNMENT> & vec, viennacl::linalg::unit_lower_tag) 00084 { 00085 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::compressed_matrix<SCALARTYPE, MAT_ALIGNMENT>::program_name(), "lu_forward"); 00086 unsigned int threads = k.local_work_size(); 00087 00088 k.global_work_size(k.local_work_size()); 00089 viennacl::ocl::enqueue(k(L.handle1(), L.handle2(), L, 00090 viennacl::ocl::local_mem(sizeof(int) * (threads+1)), 00091 viennacl::ocl::local_mem(sizeof(SCALARTYPE) * threads), 00092 vec, L.size1())); 00093 } 00094 00101 template<typename SCALARTYPE, unsigned int MAT_ALIGNMENT, unsigned int VEC_ALIGNMENT, typename TAG> 00102 vector<SCALARTYPE, VEC_ALIGNMENT> solve(compressed_matrix<SCALARTYPE, MAT_ALIGNMENT> const & L, 00103 const vector<SCALARTYPE, VEC_ALIGNMENT> & vec, 00104 const viennacl::linalg::unit_lower_tag & tag) 00105 { 00106 // do an inplace solve on the result vector: 00107 vector<SCALARTYPE, VEC_ALIGNMENT> result(vec.size()); 00108 result = vec; 00109 00110 inplace_solve(L, result, tag); 00111 00112 return result; 00113 } 00114 00115 00121 template<typename SCALARTYPE, unsigned int MAT_ALIGNMENT, unsigned int VEC_ALIGNMENT> 00122 void inplace_solve(compressed_matrix<SCALARTYPE, MAT_ALIGNMENT> const & U, vector<SCALARTYPE, VEC_ALIGNMENT> & vec, viennacl::linalg::upper_tag) 00123 { 00124 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::compressed_matrix<SCALARTYPE, MAT_ALIGNMENT>::program_name(), "lu_backward"); 00125 unsigned int threads = k.local_work_size(); 00126 00127 k.global_work_size(k.local_work_size()); 00128 viennacl::ocl::enqueue(k(U.handle1(), U.handle2(), U.handle(), 00129 viennacl::ocl::local_mem(sizeof(int) * (threads+2)), 00130 viennacl::ocl::local_mem(sizeof(SCALARTYPE) * (threads+2)), 00131 vec, U.size1())); 00132 } 00133 00140 template<typename SCALARTYPE, unsigned int MAT_ALIGNMENT, unsigned int VEC_ALIGNMENT, typename TAG> 00141 vector<SCALARTYPE, VEC_ALIGNMENT> solve(compressed_matrix<SCALARTYPE, MAT_ALIGNMENT> const & L, 00142 const vector<SCALARTYPE, VEC_ALIGNMENT> & vec, 00143 viennacl::linalg::upper_tag const & tag) 00144 { 00145 // do an inplace solve on the result vector: 00146 vector<SCALARTYPE, VEC_ALIGNMENT> result(vec.size()); 00147 result = vec; 00148 00149 inplace_solve(L, result, tag); 00150 00151 return result; 00152 } 00153 00154 00155 } //namespace linalg 00156 00157 00158 00159 //v = A * x 00164 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00165 template <unsigned int MAT_ALIGNMENT> 00166 viennacl::vector<SCALARTYPE, ALIGNMENT> & 00167 viennacl::vector<SCALARTYPE, ALIGNMENT>::operator=(const viennacl::vector_expression< const compressed_matrix<SCALARTYPE, MAT_ALIGNMENT>, 00168 const viennacl::vector<SCALARTYPE, ALIGNMENT>, 00169 viennacl::op_prod> & proxy) 00170 { 00171 // check for the special case x = A * x 00172 if (proxy.rhs().handle() == this->handle()) 00173 { 00174 viennacl::vector<SCALARTYPE, ALIGNMENT> result(proxy.rhs().size()); 00175 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 00176 *this = result; 00177 return *this; 00178 } 00179 else 00180 { 00181 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this); 00182 return *this; 00183 } 00184 return *this; 00185 } 00186 00187 //v += A * x 00192 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00193 template <unsigned int MAT_ALIGNMENT> 00194 viennacl::vector<SCALARTYPE, ALIGNMENT> & 00195 viennacl::vector<SCALARTYPE, ALIGNMENT>::operator+=(const vector_expression< const compressed_matrix<SCALARTYPE, MAT_ALIGNMENT>, 00196 const vector<SCALARTYPE, ALIGNMENT>, 00197 op_prod> & proxy) 00198 { 00199 vector<SCALARTYPE, ALIGNMENT> result(proxy.lhs().size1()); 00200 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 00201 *this += result; 00202 return *this; 00203 } 00204 00209 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00210 template <unsigned int MAT_ALIGNMENT> 00211 viennacl::vector<SCALARTYPE, ALIGNMENT> & 00212 viennacl::vector<SCALARTYPE, ALIGNMENT>::operator-=(const vector_expression< const compressed_matrix<SCALARTYPE, MAT_ALIGNMENT>, 00213 const vector<SCALARTYPE, ALIGNMENT>, 00214 op_prod> & proxy) 00215 { 00216 vector<SCALARTYPE, ALIGNMENT> result(proxy.lhs().size1()); 00217 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 00218 *this -= result; 00219 return *this; 00220 } 00221 00222 00223 //free functions: 00228 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00229 template <unsigned int MAT_ALIGNMENT> 00230 viennacl::vector<SCALARTYPE, ALIGNMENT> 00231 viennacl::vector<SCALARTYPE, ALIGNMENT>::operator+(const vector_expression< const compressed_matrix<SCALARTYPE, MAT_ALIGNMENT>, 00232 const vector<SCALARTYPE, ALIGNMENT>, 00233 op_prod> & proxy) 00234 { 00235 assert(proxy.lhs().size1() == size()); 00236 vector<SCALARTYPE, ALIGNMENT> result(size()); 00237 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 00238 result += *this; 00239 return result; 00240 } 00241 00246 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00247 template <unsigned int MAT_ALIGNMENT> 00248 viennacl::vector<SCALARTYPE, ALIGNMENT> 00249 viennacl::vector<SCALARTYPE, ALIGNMENT>::operator-(const vector_expression< const compressed_matrix<SCALARTYPE, MAT_ALIGNMENT>, 00250 const vector<SCALARTYPE, ALIGNMENT>, 00251 op_prod> & proxy) 00252 { 00253 assert(proxy.lhs().size1() == size()); 00254 vector<SCALARTYPE, ALIGNMENT> result(size()); 00255 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 00256 result = *this - result; 00257 return result; 00258 } 00259 00260 } //namespace viennacl 00261 00262 00263 #endif
1.7.6.1