|
ViennaCL - The Vienna Computing Library
1.1.2
|
00001 /* ======================================================================= 00002 Copyright (c) 2010, Institute for Microelectronics, TU Vienna. 00003 http://www.iue.tuwien.ac.at 00004 ----------------- 00005 ViennaCL - The Vienna Computing Library 00006 ----------------- 00007 00008 authors: Karl Rupp rupp@iue.tuwien.ac.at 00009 Florian Rudolf flo.rudy+viennacl@gmail.com 00010 Josef Weinbub weinbub@iue.tuwien.ac.at 00011 00012 license: MIT (X11), see file LICENSE in the ViennaCL base directory 00013 ======================================================================= */ 00014 00015 #ifndef _VIENNACL_BICGSTAB_HPP_ 00016 #define _VIENNACL_BICGSTAB_HPP_ 00017 00022 #include <vector> 00023 #include <cmath> 00024 #include "viennacl/forwards.h" 00025 #include "viennacl/tools/tools.hpp" 00026 #include "viennacl/linalg/prod.hpp" 00027 #include "viennacl/linalg/inner_prod.hpp" 00028 00029 namespace viennacl 00030 { 00031 namespace linalg 00032 { 00033 00036 class bicgstab_tag 00037 { 00038 public: 00044 bicgstab_tag(double tol = 1e-8, unsigned int max_iterations = 300) : _tol(tol), _iterations(max_iterations) {}; 00045 00047 double tolerance() const { return _tol; } 00049 unsigned int max_iterations() const { return _iterations; } 00050 00052 unsigned int iters() const { return iters_taken_; } 00053 void iters(unsigned int i) const { iters_taken_ = i; } 00054 00056 double error() const { return last_error_; } 00058 void error(double e) const { last_error_ = e; } 00059 00060 private: 00061 double _tol; 00062 unsigned int _iterations; 00063 00064 //return values from solver 00065 mutable unsigned int iters_taken_; 00066 mutable double last_error_; 00067 }; 00068 00069 00079 template <typename MatrixType, typename VectorType> 00080 VectorType solve(const MatrixType & matrix, VectorType const & rhs, bicgstab_tag const & tag) 00081 { 00082 typedef typename viennacl::tools::result_of::value_type<VectorType>::type ScalarType; 00083 typedef typename viennacl::tools::CPU_SCALAR_TYPE_DEDUCER<ScalarType>::ResultType CPU_ScalarType; 00084 unsigned int problem_size = viennacl::tools::traits::size(rhs); 00085 VectorType result(problem_size); 00086 viennacl::tools::traits::clear(result); 00087 00088 VectorType residual = rhs; 00089 VectorType p = rhs; 00090 VectorType r0star = rhs; 00091 VectorType tmp0(problem_size); 00092 VectorType tmp1(problem_size); 00093 VectorType s(problem_size); 00094 00095 CPU_ScalarType ip_rr0star = viennacl::linalg::inner_prod(rhs,r0star); 00096 CPU_ScalarType norm_rhs_host = ip_rr0star; 00097 CPU_ScalarType beta; 00098 CPU_ScalarType alpha; 00099 CPU_ScalarType omega; 00100 ScalarType inner_prod_temp; //temporary variable for inner product computation 00101 ScalarType new_ip_rr0star = 0; 00102 00103 for (unsigned int i = 0; i < tag.max_iterations(); ++i) 00104 { 00105 tag.iters(i+1); 00106 tmp0 = viennacl::linalg::prod(matrix, p); 00107 //alpha = ip_rr0star / viennacl::linalg::inner_prod(tmp0, r0star); 00108 inner_prod_temp = viennacl::linalg::inner_prod(tmp0, r0star); 00109 alpha = ip_rr0star / static_cast<CPU_ScalarType>(inner_prod_temp); 00110 00111 //s = residual - alpha*tmp0; 00112 s = residual; 00113 s -= alpha*tmp0; 00114 00115 //std::cout << "alpha: " << alpha << std::endl; 00116 /*std::cout << "s[0]: " << s[0] << std::endl; 00117 std::cout << "s[end]: " << s[s.size()-1] << std::endl; 00118 for (long k=0; k<10; ++k) 00119 std::cout << s[k] << std::endl; 00120 exit(0);*/ 00121 00122 00123 tmp1 = viennacl::linalg::prod(matrix, s); 00124 //omega = viennacl::linalg::inner_prod(tmp1, s) / viennacl::linalg::inner_prod(tmp1, tmp1); 00125 inner_prod_temp = viennacl::linalg::inner_prod(tmp1, s); 00126 omega = inner_prod_temp; 00127 inner_prod_temp = viennacl::linalg::inner_prod(tmp1, tmp1); 00128 omega /= inner_prod_temp; 00129 00130 //result += alpha * p + omega * s; 00131 result += alpha * p; 00132 result += omega * s; 00133 00134 //residual = s - omega * tmp1; 00135 residual = s; 00136 residual -= omega*tmp1; 00137 00138 new_ip_rr0star = viennacl::linalg::inner_prod(residual,r0star); 00139 if (std::fabs( viennacl::linalg::inner_prod(residual, residual) / norm_rhs_host) < tag.tolerance() * tag.tolerance()) 00140 break; 00141 00142 //beta = new_ip_rr0star / ip_rr0star * alpha/omega; 00143 CPU_ScalarType cpu_temp = new_ip_rr0star; //read from device only once 00144 beta = cpu_temp / ip_rr0star * alpha/omega; 00145 ip_rr0star = cpu_temp; 00146 00147 //p = residual + beta * (p - omega*tmp0); 00148 p -= omega * tmp0; 00149 p *= beta; 00150 p += residual; 00151 } 00152 00153 //store last error estimate: 00154 tag.error(std::sqrt(std::fabs(viennacl::linalg::inner_prod(residual, residual) / norm_rhs_host))); 00155 00156 return result; 00157 } 00158 00159 template <typename MatrixType, typename VectorType> 00160 VectorType solve(const MatrixType & matrix, VectorType const & rhs, bicgstab_tag const & tag, viennacl::linalg::no_precond) 00161 { 00162 return solve(matrix, rhs, tag); 00163 } 00164 00175 template <typename MatrixType, typename VectorType, typename PreconditionerType> 00176 VectorType solve(const MatrixType & matrix, VectorType const & rhs, bicgstab_tag const & tag, PreconditionerType const & precond) 00177 { 00178 typedef typename viennacl::tools::result_of::value_type<VectorType>::type ScalarType; 00179 typedef typename viennacl::tools::CPU_SCALAR_TYPE_DEDUCER<ScalarType>::ResultType CPU_ScalarType; 00180 unsigned int problem_size = viennacl::tools::traits::size(rhs); 00181 VectorType result(problem_size); 00182 result.clear(); 00183 00184 VectorType residual = rhs; 00185 precond.apply(residual); 00186 VectorType r0star = residual; //can be chosen arbitrarily in fact 00187 VectorType tmp0(problem_size); 00188 VectorType tmp1(problem_size); 00189 VectorType s(problem_size); 00190 00191 VectorType p = residual; 00192 00193 CPU_ScalarType ip_rr0star = viennacl::linalg::inner_prod(residual,r0star); 00194 CPU_ScalarType norm_rhs_host = ip_rr0star; 00195 CPU_ScalarType beta; 00196 CPU_ScalarType alpha; 00197 CPU_ScalarType omega; 00198 ScalarType new_ip_rr0star = 0; 00199 ScalarType inner_prod_temp; //temporary variable for inner product 00200 00201 for (unsigned int i = 0; i < tag.max_iterations(); ++i) 00202 { 00203 tag.iters(i+1); 00204 tmp0 = viennacl::linalg::prod(matrix, p); 00205 precond.apply(tmp0); 00206 //alpha = ip_rr0star / viennacl::linalg::inner_prod(tmp0, r0star); 00207 inner_prod_temp = viennacl::linalg::inner_prod(tmp0, r0star); 00208 alpha = ip_rr0star / static_cast<CPU_ScalarType>(inner_prod_temp); 00209 00210 //s = residual - alpha*tmp0; 00211 s = residual; 00212 s -= alpha*tmp0; 00213 00214 tmp1 = viennacl::linalg::prod(matrix, s); 00215 precond.apply(tmp1); 00216 //omega = viennacl::linalg::inner_prod(tmp1, s) / viennacl::linalg::inner_prod(tmp1, tmp1); 00217 inner_prod_temp = viennacl::linalg::inner_prod(tmp1, s); 00218 omega = inner_prod_temp; 00219 inner_prod_temp = viennacl::linalg::inner_prod(tmp1, tmp1); 00220 omega /= inner_prod_temp; 00221 00222 //result += alpha * p + omega * s; 00223 result += alpha * p; 00224 result += omega * s; 00225 //residual = s - omega * tmp1; 00226 residual = s; 00227 residual -= omega*tmp1; 00228 00229 new_ip_rr0star = viennacl::linalg::inner_prod(residual,r0star); 00230 if (std::fabs(viennacl::linalg::inner_prod(residual, residual) / norm_rhs_host) < tag.tolerance() * tag.tolerance() ) 00231 break; 00232 00233 //beta = new_ip_rr0star / ip_rr0star * alpha/omega; 00234 CPU_ScalarType cpu_temp = new_ip_rr0star; //read from device only once 00235 beta = cpu_temp / ip_rr0star * alpha/omega; 00236 ip_rr0star = cpu_temp; 00237 00238 //p = residual + beta * (p - omega*tmp0); 00239 p -= omega * tmp0; 00240 //p = residual + beta * p; 00241 p *= beta; 00242 p += residual; 00243 } 00244 00245 //store last error estimate: 00246 tag.error(std::sqrt(std::fabs(viennacl::linalg::inner_prod(residual, residual) / norm_rhs_host))); 00247 00248 return result; 00249 } 00250 00251 } 00252 } 00253 00254 #endif
1.7.6.1