|
ViennaCL - The Vienna Computing Library
1.1.2
|
00001 /* ======================================================================= 00002 Copyright (c) 2010, Institute for Microelectronics, TU Vienna. 00003 http://www.iue.tuwien.ac.at 00004 ----------------- 00005 ViennaCL - The Vienna Computing Library 00006 ----------------- 00007 00008 authors: Karl Rupp rupp@iue.tuwien.ac.at 00009 Florian Rudolf flo.rudy+viennacl@gmail.com 00010 Josef Weinbub weinbub@iue.tuwien.ac.at 00011 00012 license: MIT (X11), see file LICENSE in the ViennaCL base directory 00013 ======================================================================= */ 00014 00015 #ifndef _VIENNACL_GMRES_HPP_ 00016 #define _VIENNACL_GMRES_HPP_ 00017 00022 #include <vector> 00023 #include <cmath> 00024 #include "viennacl/forwards.h" 00025 #include "viennacl/tools/tools.hpp" 00026 #include "viennacl/linalg/norm_2.hpp" 00027 #include "viennacl/linalg/prod.hpp" 00028 #include "viennacl/linalg/inner_prod.hpp" 00029 00030 namespace viennacl 00031 { 00032 namespace linalg 00033 { 00034 00037 class gmres_tag //generalized minimum residual 00038 { 00039 public: 00046 gmres_tag(double tol = 1e-10, unsigned int max_iterations = 300, unsigned int krylov_dim = 20) 00047 : _tol(tol), _iterations(max_iterations), _krylov_dim(krylov_dim), iters_taken_(0) {}; 00048 00050 double tolerance() const { return _tol; } 00052 unsigned int max_iterations() const { return _iterations; } 00054 unsigned int krylov_dim() const { return _krylov_dim; } 00056 unsigned int max_restarts() const 00057 { 00058 unsigned int ret = _iterations / _krylov_dim; 00059 if (ret > 0 && (ret * _krylov_dim == _iterations) ) 00060 return ret - 1; 00061 return ret; 00062 } 00063 00065 unsigned int iters() const { return iters_taken_; } 00067 void iters(unsigned int i) const { iters_taken_ = i; } 00068 00070 double error() const { return last_error_; } 00072 void error(double e) const { last_error_ = e; } 00073 00074 private: 00075 double _tol; 00076 unsigned int _iterations; 00077 unsigned int _krylov_dim; 00078 00079 //return values from solver 00080 mutable unsigned int iters_taken_; 00081 mutable double last_error_; 00082 }; 00083 00084 namespace 00085 { 00086 00087 template <typename SRC_VECTOR, typename DEST_VECTOR> 00088 void gmres_copy_helper(SRC_VECTOR const & src, DEST_VECTOR & dest, unsigned int len) 00089 { 00090 for (unsigned int i=0; i<len; ++i) 00091 dest[i] = src[i]; 00092 } 00093 00094 template <typename ScalarType, typename DEST_VECTOR> 00095 void gmres_copy_helper(viennacl::vector<ScalarType> const & src, DEST_VECTOR & dest, unsigned int len) 00096 { 00097 viennacl::copy(src.begin(), src.begin() + len, dest.begin()); 00098 } 00099 00100 template <typename ScalarType> 00101 void gmres_copy_helper(viennacl::vector<ScalarType> const & src, viennacl::vector<ScalarType> & dest, unsigned int len) 00102 { 00103 viennacl::copy(src.begin(), src.begin() + len, dest.begin()); 00104 } 00105 00106 } 00107 00118 template <typename MatrixType, typename VectorType, typename PreconditionerType> 00119 VectorType solve(const MatrixType & matrix, VectorType const & rhs, gmres_tag const & tag, PreconditionerType const & precond) 00120 { 00121 typedef typename viennacl::tools::result_of::value_type<VectorType>::type ScalarType; 00122 typedef typename viennacl::tools::CPU_SCALAR_TYPE_DEDUCER<ScalarType>::ResultType CPU_ScalarType; 00123 unsigned int problem_size = viennacl::tools::traits::size(rhs); 00124 VectorType result(problem_size); 00125 viennacl::tools::traits::clear(result); 00126 unsigned int krylov_dim = tag.krylov_dim(); 00127 if (problem_size < tag.krylov_dim()) 00128 krylov_dim = problem_size; //A Krylov space larger than the matrix would lead to seg-faults (mathematically, error is certain to be zero already) 00129 00130 VectorType res(problem_size); 00131 VectorType v_k_tilde(problem_size); 00132 VectorType v_k_tilde_temp(problem_size); 00133 00134 std::vector< std::vector<CPU_ScalarType> > R(krylov_dim); 00135 std::vector<CPU_ScalarType> projection_rhs(krylov_dim); 00136 std::vector<VectorType> U(krylov_dim); 00137 00138 const CPU_ScalarType gpu_scalar_minus_1 = static_cast<CPU_ScalarType>(-1); //representing the scalar '-1' on the GPU. Prevents blocking write operations 00139 const CPU_ScalarType gpu_scalar_1 = static_cast<CPU_ScalarType>(1); //representing the scalar '1' on the GPU. Prevents blocking write operations 00140 const CPU_ScalarType gpu_scalar_2 = static_cast<CPU_ScalarType>(2); //representing the scalar '2' on the GPU. Prevents blocking write operations 00141 00142 CPU_ScalarType norm_rhs = viennacl::linalg::norm_2(rhs); 00143 00144 unsigned int k; 00145 for (k = 0; k < krylov_dim; ++k) 00146 { 00147 R[k].resize(tag.krylov_dim()); 00148 viennacl::tools::traits::resize(U[k], problem_size); 00149 } 00150 00151 //std::cout << "Starting GMRES..." << std::endl; 00152 tag.iters(0); 00153 00154 for (unsigned int it = 0; it <= tag.max_restarts(); ++it) 00155 { 00156 res = rhs; 00157 res -= viennacl::linalg::prod(matrix, result); //initial guess zero 00158 precond.apply(res); 00159 00160 CPU_ScalarType rho_0 = viennacl::linalg::norm_2(res); 00161 CPU_ScalarType rho = static_cast<CPU_ScalarType>(1.0); 00162 00163 if (rho_0 / norm_rhs < tag.tolerance()) 00164 { 00165 //std::cout << "Allowed Error reached at begin of loop" << std::endl; 00166 tag.error(rho_0 / norm_rhs); 00167 return result; 00168 } 00169 00170 res /= rho_0; 00171 for (k=0; k<krylov_dim; ++k) 00172 { 00173 viennacl::tools::traits::clear(R[k]); 00174 viennacl::tools::traits::clear(U[k]); 00175 R[k].resize(krylov_dim); 00176 viennacl::tools::traits::resize(U[k], problem_size); 00177 } 00178 00179 for (k = 0; k < krylov_dim; ++k) 00180 { 00181 tag.iters( tag.iters() + 1 ); //increase iteration counter 00182 00183 //compute v_k = A * v_{k-1} via Householder matrices 00184 if (k == 0) 00185 { 00186 v_k_tilde = viennacl::linalg::prod(matrix, res); 00187 precond.apply(v_k_tilde); 00188 } 00189 else 00190 { 00191 viennacl::tools::traits::clear(v_k_tilde); 00192 v_k_tilde[k-1] = gpu_scalar_1; 00193 //Householder rotations part 1 00194 for (int i = k-1; i > -1; --i) 00195 v_k_tilde -= U[i] * (viennacl::linalg::inner_prod(U[i], v_k_tilde) * gpu_scalar_2); 00196 00197 v_k_tilde_temp = viennacl::linalg::prod(matrix, v_k_tilde); 00198 precond.apply(v_k_tilde_temp); 00199 v_k_tilde = v_k_tilde_temp; 00200 00201 //Householder rotations part 2 00202 for (unsigned int i = 0; i < k; ++i) 00203 v_k_tilde -= U[i] * (viennacl::linalg::inner_prod(U[i], v_k_tilde) * gpu_scalar_2); 00204 } 00205 00206 viennacl::tools::traits::clear(U[k]); 00207 viennacl::tools::traits::resize(U[k], problem_size); 00208 //copy first k entries from v_k_tilde to U[k]: 00209 gmres_copy_helper(v_k_tilde, U[k], k); 00210 00211 U[k][k] = std::sqrt( viennacl::linalg::inner_prod(v_k_tilde, v_k_tilde) - viennacl::linalg::inner_prod(U[k], U[k]) ); 00212 00213 //copy first k+1 entries from U[k] to R[k] 00214 gmres_copy_helper(U[k], R[k], k+1); 00215 00216 U[k] -= v_k_tilde; 00217 U[k] *= gpu_scalar_minus_1 / viennacl::linalg::norm_2( U[k] ); 00218 00219 res -= U[k] * (viennacl::linalg::inner_prod( U[k], res ) * gpu_scalar_2); 00220 00221 projection_rhs[k] = res[k]; 00222 rho *= std::sin( std::acos(projection_rhs[k] / rho) ); 00223 00224 if (std::fabs(rho * rho_0 / norm_rhs) < tag.tolerance()) 00225 { 00226 //std::cout << "Krylov space big enough" << endl; 00227 tag.error( std::fabs(rho*rho_0 / norm_rhs) ); 00228 break; 00229 } 00230 00231 } // for k 00232 00233 //inplace solution of the upper triangular matrix: 00234 for (int i=k-1; i>-1; --i) 00235 { 00236 for (unsigned int j=i+1; j<k; ++j) 00237 //temp_rhs[i] -= R[i][j] * temp_rhs[j]; //if R is not transposed 00238 projection_rhs[i] -= R[j][i] * projection_rhs[j]; //R is transposed 00239 00240 projection_rhs[i] /= R[i][i]; 00241 } 00242 00243 res *= projection_rhs[0]; 00244 00245 if (k > 0) 00246 { 00247 for (unsigned int i = 0; i < k-1; ++i) 00248 { 00249 res[i] += projection_rhs[i+1]; 00250 } 00251 } 00252 00253 for (int i = k-1; i > -1; --i) 00254 res -= U[i] * (viennacl::linalg::inner_prod(U[i], res) * gpu_scalar_2); 00255 00256 res *= rho_0; 00257 result += res; 00258 00259 if ( std::fabs(rho*rho_0 / norm_rhs) < tag.tolerance() ) 00260 { 00261 //std::cout << "Allowed Error reached at end of loop" << std::endl; 00262 tag.error(std::fabs(rho*rho_0 / norm_rhs)); 00263 return result; 00264 } 00265 00266 res = rhs; 00267 res -= viennacl::linalg::prod(matrix, result); 00268 //std::cout << "norm_2(r)=" << norm_2(r) << std::endl; 00269 //std::cout << "std::abs(rho*rho_0)=" << std::abs(rho*rho_0) << std::endl; 00270 //std::cout << r << std::endl; 00271 00272 tag.error(std::fabs(rho*rho_0)); 00273 } 00274 00275 return result; 00276 } 00277 00280 template <typename MatrixType, typename VectorType> 00281 VectorType solve(const MatrixType & matrix, VectorType const & rhs, gmres_tag const & tag) 00282 { 00283 return solve(matrix, rhs, tag, no_precond()); 00284 } 00285 00286 00287 } 00288 } 00289 00290 #endif
1.7.6.1