|
ViennaCL - The Vienna Computing Library
1.1.2
|
00001 /* ======================================================================= 00002 Copyright (c) 2010, Institute for Microelectronics, TU Vienna. 00003 http://www.iue.tuwien.ac.at 00004 ----------------- 00005 ViennaCL - The Vienna Computing Library 00006 ----------------- 00007 00008 authors: Karl Rupp rupp@iue.tuwien.ac.at 00009 Florian Rudolf flo.rudy+viennacl@gmail.com 00010 Josef Weinbub weinbub@iue.tuwien.ac.at 00011 00012 license: MIT (X11), see file LICENSE in the ViennaCL base directory 00013 ======================================================================= */ 00014 00015 00016 #ifndef VIENNACL_IO_KERNEL_PARAMETERS_HPP 00017 #define VIENNACL_IO_KERNEL_PARAMETERS_HPP 00018 00023 #include "viennacl/ocl/backend.hpp" 00024 #include "pugixml/src/pugixml.hpp" 00025 00026 namespace viennacl { 00027 namespace io { 00028 00029 namespace tag { 00030 std::string root = "parameters"; 00031 std::string devices = "devices"; 00032 std::string device = "device"; 00033 std::string name = "name"; 00034 std::string driver = "driver"; 00035 std::string compun = "computeunits"; 00036 std::string workgrp = "workgroupsize"; 00037 std::string tests = "tests"; 00038 std::string test = "test"; 00039 std::string numeric = "numeric"; 00040 std::string kernels = "kernels"; 00041 std::string kernel = "kernel"; 00042 std::string params = "params"; 00043 std::string param = "param"; 00044 std::string value = "value"; 00045 std::string alignment = "alignment"; 00046 } // end namespace tag 00047 00048 namespace val { 00049 std::string globsize = "globalsize"; 00050 std::string locsize = "localsize"; 00051 std::string vec = "vector"; 00052 std::string matrix = "matrix"; 00053 std::string compmat = "compressed_matrix"; 00054 std::string fl = "float"; 00055 std::string dbl = "double"; 00056 } 00057 00059 struct parameter_database 00060 { 00061 parameter_database () 00062 { 00063 root = doc.append_child(); 00064 root.set_name(tag::root.c_str()); 00065 last = root; 00066 00067 devices_open = false; 00068 tests_open = false; 00069 kernels_open = false; 00070 parameters_open = false; 00071 } 00072 00073 void add_device() 00074 { 00075 pugi::xml_node dev; 00076 if(devices_open) 00077 { 00078 dev = devices.append_child(); 00079 dev.set_name(tag::device.c_str()); 00080 } 00081 else 00082 { 00083 devices = last.append_child(); 00084 devices.set_name(tag::devices.c_str()); 00085 00086 dev = devices.append_child(); 00087 dev.set_name(tag::device.c_str()); 00088 00089 devices_open = true; 00090 } 00091 last = dev; 00092 } 00093 00094 void add_test() 00095 { 00096 pugi::xml_node test; 00097 if(tests_open) 00098 { 00099 test = tests.append_child(); 00100 test.set_name(tag::test.c_str()); 00101 } 00102 else 00103 { 00104 tests = last.append_child(); 00105 tests.set_name(tag::tests.c_str()); 00106 00107 test = tests.append_child(); 00108 test.set_name(tag::test.c_str()); 00109 00110 tests_open = true; 00111 } 00112 last = test; 00113 // close the current kernels section 00114 // so a new one is created for this new test 00115 kernels_open = false; 00116 } 00117 00118 void add_kernel() 00119 { 00120 pugi::xml_node kern; 00121 if(kernels_open) 00122 { 00123 kern = kernels.append_child(); 00124 kern.set_name(tag::kernel.c_str()); 00125 } 00126 else 00127 { 00128 kernels = last.append_child(); 00129 kernels.set_name(tag::kernels.c_str()); 00130 00131 kern = kernels.append_child(); 00132 kern.set_name(tag::kernel.c_str()); 00133 00134 kernels_open = true; 00135 } 00136 last = kern; 00137 00138 // close the current parameters section 00139 // so a new one is created for this new kernel 00140 parameters_open = false; 00141 } 00142 00143 void add_parameter() 00144 { 00145 pugi::xml_node para; 00146 00147 if(parameters_open) 00148 { 00149 para = parameters.append_child(); 00150 para.set_name(tag::param.c_str()); 00151 } 00152 else 00153 { 00154 parameters = last.append_child(); 00155 parameters.set_name(tag::params.c_str()); 00156 00157 para = parameters.append_child(); 00158 para.set_name(tag::param.c_str()); 00159 00160 parameters_open = true; 00161 } 00162 last = para; 00163 } 00164 00165 template<typename ValueT> 00166 void add_data_node(std::string tagstr, ValueT data) 00167 { 00168 std::stringstream ss; 00169 ss << data; 00170 add_data_node(tagstr, ss.str()); 00171 } 00172 00173 void add_data_node(std::string tagstr, std::string data) 00174 { 00175 pugi::xml_node node = last.append_child(); 00176 00177 if(tagstr == tag::name) 00178 node.set_name(tag::name.c_str()); 00179 else if(tagstr == tag::driver) 00180 node.set_name(tag::driver.c_str()); 00181 else if(tagstr == tag::numeric) 00182 node.set_name(tag::numeric.c_str()); 00183 else if(tagstr == tag::alignment) 00184 node.set_name(tag::alignment.c_str()); 00185 else if(tagstr == tag::value) 00186 node.set_name(tag::value.c_str()); 00187 else if(tagstr == tag::compun) 00188 node.set_name(tag::compun.c_str()); 00189 else if(tagstr == tag::workgrp) 00190 node.set_name(tag::workgrp.c_str()); 00191 else 00192 std::cout << "# Error adding data node: node tag not recognized .." << std::endl; 00193 node.append_child(pugi::node_pcdata).set_value(data.c_str()); 00194 } 00195 00196 void load(std::string filename) 00197 { 00198 doc.load_file(filename.c_str()); 00199 } 00200 00201 void dump(std::string filename) 00202 { 00203 std::ofstream outstream(filename.c_str()); 00204 this->dump(outstream); 00205 outstream.close(); 00206 } 00207 00208 void dump(std::ostream& stream = std::cout) 00209 { 00210 doc.save(stream, " "); 00211 } 00212 00213 pugi::xml_document doc; 00214 pugi::xml_node root; 00215 pugi::xml_node devices; 00216 pugi::xml_node tests; 00217 pugi::xml_node kernels; 00218 pugi::xml_node parameters; 00219 pugi::xml_node last; 00220 00221 bool devices_open; 00222 bool tests_open; 00223 bool kernels_open; 00224 bool parameters_open; 00225 00226 }; 00227 00229 template <typename T> 00230 struct first_letter_of_type 00231 { 00232 static char get(); //intentionally not implemented, class must be specialized 00233 }; 00234 00235 template <> 00236 struct first_letter_of_type <float> 00237 { 00238 static char get() { return 'f'; } 00239 }; 00240 00241 template <> 00242 struct first_letter_of_type <double> 00243 { 00244 static char get() { return 'd'; } 00245 }; 00246 00247 template <typename T> 00248 struct program_for_vcltype 00249 { 00250 static std::string get(); //intentionally not implemented, class must be specialized 00251 }; 00252 00253 template <typename T, unsigned int ALIGNMENT> 00254 struct program_for_vcltype < viennacl::vector<T, ALIGNMENT> > 00255 { 00256 static std::string get() 00257 { 00258 std::stringstream ss; 00259 ss << first_letter_of_type<T>::get() << "_vector_" << ALIGNMENT; 00260 return ss.str(); 00261 } 00262 }; 00263 00264 template <typename T, unsigned int ALIGNMENT> 00265 struct program_for_vcltype < viennacl::matrix<T, row_major, ALIGNMENT> > 00266 { 00267 static std::string get() 00268 { 00269 std::stringstream ss; 00270 ss << first_letter_of_type<T>::get() << "_matrix_row_" << ALIGNMENT; 00271 return ss.str(); 00272 } 00273 }; 00274 00275 template <typename T, unsigned int ALIGNMENT> 00276 struct program_for_vcltype < viennacl::matrix<T, column_major, ALIGNMENT> > 00277 { 00278 static std::string get() 00279 { 00280 std::stringstream ss; 00281 ss << first_letter_of_type<T>::get() << "_matrix_col_" << ALIGNMENT; 00282 return ss.str(); 00283 } 00284 }; 00285 00286 template <typename T, unsigned int ALIGNMENT> 00287 struct program_for_vcltype < viennacl::compressed_matrix<T, ALIGNMENT> > 00288 { 00289 static std::string get() 00290 { 00291 std::stringstream ss; 00292 ss << first_letter_of_type<T>::get() << "_compressed_matrix_" << ALIGNMENT; 00293 return ss.str(); 00294 } 00295 }; 00296 00297 template<typename SCALARTYPE, unsigned int ALIGNMENT> 00298 void set_kernel_params(std::string program_name, 00299 std::string kernel_name, 00300 unsigned int glob, //total no. of threads 00301 unsigned int loc) //threads per work group 00302 { 00303 //get kernel from pool and set work sizes: 00304 viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(program_name, kernel_name); 00305 k.global_work_size(0, glob); 00306 k.local_work_size(0, loc); 00307 00308 //std::cout << "Setting [" << glob << ", " << loc << "] for kernel " << kernel_name << std::endl; 00309 } 00310 00311 template<typename VclBasicType> 00312 void tune_impl(parameter_database& paras, std::string parent) 00313 { 00314 typedef typename VclBasicType::value_type::value_type SCALARTYPE; 00315 00316 // create dummy vectors; the kernels have to be created .. 00317 VclBasicType dummy; 00318 00319 // extract the kernels for which parameters are present 00320 std::string kernel_str = parent+"/kernels/kernel/name/text()"; 00321 pugi::xpath_node_set kernel_res = paras.doc.select_nodes(kernel_str.c_str()); 00322 00323 typedef std::vector<std::string> kernels_type; 00324 kernels_type kernels; 00325 std::cout << "Retrieving kernels..." << std::endl; 00326 for (pugi::xpath_node_set::const_iterator it = kernel_res.begin(); it != kernel_res.end(); ++it) 00327 { 00328 std::stringstream ss; 00329 it->node().print(ss, " "); 00330 std::string kern(ss.str()); 00331 kern.erase(std::remove(kern.begin(), kern.end(), '\n'), kern.end()); //trim trailing linebreak 00332 kernels.push_back(kern); 00333 } 00334 00335 // retrieve the actual parameters 00336 std::cout << "Retrieving actual parameters..." << std::endl; 00337 for(typename kernels_type::iterator iter = kernels.begin(); 00338 iter != kernels.end(); iter++) 00339 { 00340 // retrieving the work group .. 00341 std::string wg_str = parent+"/kernels/kernel[name='"+*iter+"']/params/param[name='"+val::globsize+"']/value/text()"; 00342 pugi::xpath_node_set wg_res = paras.doc.select_nodes(wg_str.c_str()); 00343 00344 unsigned int global_size(0); 00345 00346 std::stringstream ss; 00347 ss << wg_res[0].node().value(); 00348 ss >> global_size; 00349 00350 // retrieving the local_workers .. 00351 std::string lw_str = parent+"/kernels/kernel[name='"+*iter+"']/params/param[name='"+val::locsize+"']/value/text()"; 00352 pugi::xpath_node_set lw_res = paras.doc.select_nodes(lw_str.c_str()); 00353 00354 unsigned int local_workers(0); 00355 00356 ss.clear(); 00357 ss << lw_res[0].node().value(); 00358 ss >> local_workers; 00359 00360 //std::cout << "kernel: " << *iter << " wg: " << work_group << " lw: " << local_workers << std::endl; 00361 00362 // set the parameters 00363 set_kernel_params<SCALARTYPE,1> (program_for_vcltype<VclBasicType>::get(), *iter, global_size, local_workers); 00364 //set_kernel_params<SCALARTYPE,4> (*iter, work_group * local_workers, local_workers); 00365 //set_kernel_params<SCALARTYPE,16>(*iter, work_group * local_workers, local_workers); 00366 } 00367 } 00368 00370 template <typename T> 00371 struct to_string {}; 00372 00373 template <> 00374 struct to_string<float> 00375 { 00376 static std::string get() { return "float"; } 00377 }; 00378 00379 template <> 00380 struct to_string<double> 00381 { 00382 static std::string get() { return "double"; } 00383 }; 00384 00390 template<typename VclBasicType> 00391 void read_kernel_parameters(std::string filename) 00392 { 00393 typedef typename VclBasicType::value_type::value_type SCALARTYPE; 00394 00395 parameter_database paras; 00396 paras.load(filename); 00397 00398 std::string devname = viennacl::ocl::current_device().name(); 00399 00400 // check if tune parameters for the current device are present 00401 std::string device_str = "/parameters/devices/device[name='"+devname+"']"; 00402 pugi::xpath_node_set device_res = paras.doc.select_nodes(device_str.c_str()); 00403 00404 if(device_res.size() == 0) 00405 { 00406 std::cout << "Tuner: There are no parameters for this device present!" << std::endl; 00407 // evaluate the parameters for this device? 00408 } 00409 00410 // check if tune parameters for float exist 00411 std::string numeric_str = device_str+"/tests/test[numeric='"+to_string<SCALARTYPE>::get()+"']"; 00412 pugi::xpath_node_set numeric_res = paras.doc.select_nodes(numeric_str.c_str()); 00413 00414 if(numeric_res.size() > 0) 00415 { 00416 tune_impl<VclBasicType>(paras, numeric_str); 00417 } 00418 else 00419 { 00420 std::cout << "Tuner: There are no parameters for numeric type float present!" << std::endl; 00421 } 00422 00423 // // check if tune parameters for double exist 00424 // std::string double_str = device_str+"/tests/test[numeric='"+val::dbl+"']"; 00425 // pugi::xpath_node_set double_res = paras.doc.select_nodes(double_str.c_str()); 00426 // 00427 // if(double_res.size() > 0) 00428 // { 00429 // tune_impl<double>(paras, double_str); 00430 // } 00431 // else 00432 // { 00433 // std::cout << "Tuner: There are no parameters for numeric type double present!" << std::endl; 00434 // } 00435 00436 } 00437 00438 } // end namespace io 00439 00440 } // end namespace viennacl 00441 00442 #endif
1.7.6.1