rheolef  7.0
solver_mumps.cc
Go to the documentation of this file.
// in some independent sparse linear systems (eg. sparse mass matrix local
2 // https://listes.ens-lyon.fr/sympa/arc/mumps-users
3 // From: Alfredo Buttari <alfredo.buttari@enseeiht.fr>
4 #include "rheolef/config.h"
5 #ifdef _RHEOLEF_HAVE_MUMPS
6 #include "solver_mumps.h"
7 
8 #define DEBUG_MUMPS_SCOTCH
9 #ifdef DEBUG_MUMPS_SCOTCH
10 #undef _RHEOLEF_HAVE_MUMPS_WITH_PTSCOTCH // has failed on some linear systems: prefer seq-scotch when available
#endif // DEBUG_MUMPS_SCOTCH
12 
13 namespace rheolef {
14 using namespace std;
15 
16 template<class T, class M>
17 static
18 typename csr<T,M>::size_type
19 nnz_dia_upper (const csr<T,M>& a)
20 {
21  typedef typename csr<T,M>::size_type size_type;
22  size_type dia_nnz = 0;
23  typename csr<T,M>::const_iterator dia_ia = a.begin();
24  for (size_type i = 0, n = a.nrow(); i < n; i++) {
25  for (typename csr<T,M>::const_data_iterator p = dia_ia[i]; p < dia_ia[i+1]; p++) {
26  size_type j = (*p).first;
27  if (j >= i) dia_nnz++;
28  }
29  }
30  return dia_nnz;
31 }
32 template<class T, class M>
33 static
34 typename csr<T,M>::size_type
35 nnz_ext_upper (const csr<T,M>& a)
36 {
37  return 0;
38 }
#ifdef _RHEOLEF_HAVE_MPI
// Distributed overload: count the entries of the extra-diagonal
// (off-process) block whose global column index dis_j lies on or above
// the global row index dis_i.
// NOTE(review): the return type and the size_type typedef were missing
// from the listing (lost lines); restored so the function compiles —
// confirm against the upstream rheolef source.
template<class T>
static
typename csr<T,distributed>::size_type
nnz_ext_upper (const csr<T,distributed>& a)
{
  typedef typename csr<T,distributed>::size_type size_type;
  size_type first_i = a.row_ownership().first_index();
  size_type ext_nnz = 0;
  typename csr<T,distributed>::const_iterator ext_ia = a.ext_begin();
  for (size_type i = 0, n = a.nrow(); i < n; i++) {
    for (typename csr<T,distributed>::const_data_iterator p = ext_ia[i]; p < ext_ia[i+1]; p++) {
      size_type dis_i = first_i + i;
      size_type dis_j = a.jext2dis_j ((*p).first); // compressed ext index -> global column
      if (dis_j >= dis_i) ext_nnz++;
    }
  }
  return ext_nnz;
}
#endif // _RHEOLEF_HAVE_MPI
59 template<class T, class M>
60 static
61 void
62 csr2mumps_struct_seq (const csr<T,M>& a, vector<int>& row, vector<int>& col, bool use_symmetry)
63 {
64  typedef typename csr<T,M>::size_type size_type;
65  typename csr<T,M>::const_iterator dia_ia = a.begin();
66  for (size_type i = 0, n = a.nrow(), q = 0; i < n; i++) {
67  for (typename csr<T,M>::const_data_iterator p = dia_ia[i]; p < dia_ia[i+1]; p++) {
68  size_type j = (*p).first;
69  if (!use_symmetry || j >= i) {
70  row[q] = i + 1;
71  col[q] = j + 1;
72  q++;
73  }
74  }
75  }
76 }
77 template<class T, class M>
78 static
79 void
80 csr2mumps_values_seq (const csr<T,M>& a, vector<T>& val, bool use_symmetry)
81 {
82  typedef typename csr<T,M>::size_type size_type;
83  typename csr<T,M>::const_iterator dia_ia = a.begin();
84  for (size_type i = 0, n = a.nrow(), q = 0; i < n; i++) {
85  for (typename csr<T,M>::const_data_iterator p = dia_ia[i]; p < dia_ia[i+1]; p++) {
86  size_type j = (*p).first;
87  if (!use_symmetry || j >= i) {
88  val[q] = (*p).second;
89  q++;
90  }
91  }
92  }
93 }
94 #ifdef _RHEOLEF_HAVE_MPI
95 template<class T>
96 static
97 void
98 csr2mumps_struct_dis (const csr<T,sequential>& a, vector<int>& row, vector<int>& col, bool use_symmetry, bool)
99 {
100 }
101 template<class T>
102 static
103 void
104 csr2mumps_struct_dis (const csr<T,distributed>& a, vector<int>& row, vector<int>& col, bool use_symmetry, bool drop_ext_nnz)
105 {
106  typedef typename csr<T,distributed>::size_type size_type;
107  typename csr<T,distributed>::const_iterator dia_ia = a.begin();
108  typename csr<T,distributed>::const_iterator ext_ia = a.ext_begin();
109  size_type first_i = a.row_ownership().first_index();
110  for (size_type i = 0, n = a.nrow(), q = 0; i < n; i++) {
111  for (typename csr<T,distributed>::const_data_iterator p = dia_ia[i]; p < dia_ia[i+1]; p++) {
112  size_type dis_i = first_i + i;
113  size_type dis_j = first_i + (*p).first;
114  if (!use_symmetry || dis_j >= dis_i) {
115  row[q] = dis_i + 1;
116  col[q] = dis_j + 1;
117  q++;
118  }
119  }
120  if (drop_ext_nnz) continue;
121  for (typename csr<T,distributed>::const_data_iterator p = ext_ia[i]; p < ext_ia[i+1]; p++) {
122  size_type dis_i = first_i + i;
123  size_type dis_j = a.jext2dis_j ((*p).first);
124  if (!use_symmetry || dis_j >= dis_i) {
125  row[q] = dis_i + 1;
126  col[q] = dis_j + 1;
127  q++;
128  }
129  }
130  }
131 }
132 template<class T>
133 static
134 void
135 csr2mumps_values_dis (const csr<T,sequential>& a, vector<T>& val, bool use_symmetry, bool)
136 {
137 }
138 template<class T>
139 static
140 void
141 csr2mumps_values_dis (const csr<T,distributed>& a, vector<T>& val, bool use_symmetry, bool drop_ext_nnz)
142 {
143  typedef typename csr<T,distributed>::size_type size_type;
144  size_type first_i = a.row_ownership().first_index();
145  typename csr<T,distributed>::const_iterator dia_ia = a.begin();
146  typename csr<T,distributed>::const_iterator ext_ia = a.ext_begin();
147  for (size_type i = 0, n = a.nrow(), q = 0; i < n; i++) {
148  for (typename csr<T,distributed>::const_data_iterator p = dia_ia[i]; p < dia_ia[i+1]; p++) {
149  size_type dis_i = first_i + i;
150  size_type dis_j = first_i + (*p).first;
151  if (!use_symmetry || dis_j >= dis_i) {
152  val[q] = (*p).second;
153  q++;
154  }
155  }
156  if (drop_ext_nnz) continue;
157  for (typename csr<T,distributed>::const_data_iterator p = ext_ia[i]; p < ext_ia[i+1]; p++) {
158  size_type dis_i = first_i + i;
159  size_type dis_j = a.jext2dis_j ((*p).first);
160  if (!use_symmetry || dis_j >= dis_i) {
161  val[q] = (*p).second;
162  q++;
163  }
164  }
165  }
166 }
167 #endif // _RHEOLEF_HAVE_MPI
168 template<class T, class M>
169 solver_mumps_rep<T,M>::solver_mumps_rep (const csr<T,M>& a, const solver_option& opt)
170  : solver_abstract_rep<T,M>(opt),
171  _has_mumps_instance(false),
172  _drop_ext_nnz(false),
173  _mumps_par(),
174  _row(),
175  _col(),
176  _a00(0),
177  _det()
178 {
179  size_type nproc = communicator().size();
180  _drop_ext_nnz = is_distributed<M>::value && opt.force_seq;
181 #ifndef TO_CLEAN
182  if (_drop_ext_nnz) {
183  warning_macro ("in test: _drop_ext_nnz=true and nproc=" << nproc);
184  }
185 #endif // TO_CLEAN
186  if (a.dis_nrow() <= 1) {
187  if (a.nrow() == 1) {
188  _a00 = (*(a.begin()[0])).second;
189  }
190  _has_mumps_instance = false;
191  return;
192  }
193  _has_mumps_instance = true;
194  bool use_symmetry = a.is_symmetric();
195  bool use_definite_positive = a.is_definite_positive();
196  _mumps_par.par = 1; // use parallel
197 #ifdef _RHEOLEF_HAVE_MPI
198  _mumps_par.comm_fortran = MPI_Comm_c2f (a.row_ownership().comm());
199 #endif // _RHEOLEF_HAVE_MPI
200  if (use_symmetry && !use_definite_positive) {
201  // sym invertible but could be undef: could have either < 0 or > 0 eigenvalues
202  // => leads to similar cpu with s.d.p. but without the asumption of positive or negative :
203  _mumps_par.sym = 2;
204  } else if (use_symmetry && use_definite_positive) {
205 #if 0
206  _mumps_par.sym = 1;
207  _mumps_par.sym = 2;
208  _mumps_par.cntl[1-1] = 0.0;
209 #endif // 0
210  _mumps_par.sym = 2;
211  } else {
212  _mumps_par.sym = 0;
213  }
214  _mumps_par.job = -1;
215  dmumps_c(&_mumps_par);
216  check_macro (_mumps_par.infog[1-1] == 0, "mumps: an error has occurred");
217  if (opt.verbose_level != 0) {
218  _mumps_par.icntl[1-1] = 1; // error output
219  _mumps_par.icntl[2-1] = 1; // verbose output
220  _mumps_par.icntl[3-1] = 1; // global infos
221  _mumps_par.icntl[4-1] = opt.verbose_level; // verbosity level
222  // strcpy (_mumps_par.write_problem, "dump_mumps"); // dump sparse structure
223  } else {
224  _mumps_par.icntl[1-1] = -1;
225  _mumps_par.icntl[2-1] = -1;
226  _mumps_par.icntl[3-1] = -1;
227  _mumps_par.icntl[4-1] = 0;
228  }
229  if (opt.compute_determinant) {
230  _mumps_par.icntl[33-1] = 1;
231  }
232  _mumps_par.icntl[5-1] = 0; // matrix is in assembled format
233 #if defined(_RHEOLEF_HAVE_MUMPS_WITH_PTSCOTCH) || defined(_RHEOLEF_HAVE_MUMPS_WITH_SCOTCH)
234  _mumps_par.icntl[7-1] = 3; // sequential ordering: 3==scotch
235 #elif defined(_RHEOLEF_HAVE_MUMPS_WITH_PARMETIS) || defined(_RHEOLEF_HAVE_MUMPS_WITH_METIS)
236  _mumps_par.icntl[7-1] = 5; // sequential ordering: 5==metis
237 #else // _RHEOLEF_HAVE_MUMPS_WITH_PARMETIS
238  _mumps_par.icntl[7-1] = 7; // ordering: 7=auto
239 #endif // _RHEOLEF_HAVE_MUMPS_WITH_PTSCOTCH
240  _mumps_par.icntl[14-1] = 40; // default 20 seems to be insufficient in some cases
241  _mumps_par.icntl[29-1] = 0; // 0: auto; 1: ptscotch; 2: parmetis
242  _mumps_par.icntl[22-1] = 0; // 0: in-core ; 1: out-of-core TODO
244  _mumps_par.icntl[18-1] = 3; // distributed
245 
246 #if defined(_RHEOLEF_HAVE_MUMPS_WITH_SCOTCH) || defined(_RHEOLEF_HAVE_MUMPS_WITH_METIS)
247  _mumps_par.icntl[28-1] = 1; // sequential ordering (prefered, more robust)
248 #elif defined(_RHEOLEF_HAVE_MUMPS_WITH_PTSCOTCH) || defined(_RHEOLEF_HAVE_MUMPS_WITH_PARMETIS)
249  _mumps_par.icntl[28-1] = 2; // parallel ordering (less robust, but faster)
250 #endif // _RHEOLEF_HAVE_MUMPS_WITH_PTSCOTCH || _RHEOLEF_HAVE_MUMPS_WITH_PARMETIS
251 
252 #ifdef _RHEOLEF_HAVE_MUMPS_WITH_PTSCOTCH
253  _mumps_par.icntl[29-1] = 1; // ptscotch=1
254 #elif _RHEOLEF_HAVE_MUMPS_WITH_PARMETIS
255  _mumps_par.icntl[29-1] = 2; // parmetis=2
256 #else
257  _mumps_par.icntl[29-1] = 0; // automatic choice
258 #endif // _RHEOLEF_HAVE_MUMPS_WITH_PTSCOTCH
259  } else {
260  _mumps_par.icntl[18-1] = 0; // sequential
261  }
262  _mumps_par.n = a.dis_nrow();
263  size_type dia_nnz = ((use_symmetry) ? nnz_dia_upper(a) : a.nnz());
264  size_type ext_nnz = ((use_symmetry) ? nnz_ext_upper(a) : a.ext_nnz());
265  size_type dis_nnz = dia_nnz + (_drop_ext_nnz ? 0 : ext_nnz);
267 #ifdef _RHEOLEF_HAVE_MPI
268  _row.resize (dis_nnz);
269  _col.resize (dis_nnz);
270  csr2mumps_struct_dis (a, _row, _col, use_symmetry, _drop_ext_nnz);
271  _mumps_par.nz_loc = dis_nnz;
272  _mumps_par.irn_loc = _row.begin().operator->();
273  _mumps_par.jcn_loc = _col.begin().operator->();
274 #endif // _RHEOLEF_HAVE_MPI
275  } else { // sequential
276  _row.resize (dia_nnz);
277  _col.resize (dia_nnz);
278  csr2mumps_struct_seq (a, _row, _col, use_symmetry);
279  _mumps_par.nz = dia_nnz;
280  _mumps_par.irn = _row.begin().operator->();
281  _mumps_par.jcn = _col.begin().operator->();
282  }
283  _mumps_par.job = 1;
284  dmumps_c(&_mumps_par);
285  trace_macro ("symbolic: ordering effectively used = " << _mumps_par.infog[7-1]); // 3=scoth, etc
286  check_macro (_mumps_par.infog[1-1] == 0, "mumps: error infog(1)="<<_mumps_par.infog[1-1]<<" has occurred (see mumps/infog(1) documentation; infog(2)="<<_mumps_par.infog[2-1]<<")");
287  update_values (a);
288 }
289 template<class T, class M>
290 void
291 solver_mumps_rep<T,M>::update_values (const csr<T,M>& a)
292 {
293  if (a.dis_nrow() <= 1) {
294  if (a.nrow() == 1) {
295  _a00 = (*(a.begin()[0])).second;
296  }
297  return;
298  }
299  bool use_symmetry = a.is_symmetric();
300 #ifdef TO_CLEAN
301  size_type dia_nnz = a.nnz();
302  size_type dis_nnz = dia_nnz + a.ext_nnz();
303 #endif // TO_CLEAN
304  size_type dia_nnz = ((use_symmetry) ? nnz_dia_upper(a) : a.nnz());
305  size_type ext_nnz = ((use_symmetry) ? nnz_ext_upper(a) : a.ext_nnz());
306  size_type dis_nnz = dia_nnz + (_drop_ext_nnz ? 0 : ext_nnz);
307  vector<T> val;
309 #ifdef _RHEOLEF_HAVE_MPI
310  val.resize (dis_nnz);
311  csr2mumps_values_dis (a, val, use_symmetry, _drop_ext_nnz);
312  _mumps_par.a_loc = val.begin().operator->();
313 #endif // _RHEOLEF_HAVE_MPI
314  } else { // sequential
315  val.resize (dia_nnz);
316  csr2mumps_values_seq (a, val, use_symmetry);
317  _mumps_par.a = val.begin().operator->();
318  }
319  _mumps_par.job = 2;
320  bool finished = false;
321  while (!finished && _mumps_par.icntl[14-1] <= 2000) {
322  dmumps_c(&_mumps_par);
323  if ((_mumps_par.infog[1-1] == -8 || _mumps_par.infog[1-1] == -9) &&
324  _mumps_par.infog[2-1] >= 0) {
325  _mumps_par.icntl[14-1] += 10;
326  if (_mumps_par.icntl[14-1] > 100) {
327  dis_warning_macro ("numeric factorization: increase working space of "<<_mumps_par.icntl[14-1]<< "% and retry...");
328  }
329  } else {
330  finished = true;
331  }
332  }
333  check_macro (_mumps_par.infog[1-1] == 0,
334  "solver failed: mumps error infog(1)=" <<_mumps_par.infog[1-1]
335  << ", infog(2)=" <<_mumps_par.infog[2-1]
336  << ", icntl(14)="<<_mumps_par.icntl[14-1]
337  << ", icntl(23)="<<_mumps_par.icntl[23-1]);
338  if (_mumps_par.icntl[33-1] != 0) {
339  _det.mantissa = _mumps_par.rinfog[12-1];
340  _det.exponant = _mumps_par.infog[34-1];
341  _det.base = 2;
342  }
343 }
344 template<class T, class M>
345 vec<T,M>
346 solver_mumps_rep<T,M>::solve (const vec<T,M>& b) const
347 {
348  if (b.dis_size() <= 1) {
349  if (b.size() == 1) {
350  return vec<T,M>(b.ownership(), b[0]/_a00);
351  } else {
352  return vec<T,M>(b.ownership());
353  }
354  }
355  vec<T,M> x;
356  vector<T> sol;
357  vector<int> perm;
358  size_type sol_size = 0;
359  vec<T,M> b_host;
360  T dummy; // mumps faild when zero sizes and 0 pointers...
361  int idummy;
363  _mumps_par.icntl[21-1] = 1; // 1: solution is distributed
364  _mumps_par.icntl[10-1] = 0; // nstep of iterative refinement (not in distributed version?)
365  communicator comm = b.ownership().comm();
366  distributor host_ownership (b.dis_size(), comm, comm.rank() == 0 ? b.dis_size() : 0);
367  b_host.resize (host_ownership);
368  size_type first_i = b.ownership().first_index();
369  for (size_type i = 0, n = b.size(); i < n; i++) {
370  size_type dis_i = first_i + i;
371  b_host.dis_entry(dis_i) = b[i];
372  }
373  b_host.dis_entry_assembly();
374  if (comm.rank() == 0) {
375  _mumps_par.nrhs = 1;
376  _mumps_par.rhs = b_host.begin().operator->();
377  }
378  sol_size = _mumps_par.info[23-1];
379  sol.resize (sol_size);
380  perm.resize (sol_size);
381  _mumps_par.lsol_loc = sol_size;
382  _mumps_par.sol_loc = (sol_size > 0) ? sol.begin().operator->() : &dummy;
383  _mumps_par.isol_loc = (sol_size > 0) ? perm.begin().operator->() : &idummy;
384  } else { // sequential
385  _mumps_par.icntl[21-1] = 0; // 0: sol goes on the proc=0, inplace in rhs
386  _mumps_par.icntl[10-1] = 0; // nstep of iterative refinement (not in distributed version?)
387  x = b;
388  _mumps_par.nrhs = 1;
389  _mumps_par.rhs = x.begin().operator->();
390  }
391  _mumps_par.icntl [9-1] = 1; // solve A*x=b : ctrl=1 ; otherwise solve A^t*x=b TODO
392  _mumps_par.job = 3;
393  dmumps_c(&_mumps_par);
394  check_macro (_mumps_par.infog[1-1] >= 0,
395  "solver failed: mumps error infog(1)="<<_mumps_par.infog[1-1]
396  << ", infog(2)="<<_mumps_par.infog[2-1]);
397  if (_mumps_par.infog[1-1] > 0) {
398  warning_macro ("mumps warning infog(1)="<<_mumps_par.infog[1-1]
399  << ", infog(2)="<<_mumps_par.infog[2-1]);
400  }
402  x.resize (b.ownership());
403  for (size_t i = 0; i < sol_size; i++) {
404  size_type dis_i = perm[i] - 1;
405  assert_macro (dis_i < x.dis_size(), "invalid index");
406  x.dis_entry(dis_i) = sol[i];
407  }
408  x.dis_entry_assembly();
409  }
410  return x;
411 }
// Transposed solve A^T*x = b: not implemented yet (would use mumps
// icntl(9), see the TODO in solve()); aborts via error_macro.
template<class T, class M>
vec<T,M>
solver_mumps_rep<T,M>::trans_solve (const vec<T,M>& b) const
{
  error_macro ("not yet");
  return vec<T,M>(); // not reached: error_macro aborts
}
419 template<class T, class M>
420 solver_mumps_rep<T,M>::~solver_mumps_rep ()
421 {
422  if (!_has_mumps_instance) return;
423  _has_mumps_instance = false;
424  _mumps_par.job = -2;
425  dmumps_c(&_mumps_par);
426  check_macro (_mumps_par.infog[1-1] == 0, "mumps: an error has occurred");
427 }
428 
// explicit instantiations: double precision, sequential and (with MPI) distributed
template class solver_mumps_rep<double,sequential>;

#ifdef _RHEOLEF_HAVE_MPI
template class solver_mumps_rep<double,distributed>;
#endif // _RHEOLEF_HAVE_MPI
434 
435 } // namespace rheolef
436 #endif // MUMPS
#define error_macro(message)
Definition: compiler.h:100
#define dis_warning_macro(message)
Definition: dis_macros.h:9
STL namespace.
irheostream, orheostream - large data streams
Definition: compiler.h:7
#define assert_macro(ok_condition, message)
Definition: compiler.h:121
#define warning_macro(message)
Definition: compiler.h:102
#define check_macro(ok_condition, message)
Definition: compiler.h:104
void solve(tiny_matrix< T > &a, tiny_vector< size_t > &piv, const tiny_vector< T > &b, tiny_vector< T > &x)
Definition: tiny_lu.h:50
static const bool value
Definition: distributed.h:16
#define trace_macro(message)
Definition: compiler.h:120
void update_values(const csr< T, M > &a)
Expr1::memory_type M
Definition: vec_expr_v2.h:310
static iorheo::force_initialization dummy
Definition: iorheo.cc:105