rheolef  6.6
solver_mumps.cc
Go to the documentation of this file.
1 // in // some independent sparse linear systems (eg. sparse mass matrix local
2 // https://listes.ens-lyon.fr/sympa/arc/mumps-users
3 // From: Alfredo Buttari <alfredo.buttari@enseeiht.fr>
4 #include "rheolef/config.h"
5 #ifdef _RHEOLEF_HAVE_MUMPS
6 #include "solver_mumps.h"
7 
8 #ifdef DEBUG_FROGGY
9 #undef _RHEOLEF_HAVE_MUMPS_WITH_PTSCOTCH // has failed on some linear systems: prefer seq-scotch when available
10 #endif // DEBUG_FROGGY
11 
12 namespace rheolef {
13 using namespace std;
14 
15 template<class T, class M>
16 static
17 typename csr<T,M>::size_type
18 nnz_dia_upper (const csr<T,M>& a)
19 {
20  typedef typename csr<T,M>::size_type size_type;
21  size_type dia_nnz = 0;
22  typename csr<T,M>::const_iterator dia_ia = a.begin();
23  for (size_type i = 0, n = a.nrow(); i < n; i++) {
24  for (typename csr<T,M>::const_data_iterator p = dia_ia[i]; p < dia_ia[i+1]; p++) {
25  size_type j = (*p).first;
26  if (j >= i) dia_nnz++;
27  }
28  }
29  return dia_nnz;
30 }
31 template<class T, class M>
32 static
33 typename csr<T,M>::size_type
34 nnz_ext_upper (const csr<T,M>& a)
35 {
36  return 0;
37 }
38 #ifdef _RHEOLEF_HAVE_MPI
39 template<class T>
40 static
42 nnz_ext_upper (const csr<T,distributed>& a)
43 {
45  size_type first_i = a.row_ownership().first_index();
46  size_type ext_nnz = 0;
47  typename csr<T,distributed>::const_iterator ext_ia = a.ext_begin();
48  for (size_type i = 0, n = a.nrow(); i < n; i++) {
49  for (typename csr<T,distributed>::const_data_iterator p = ext_ia[i]; p < ext_ia[i+1]; p++) {
50  size_type dis_i = first_i + i;
51  size_type dis_j = a.jext2dis_j ((*p).first);
52  if (dis_j >= dis_i) ext_nnz++;
53  }
54  }
55  return ext_nnz;
56 }
57 #endif // _RHEOLEF_HAVE_MPI
58 template<class T, class M>
59 static
60 void
61 csr2mumps_struct_seq (const csr<T,M>& a, vector<int>& row, vector<int>& col, bool use_symmetry)
62 {
63  typedef typename csr<T,M>::size_type size_type;
64  typename csr<T,M>::const_iterator dia_ia = a.begin();
65  for (size_type i = 0, n = a.nrow(), q = 0; i < n; i++) {
66  for (typename csr<T,M>::const_data_iterator p = dia_ia[i]; p < dia_ia[i+1]; p++) {
67  size_type j = (*p).first;
68  if (!use_symmetry || j >= i) {
69  row[q] = i + 1;
70  col[q] = j + 1;
71  q++;
72  }
73  }
74  }
75 }
76 template<class T, class M>
77 static
78 void
79 csr2mumps_values_seq (const csr<T,M>& a, vector<T>& val, bool use_symmetry)
80 {
81  typedef typename csr<T,M>::size_type size_type;
82  typename csr<T,M>::const_iterator dia_ia = a.begin();
83  for (size_type i = 0, n = a.nrow(), q = 0; i < n; i++) {
84  for (typename csr<T,M>::const_data_iterator p = dia_ia[i]; p < dia_ia[i+1]; p++) {
85  size_type j = (*p).first;
86  if (!use_symmetry || j >= i) {
87  val[q] = (*p).second;
88  q++;
89  }
90  }
91  }
92 }
93 #ifdef _RHEOLEF_HAVE_MPI
94 template<class T>
95 static
96 void
97 csr2mumps_struct_dis (const csr<T,sequential>& a, vector<int>& row, vector<int>& col, bool use_symmetry, bool)
98 {
99 }
100 template<class T>
101 static
102 void
103 csr2mumps_struct_dis (const csr<T,distributed>& a, vector<int>& row, vector<int>& col, bool use_symmetry, bool drop_ext_nnz)
104 {
105  typedef typename csr<T,distributed>::size_type size_type;
106  typename csr<T,distributed>::const_iterator dia_ia = a.begin();
107  typename csr<T,distributed>::const_iterator ext_ia = a.ext_begin();
108  size_type first_i = a.row_ownership().first_index();
109  for (size_type i = 0, n = a.nrow(), q = 0; i < n; i++) {
110  for (typename csr<T,distributed>::const_data_iterator p = dia_ia[i]; p < dia_ia[i+1]; p++) {
111  size_type dis_i = first_i + i;
112  size_type dis_j = first_i + (*p).first;
113  if (!use_symmetry || dis_j >= dis_i) {
114  row[q] = dis_i + 1;
115  col[q] = dis_j + 1;
116  q++;
117  }
118  }
119  if (drop_ext_nnz) continue;
120  for (typename csr<T,distributed>::const_data_iterator p = ext_ia[i]; p < ext_ia[i+1]; p++) {
121  size_type dis_i = first_i + i;
122  size_type dis_j = a.jext2dis_j ((*p).first);
123  if (!use_symmetry || dis_j >= dis_i) {
124  row[q] = dis_i + 1;
125  col[q] = dis_j + 1;
126  q++;
127  }
128  }
129  }
130 }
131 template<class T>
132 static
133 void
134 csr2mumps_values_dis (const csr<T,sequential>& a, vector<T>& val, bool use_symmetry, bool)
135 {
136 }
137 template<class T>
138 static
139 void
140 csr2mumps_values_dis (const csr<T,distributed>& a, vector<T>& val, bool use_symmetry, bool drop_ext_nnz)
141 {
142  typedef typename csr<T,distributed>::size_type size_type;
143  size_type first_i = a.row_ownership().first_index();
144  typename csr<T,distributed>::const_iterator dia_ia = a.begin();
145  typename csr<T,distributed>::const_iterator ext_ia = a.ext_begin();
146  for (size_type i = 0, n = a.nrow(), q = 0; i < n; i++) {
147  for (typename csr<T,distributed>::const_data_iterator p = dia_ia[i]; p < dia_ia[i+1]; p++) {
148  size_type dis_i = first_i + i;
149  size_type dis_j = first_i + (*p).first;
150  if (!use_symmetry || dis_j >= dis_i) {
151  val[q] = (*p).second;
152  q++;
153  }
154  }
155  if (drop_ext_nnz) continue;
156  for (typename csr<T,distributed>::const_data_iterator p = ext_ia[i]; p < ext_ia[i+1]; p++) {
157  size_type dis_i = first_i + i;
158  size_type dis_j = a.jext2dis_j ((*p).first);
159  if (!use_symmetry || dis_j >= dis_i) {
160  val[q] = (*p).second;
161  q++;
162  }
163  }
164  }
165 }
166 #endif // _RHEOLEF_HAVE_MPI
167 template<class T, class M>
// Construct a MUMPS direct-solver instance for matrix a: convert the csr
// pattern to MUMPS coordinate format, initialize the MUMPS instance
// (job=-1), tune its control parameters, run the symbolic analysis (job=1),
// then perform the numeric factorization via update_values().
168 solver_mumps_rep<T,M>::solver_mumps_rep (const csr<T,M>& a, const solver_option_type& opt)
169  : solver_abstract_rep<T,M>(opt),
170  _has_mumps_instance(false),
171  _drop_ext_nnz(false),
172  _mumps_par(),
173  _row(),
174  _col(),
175  _a00(0),
176  _det()
177 {
178  size_type nproc = communicator().size();
// drop off-processor entries when a sequential solve is forced on a
// distributed matrix (opt.force_seq)
179  _drop_ext_nnz = is_distributed<M>::value && opt.force_seq;
180 #ifndef TO_CLEAN
181  if (_drop_ext_nnz) {
182  warning_macro ("in test: _drop_ext_nnz=true and nproc=" << nproc);
183  }
184 #endif // TO_CLEAN
// trivial 0x0 or 1x1 systems bypass MUMPS entirely: keep the single
// coefficient in _a00 and solve directly in solve()
185  if (a.dis_nrow() <= 1) {
186  if (a.nrow() == 1) {
187  _a00 = (*(a.begin()[0])).second;
188  }
189  _has_mumps_instance = false;
190  return;
191  }
192  _has_mumps_instance = true;
193  bool use_symmetry = a.is_symmetric();
194  bool use_definite_positive = a.is_definite_positive();
195  _mumps_par.par = 1; // use parallel
196 #ifdef _RHEOLEF_HAVE_MPI
// MUMPS expects a fortran communicator handle, hence the c2f conversion
197  _mumps_par.comm_fortran = MPI_Comm_c2f (a.row_ownership().comm());
198 #endif // _RHEOLEF_HAVE_MPI
// sym: 0 = unsymmetric, 1 = s.p.d., 2 = general symmetric
199  if (use_symmetry && !use_definite_positive) {
200  // sym invertible but could be undef: could have either < 0 or > 0 eigenvalues
201  // => leads to similar cpu with s.d.p. but without the assumption of positive or negative :
202  _mumps_par.sym = 2;
203  } else if (use_symmetry && use_definite_positive) {
204 #if 0
205  _mumps_par.sym = 1;
206  _mumps_par.sym = 2;
207  _mumps_par.cntl[1-1] = 0.0;
208 #endif // 0
// sym=2 deliberately used instead of sym=1 even in the s.p.d. case
209  _mumps_par.sym = 2;
210  } else {
211  _mumps_par.sym = 0;
212  }
// job=-1: initialize the MUMPS internal instance
213  _mumps_par.job = -1;
214  dmumps_c(&_mumps_par);
215  check_macro (_mumps_par.infog[1-1] == 0, "mumps: an error has occured");
// icntl(1..4): diagnostic output streams and verbosity
216  if (opt.verbose_level != 0) {
217  _mumps_par.icntl[1-1] = 1; // error output
218  _mumps_par.icntl[2-1] = 1; // verbose output
219  _mumps_par.icntl[3-1] = 1; // global infos
220  _mumps_par.icntl[4-1] = opt.verbose_level; // verbosity level
221  // strcpy (_mumps_par.write_problem, "dump_mumps"); // dump sparse structure
222  } else {
223  _mumps_par.icntl[1-1] = -1;
224  _mumps_par.icntl[2-1] = -1;
225  _mumps_par.icntl[3-1] = -1;
226  _mumps_par.icntl[4-1] = 0;
227  }
// icntl(33)=1: ask mumps to compute the determinant during factorization
228  if (opt.compute_determinant) {
229  _mumps_par.icntl[33-1] = 1;
230  }
231  _mumps_par.icntl[5-1] = 0; // matrix is in assembled format
// icntl(7): sequential ordering choice, driven by what mumps was built with
232 #if defined(_RHEOLEF_HAVE_MUMPS_WITH_PTSCOTCH) || defined(_RHEOLEF_HAVE_MUMPS_WITH_SCOTCH)
233  _mumps_par.icntl[7-1] = 3; // sequential ordering: 3==scotch
234 #elif defined(_RHEOLEF_HAVE_MUMPS_WITH_PARMETIS) || defined(_RHEOLEF_HAVE_MUMPS_WITH_METIS)
235  _mumps_par.icntl[7-1] = 5; // sequential ordering: 5==metis
236 #else // _RHEOLEF_HAVE_MUMPS_WITH_PARMETIS
237  _mumps_par.icntl[7-1] = 7; // ordering: 7=auto
238 #endif // _RHEOLEF_HAVE_MUMPS_WITH_PTSCOTCH
239  _mumps_par.icntl[14-1] = 40; // default 20 seems to be insufficient in some cases
240  _mumps_par.icntl[29-1] = 0; // 0: auto; 1: ptscotch; 2: parmetis
241  _mumps_par.icntl[22-1] = 0; // 0: in-core ; 1: out-of-core TODO
// icntl(18)=3: matrix entries are distributed over processes; icntl(28/29)
// then select a parallel or sequential analysis/ordering
242  if (is_distributed<M>::value) {
243  _mumps_par.icntl[18-1] = 3; // distributed
244 
245 #if defined(_RHEOLEF_HAVE_MUMPS_WITH_PTSCOTCH) || defined(_RHEOLEF_HAVE_MUMPS_WITH_PARMETIS)
246  _mumps_par.icntl[28-1] = 2; // parallel ordering
247 #elif defined(_RHEOLEF_HAVE_MUMPS_WITH_SCOTCH) || defined(_RHEOLEF_HAVE_MUMPS_WITH_METIS)
248  _mumps_par.icntl[28-1] = 1; // sequential ordering
249 #endif // _RHEOLEF_HAVE_MUMPS_WITH_PTSCOTCH || _RHEOLEF_HAVE_MUMPS_WITH_PARMETIS
250 
251 #ifdef _RHEOLEF_HAVE_MUMPS_WITH_PTSCOTCH
252  _mumps_par.icntl[29-1] = 1; // ptscotch=1
253 #elif _RHEOLEF_HAVE_MUMPS_WITH_PARMETIS
254  _mumps_par.icntl[29-1] = 2; // parmetis=2
255 #else
256  _mumps_par.icntl[29-1] = 0; // automatic choice
257 #endif // _RHEOLEF_HAVE_MUMPS_WITH_PTSCOTCH
258  } else {
259  _mumps_par.icntl[18-1] = 0; // sequential
260  }
261  _mumps_par.n = a.dis_nrow();
// with symmetry, only the upper triangle is passed to mumps
262  size_type dia_nnz = ((use_symmetry) ? nnz_dia_upper(a) : a.nnz());
263  size_type ext_nnz = ((use_symmetry) ? nnz_ext_upper(a) : a.ext_nnz());
264  size_type dis_nnz = dia_nnz + (_drop_ext_nnz ? 0 : ext_nnz);
// _row/_col are kept as members: mumps retains the irn/jcn pointers
// beyond this call (used again at factorization/solve time)
265  if (is_distributed<M>::value) {
266 #ifdef _RHEOLEF_HAVE_MPI
267  _row.resize (dis_nnz);
268  _col.resize (dis_nnz);
269  csr2mumps_struct_dis (a, _row, _col, use_symmetry, _drop_ext_nnz);
270  _mumps_par.nz_loc = dis_nnz;
271  _mumps_par.irn_loc = _row.begin().operator->();
272  _mumps_par.jcn_loc = _col.begin().operator->();
273 #endif // _RHEOLEF_HAVE_MPI
274  } else { // sequential
275  _row.resize (dia_nnz);
276  _col.resize (dia_nnz);
277  csr2mumps_struct_seq (a, _row, _col, use_symmetry);
278  _mumps_par.nz = dia_nnz;
279  _mumps_par.irn = _row.begin().operator->();
280  _mumps_par.jcn = _col.begin().operator->();
281  }
// job=1: symbolic analysis on the assembled pattern
282  _mumps_par.job = 1;
283  dmumps_c(&_mumps_par);
284  check_macro (_mumps_par.infog[1-1] == 0, "mumps: error infog(1)="<<_mumps_par.infog[1-1]<<" has occured (see mumps/infog(1) documentation; infog(2)="<<_mumps_par.infog[2-1]<<")");
// numeric factorization (job=2) is delegated to update_values()
285  update_values (a);
286 }
287 template<class T, class M>
288 void
289 solver_mumps_rep<T,M>::update_values (const csr<T,M>& a)
290 {
291  if (a.dis_nrow() <= 1) {
292  if (a.nrow() == 1) {
293  _a00 = (*(a.begin()[0])).second;
294  }
295  return;
296  }
297  bool use_symmetry = a.is_symmetric();
298 #ifdef TO_CLEAN
299  size_type dia_nnz = a.nnz();
300  size_type dis_nnz = dia_nnz + a.ext_nnz();
301 #endif // TO_CLEAN
302  size_type dia_nnz = ((use_symmetry) ? nnz_dia_upper(a) : a.nnz());
303  size_type ext_nnz = ((use_symmetry) ? nnz_ext_upper(a) : a.ext_nnz());
304  size_type dis_nnz = dia_nnz + (_drop_ext_nnz ? 0 : ext_nnz);
305  vector<T> val;
306  if (is_distributed<M>::value) {
307 #ifdef _RHEOLEF_HAVE_MPI
308  val.resize (dis_nnz);
309  csr2mumps_values_dis (a, val, use_symmetry, _drop_ext_nnz);
310  _mumps_par.a_loc = val.begin().operator->();
311 #endif // _RHEOLEF_HAVE_MPI
312  } else { // sequential
313  val.resize (dia_nnz);
314  csr2mumps_values_seq (a, val, use_symmetry);
315  _mumps_par.a = val.begin().operator->();
316  }
317  _mumps_par.job = 2;
318  bool finished = false;
319  while (!finished && _mumps_par.icntl[14-1] <= 2000) {
320  dmumps_c(&_mumps_par);
321  if ((_mumps_par.infog[1-1] == -8 || _mumps_par.infog[1-1] == -9) &&
322  _mumps_par.infog[2-1] >= 0) {
323  _mumps_par.icntl[14-1] += 10;
324  dis_warning_macro ("numeric factorization: increase working space of "<<_mumps_par.icntl[14-1]<< "% and retry...");
325  } else {
326  finished = true;
327  }
328  }
329  check_macro (_mumps_par.infog[1-1] == 0,
330  "solver failed: mumps error infog(1)=" <<_mumps_par.infog[1-1]
331  << ", infog(2)=" <<_mumps_par.infog[2-1]
332  << ", icntl(14)="<<_mumps_par.icntl[14-1]
333  << ", icntl(23)="<<_mumps_par.icntl[23-1]);
334  if (_mumps_par.icntl[33-1] != 0) {
335  _det.mantissa = _mumps_par.rinfog[12-1];
336  _det.exponant = _mumps_par.infog[34-1];
337  _det.base = 2;
338  }
339 }
340 template<class T, class M>
341 vec<T,M>
// Solve a*x = b using the factorization computed in update_values()
// (mumps job=3). In distributed mode the right-hand side is gathered on
// process 0 (mumps requires a centralized rhs) while the solution is
// returned distributed (icntl(21)=1) and scattered back to b's ownership.
342 solver_mumps_rep<T,M>::solve (const vec<T,M>& b) const
343 {
// trivial 0x0 or 1x1 systems: solved directly with _a00, no mumps call
344  if (b.dis_size() <= 1) {
345  if (b.size() == 1) {
346  return vec<T,M>(b.ownership(), b[0]/_a00);
347  } else {
348  return vec<T,M>(b.ownership());
349  }
350  }
351  vec<T,M> x;
352  vector<T> sol;
353  vector<int> perm;
354  size_type sol_size = 0;
355  vec<T,M> b_host;
356  T dummy; // mumps faild when zero sizes and 0 pointers...
357  int idummy;
358  if (is_distributed<M>::value) {
359  _mumps_par.icntl[21-1] = 1; // 1: solution is distributed
360  _mumps_par.icntl[10-1] = 0; // nstep of iterative refinement (not in distributed version?)
// gather the whole rhs on process 0, entry by entry
361  communicator comm = b.ownership().comm();
362  distributor host_ownership (b.dis_size(), comm, comm.rank() == 0 ? b.dis_size() : 0);
363  b_host.resize (host_ownership);
364  size_type first_i = b.ownership().first_index();
365  for (size_type i = 0, n = b.size(); i < n; i++) {
366  size_type dis_i = first_i + i;
367  b_host.dis_entry(dis_i) = b[i];
368  }
369  b_host.dis_entry_assembly();
370  if (comm.rank() == 0) {
371  _mumps_par.nrhs = 1;
372  _mumps_par.rhs = b_host.begin().operator->();
373  }
// info(23): size of the local solution piece, known after factorization
374  sol_size = _mumps_par.info[23-1];
375  sol.resize (sol_size);
376  perm.resize (sol_size);
377  _mumps_par.lsol_loc = sol_size;
// point mumps at dummies when the local piece is empty (see note above)
378  _mumps_par.sol_loc = (sol_size > 0) ? sol.begin().operator->() : &dummy;
379  _mumps_par.isol_loc = (sol_size > 0) ? perm.begin().operator->() : &idummy;
380  } else { // sequential
381  _mumps_par.icntl[21-1] = 0; // 0: sol goes on the proc=0, inplace in rhs
382  _mumps_par.icntl[10-1] = 0; // nstep of iterative refinement (not in distributed version?)
// sequential: solve in place, directly in the copy x of b
383  x = b;
384  _mumps_par.nrhs = 1;
385  _mumps_par.rhs = x.begin().operator->();
386  }
387  _mumps_par.icntl [9-1] = 1; // solve A*x=b : ctrl=1 ; otherwise solve A^t*x=b TODO
388  _mumps_par.job = 3;
389  dmumps_c(&_mumps_par);
// infog(1) < 0: fatal error; > 0: warning only, the solve still succeeded
390  check_macro (_mumps_par.infog[1-1] >= 0,
391  "solver failed: mumps error infog(1)="<<_mumps_par.infog[1-1]
392  << ", infog(2)="<<_mumps_par.infog[2-1]);
393  if (_mumps_par.infog[1-1] > 0) {
394  warning_macro ("mumps warning infog(1)="<<_mumps_par.infog[1-1]
395  << ", infog(2)="<<_mumps_par.infog[2-1]);
396  }
// scatter the distributed solution back to b's row ownership; isol_loc
// holds 1-based global indices of the local solution entries
397  if (is_distributed<M>::value) {
398  x.resize (b.ownership());
399  for (size_t i = 0; i < sol_size; i++) {
400  size_type dis_i = perm[i] - 1;
401  assert_macro (dis_i < x.dis_size(), "invalid index");
402  x.dis_entry(dis_i) = sol[i];
403  }
404  x.dis_entry_assembly();
405  }
406  return x;
407 }
408 template<class T, class M>
409 vec<T,M>
410 solver_mumps_rep<T,M>::trans_solve (const vec<T,M>& b) const
411 {
412  error_macro ("not yet");
413  return vec<T,M>();
414 }
415 template<class T, class M>
416 solver_mumps_rep<T,M>::~solver_mumps_rep ()
417 {
418  if (!_has_mumps_instance) return;
419  _has_mumps_instance = false;
420  _mumps_par.job = -2;
421  dmumps_c(&_mumps_par);
422  check_macro (_mumps_par.infog[1-1] == 0, "mumps: an error has occured");
423 }
424 
// explicit template instantiations: double precision, sequential and
// (when MPI is available) distributed memory models
425 template class solver_mumps_rep<double,sequential>;
426 
427 #ifdef _RHEOLEF_HAVE_MPI
428 template class solver_mumps_rep<double,distributed>;
429 #endif // _RHEOLEF_HAVE_MPI
430 
431 } // namespace rheolef
432 #endif // MUMPS
#define error_macro(message)
Definition: compiler.h:100
#define dis_warning_macro(message)
Definition: dis_macros.h:9
STL namespace.
const communicator & comm() const
Definition: diststream.h:240
irheostream, orheostream - large data streams
Definition: compiler.h:7
#define assert_macro(ok_condition, message)
Definition: compiler.h:120
#define warning_macro(message)
Definition: compiler.h:102
#define check_macro(ok_condition, message)
Definition: compiler.h:104
void solve(tiny_matrix< T > &a, tiny_vector< size_t > &piv, const tiny_vector< T > &b, tiny_vector< T > &x)
Definition: tiny_lu.h:50
size_type rank() const
Definition: distributed.h:53
static iorheo::force_initialization dummy
Definition: iorheo.cc:105
void update_values(const csr< T, M > &a)
reference_element_H::size_type size_type