rheolef  6.5
solver_mumps.cc
Go to the documentation of this file.
// notes on solving some independent sparse linear systems (e.g. a local sparse mass matrix)
2 // https://listes.ens-lyon.fr/sympa/arc/mumps-users
3 // From: Alfredo Buttari <alfredo.buttari@enseeiht.fr>
4 #include "rheolef/config.h"
5 #ifdef _RHEOLEF_HAVE_MUMPS
6 #include "solver_mumps.h"
7 
8 namespace rheolef {
9 using namespace std;
10 
11 template<class T, class M>
12 static
13 typename csr<T,M>::size_type
15 {
16  typedef typename csr<T,M>::size_type size_type;
17  size_type dia_nnz = 0;
18  typename csr<T,M>::const_iterator dia_ia = a.begin();
19  for (size_type i = 0, n = a.nrow(); i < n; i++) {
20  for (typename csr<T,M>::const_data_iterator p = dia_ia[i]; p < dia_ia[i+1]; p++) {
21  size_type j = (*p).first;
22  if (j >= i) dia_nnz++;
23  }
24  }
25  return dia_nnz;
26 }
27 template<class T>
28 static
31 {
32  return 0;
33 }
#ifdef _RHEOLEF_HAVE_MPI
// Count the entries of the external (off-processor) block lying on or above
// the diagonal, comparing global (distributed) row/column indices.
// NOTE(review): the signature and size_type typedef lines were lost in
// extraction; reconstructed from the body — confirm against the original.
template<class T>
static
typename csr<T,distributed>::size_type
nnz_ext_upper (const csr<T,distributed>& a)
{
  typedef typename csr<T,distributed>::size_type size_type;
  size_type first_i = a.row_ownership().first_index(); // global index of local row 0
  size_type ext_nnz = 0;
  typename csr<T,distributed>::const_iterator ext_ia = a.ext_begin();
  for (size_type i = 0, n = a.nrow(); i < n; i++) {
    for (typename csr<T,distributed>::const_data_iterator p = ext_ia[i]; p < ext_ia[i+1]; p++) {
      size_type dis_i = first_i + i;
      size_type dis_j = a.jext2dis_j ((*p).first); // external column index -> global column index
      if (dis_j >= dis_i) ext_nnz++;
    }
  }
  return ext_nnz;
}
#endif // _RHEOLEF_HAVE_MPI
54 template<class T>
55 static
56 void
57 csr2mumps_struct (const csr<T,sequential>& a, vector<int>& row, vector<int>& col, bool use_symmetry)
58 {
59  typedef typename csr<T,sequential>::size_type size_type;
60  typename csr<T,sequential>::const_iterator dia_ia = a.begin();
61  for (size_type i = 0, n = a.nrow(), q = 0; i < n; i++) {
62  for (typename csr<T,sequential>::const_data_iterator p = dia_ia[i]; p < dia_ia[i+1]; p++) {
63  size_type j = (*p).first;
64  if (!use_symmetry || j >= i) {
65  row[q] = i + 1;
66  col[q] = j + 1;
67  q++;
68  }
69  }
70  }
71 }
72 template<class T>
73 static
74 void
75 csr2mumps_values (const csr<T,sequential>& a, vector<T>& val, bool use_symmetry)
76 {
77  typedef typename csr<T,sequential>::size_type size_type;
78  typename csr<T,sequential>::const_iterator dia_ia = a.begin();
79  for (size_type i = 0, n = a.nrow(), q = 0; i < n; i++) {
80  for (typename csr<T,sequential>::const_data_iterator p = dia_ia[i]; p < dia_ia[i+1]; p++) {
81  size_type j = (*p).first;
82  if (!use_symmetry || j >= i) {
83  val[q] = (*p).second;
84  q++;
85  }
86  }
87  }
88 }
89 #ifdef _RHEOLEF_HAVE_MPI
90 template<class T>
91 static
92 void
93 csr2mumps_struct (const csr<T,distributed>& a, vector<int>& row, vector<int>& col, bool use_symmetry)
94 {
96  typename csr<T,distributed>::const_iterator dia_ia = a.begin();
97  typename csr<T,distributed>::const_iterator ext_ia = a.ext_begin();
98  size_type first_i = a.row_ownership().first_index();
99  for (size_type i = 0, n = a.nrow(), q = 0; i < n; i++) {
100  for (typename csr<T,distributed>::const_data_iterator p = dia_ia[i]; p < dia_ia[i+1]; p++) {
101  size_type dis_i = first_i + i;
102  size_type dis_j = first_i + (*p).first;
103  if (!use_symmetry || dis_j >= dis_i) {
104  row[q] = dis_i + 1;
105  col[q] = dis_j + 1;
106  q++;
107  }
108  }
109  for (typename csr<T,distributed>::const_data_iterator p = ext_ia[i]; p < ext_ia[i+1]; p++) {
110  size_type dis_i = first_i + i;
111  size_type dis_j = a.jext2dis_j ((*p).first);
112  if (!use_symmetry || dis_j >= dis_i) {
113  row[q] = dis_i + 1;
114  col[q] = dis_j + 1;
115  q++;
116  }
117  }
118  }
119 }
120 template<class T>
121 static
122 void
123 csr2mumps_values (const csr<T,distributed>& a, vector<T>& val, bool use_symmetry)
124 {
125  typedef typename csr<T,distributed>::size_type size_type;
126  size_type first_i = a.row_ownership().first_index();
127  typename csr<T,distributed>::const_iterator dia_ia = a.begin();
128  typename csr<T,distributed>::const_iterator ext_ia = a.ext_begin();
129  for (size_type i = 0, n = a.nrow(), q = 0; i < n; i++) {
130  for (typename csr<T,distributed>::const_data_iterator p = dia_ia[i]; p < dia_ia[i+1]; p++) {
131  size_type dis_i = first_i + i;
132  size_type dis_j = first_i + (*p).first;
133  if (!use_symmetry || dis_j >= dis_i) {
134  val[q] = (*p).second;
135  q++;
136  }
137  }
138  for (typename csr<T,distributed>::const_data_iterator p = ext_ia[i]; p < ext_ia[i+1]; p++) {
139  size_type dis_i = first_i + i;
140  size_type dis_j = a.jext2dis_j ((*p).first);
141  if (!use_symmetry || dis_j >= dis_i) {
142  val[q] = (*p).second;
143  q++;
144  }
145  }
146  }
147 }
148 #endif // _RHEOLEF_HAVE_MPI
149 template<class T, class M>
151  : solver_abstract_rep<T,M>(opt),
152  _has_mumps_instance(false),
153  _mumps_par(),
154  _row(),
155  _col(),
156  _a00(0)
157 {
158  if (a.dis_nrow() <= 1) {
159  if (a.nrow() == 1) {
160  _a00 = (*(a.begin()[0])).second;
161  }
162  _has_mumps_instance = false;
163  return;
164  }
165  _has_mumps_instance = true;
166  bool use_symmetry = a.is_symmetric();
167  _mumps_par.par = 1; // use parallel
168 #ifdef _RHEOLEF_HAVE_MPI
169  _mumps_par.comm_fortran = MPI_Comm_c2f (a.row_ownership().comm());
170 #endif // _RHEOLEF_HAVE_MPI
171  if (use_symmetry) {
172  _mumps_par.sym = 2; // sym but can have either < 0 or > 0 eigenvalues
173  } else {
174  _mumps_par.sym = 0; // is symmetric: 0=no, 1=sdp, 2=general sym
175  }
176  _mumps_par.job = -1;
177  dmumps_c(&_mumps_par);
178  check_macro (_mumps_par.infog[1-1] == 0, "mumps: an error has occured");
179  if (opt.verbose_level != 0) {
180  _mumps_par.icntl[1-1] = 1; // error output
181  _mumps_par.icntl[2-1] = 1; // verbose output
182  _mumps_par.icntl[3-1] = 1; // global infos
183  _mumps_par.icntl[4-1] = opt.verbose_level; // verbosity level
184  // strcpy (_mumps_par.write_problem, "dump_mumps"); // dump sparse structure
185  } else {
186  _mumps_par.icntl[1-1] = -1;
187  _mumps_par.icntl[2-1] = -1;
188  _mumps_par.icntl[3-1] = -1;
189  _mumps_par.icntl[4-1] = 0;
190  }
191  _mumps_par.icntl[5-1] = 0; // matrix is in assembled format
192  _mumps_par.icntl[7-1] = 7; // ordering: 7=auto; 3==scotch ; 4=pord, distributed with mumps
193  _mumps_par.icntl[14-1] = 40; // default 20 seems to be insufficient in some cases
194  _mumps_par.icntl[29-1] = 0; // 0: auto; 1: ptscotch; 2: parmetis
195  _mumps_par.icntl[22-1] = 0; // 0: in-core ; 1: out-of-core TODO
197  _mumps_par.icntl[18-1] = 3; // distributed
198  } else {
199  _mumps_par.icntl[18-1] = 0; // sequential
200  }
201  _mumps_par.n = a.dis_nrow();
202  size_type dia_nnz = ((use_symmetry) ? nnz_dia_upper(a) : a.nnz());
203  size_type ext_nnz = ((use_symmetry) ? nnz_ext_upper(a) : a.ext_nnz());
204  size_type nnz = dia_nnz + ext_nnz;
205  _row.resize (nnz);
206  _col.resize (nnz);
207  csr2mumps_struct (a, _row, _col, use_symmetry);
209  _mumps_par.nz_loc = nnz;
210  _mumps_par.irn_loc = _row.begin().operator->();
211  _mumps_par.jcn_loc = _col.begin().operator->();
212  } else { // sequential
213  _mumps_par.nz = nnz;
214  _mumps_par.irn = _row.begin().operator->();
215  _mumps_par.jcn = _col.begin().operator->();
216  }
217  _mumps_par.job = 1;
218  dmumps_c(&_mumps_par);
219  check_macro (_mumps_par.infog[1-1] == 0, "mumps: an error has occured");
220 
221  update_values (a);
222 }
223 template<class T, class M>
224 void
226 {
227  if (a.dis_nrow() <= 1) {
228  if (a.nrow() == 1) {
229  _a00 = (*(a.begin()[0])).second;
230  }
231  return;
232  }
233  bool use_symmetry = a.is_symmetric();
234  size_type nnz = a.nnz() + a.ext_nnz();
235  vector<T> val (nnz);
236  csr2mumps_values (a, val, use_symmetry);
238  _mumps_par.a_loc = val.begin().operator->();
239  } else { // sequential
240  _mumps_par.a = val.begin().operator->();
241  }
242  _mumps_par.job = 2;
243  bool finished = false;
244  while (!finished && _mumps_par.icntl[14-1] <= 2000) {
245  dmumps_c(&_mumps_par);
246  if (_mumps_par.infog[1-1] == -9 && _mumps_par.infog[2-1] >= 0) {
247  _mumps_par.icntl[14-1] += 10;
248  trace_macro ("numeric factorization: increase working space of "<<_mumps_par.icntl[14-1]<< "% and retry...");
249  } else {
250  finished = true;
251  }
252  }
253  check_macro (_mumps_par.infog[1-1] == 0,
254  "solver failed: mumps error infog(1)=" <<_mumps_par.infog[1-1]
255  << ", infog(2)=" <<_mumps_par.infog[2-1]
256  << ", icntl(14)="<<_mumps_par.icntl[14-1]
257  << ", icntl(23)="<<_mumps_par.icntl[23-1]);
258 }
259 template<class T, class M>
260 vec<T,M>
262 {
263  if (b.dis_size() <= 1) {
264  if (b.size() == 1) {
265  return vec<T,M>(b.ownership(), b[0]/_a00);
266  } else {
267  return vec<T,M>(b.ownership());
268  }
269  }
270  vec<T,M> x;
271  vector<T> sol;
272  vector<int> perm;
273  size_type sol_size = 0;
274  vec<T,M> b_host;
275  T dummy; // mumps faild when zero sizes and 0 pointers...
276  int idummy;
278  _mumps_par.icntl[21-1] = 1; // 1: solution is distributed
279  _mumps_par.icntl[10-1] = 0; // nstep of iterative refinement (not in distributed version?)
280  communicator comm = b.ownership().comm();
281  distributor host_ownership (b.dis_size(), comm, comm.rank() == 0 ? b.dis_size() : 0);
282  b_host.resize (host_ownership);
283  size_type first_i = b.ownership().first_index();
284  for (size_type i = 0, n = b.size(); i < n; i++) {
285  size_type dis_i = first_i + i;
286  b_host.dis_entry(dis_i) = b[i];
287  }
288  b_host.dis_entry_assembly();
289  if (comm.rank() == 0) {
290  _mumps_par.nrhs = 1;
291  _mumps_par.rhs = b_host.begin().operator->();
292  }
293  sol_size = _mumps_par.info[23-1];
294  sol.resize (sol_size);
295  perm.resize (sol_size);
296  _mumps_par.lsol_loc = sol_size;
297  _mumps_par.sol_loc = (sol_size > 0) ? sol.begin().operator->() : &dummy;
298  _mumps_par.isol_loc = (sol_size > 0) ? perm.begin().operator->() : &idummy;
299  } else { // sequential
300  _mumps_par.icntl[21-1] = 0; // 0: sol goes on the proc=0, inplace in rhs
301  _mumps_par.icntl[10-1] = 0; // nstep of iterative refinement (not in distributed version?)
302  x = b;
303  _mumps_par.nrhs = 1;
304  _mumps_par.rhs = x.begin().operator->();
305  }
306  _mumps_par.icntl [9-1] = 1; // solve A*x=b : ctrl=1 ; otherwise solve A^t*x=b TODO
307  _mumps_par.job = 3;
308  dmumps_c(&_mumps_par);
309  check_macro (_mumps_par.infog[1-1] >= 0,
310  "solver failed: mumps error infog(1)="<<_mumps_par.infog[1-1]
311  << ", infog(2)="<<_mumps_par.infog[2-1]);
312  if (_mumps_par.infog[1-1] > 0) {
313  warning_macro ("mumps warning infog(1)="<<_mumps_par.infog[1-1]
314  << ", infog(2)="<<_mumps_par.infog[2-1]);
315  }
317  x.resize (b.ownership());
318  for (size_t i = 0; i < sol_size; i++) {
319  size_type dis_i = perm[i] - 1;
320  assert_macro (dis_i < x.dis_size(), "invalid index");
321  x.dis_entry(dis_i) = sol[i];
322  }
323  x.dis_entry_assembly();
324  }
325  return x;
326 }
327 template<class T, class M>
328 vec<T,M>
330 {
331  error_macro ("not yet");
332  return vec<T,M>();
333 }
334 template<class T, class M>
336 {
337  if (!_has_mumps_instance) return;
338  _has_mumps_instance = false;
339  _mumps_par.job = -2;
340  dmumps_c(&_mumps_par);
341  check_macro (_mumps_par.infog[1-1] == 0, "mumps: an error has occured");
342 }
343 
345 
346 #ifdef _RHEOLEF_HAVE_MPI
348 #endif // _RHEOLEF_HAVE_MPI
349 
350 } // namespace rheolef
351 #endif // MUMPS
352