rheolef 6.5
csr_mpi.cc
# include "rheolef/config.h"

#ifdef _RHEOLEF_HAVE_MPI
#include "rheolef/dis_macros.h"
#include "rheolef/csr.h"
#include "rheolef/asr_to_csr.h"
#include "rheolef/asr_to_csr_dist_logical.h"

#include "rheolef/csr_amux.h"
#include "rheolef/csr_cumul_trans_mult.h"
#include "rheolef/mpi_scatter_init.h"
#include "rheolef/mpi_scatter_begin.h"
#include "rheolef/mpi_scatter_end.h"

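// Distributed (MPI) support for the csr_rep<T,distributed> compressed sparse
// row matrix: each process stores a "dia" block, made of the columns it owns,
// plus an "ext" block whose columns are owned by other processes and reached
// through the _jext2dis_j translation table.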
#include <algorithm> // for std::lower_bound

using namespace std;
namespace rheolef {
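// Helper functors, used when converting asr (associative sparse) storage to
// csr: each one rewrites the column index of an (index, value) pair.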
template <class Pair1, class Pair2, class RandomIterator>
struct op_ext2glob_t : unary_function<Pair1,Pair2> {
  Pair2 operator()(const Pair1& x) const {
    return Pair2(t[x.first], x.second); }
  op_ext2glob_t(RandomIterator t1) : t(t1) {}
  RandomIterator t;
};
template <class Pair1, class Pair2>
struct op_dia_t : unary_function<Pair1,Pair2> {
  Pair2 operator()(const Pair1& x) const {
    return Pair2(shift + x.first, x.second); }
  typedef typename Pair1::first_type Size;
  op_dia_t(Size s) : shift(s) {}
  Size shift;
};
template <class Pair1, class Pair2, class RandomIterator>
struct op_dis_j2jext_t : unary_function<Pair1,Pair2> {
  Pair2 operator()(const Pair1& x) const {
    RandomIterator t = std::lower_bound(t1, t2, x.first);
    assert_macro(*t == x.first, "problem in distributed asr_to_csr");
    return Pair2(distance(t1,t), x.second); }
  op_dis_j2jext_t(RandomIterator u1, RandomIterator u2) : t1(u1), t2(u2) {}
  RandomIterator t1, t2;
};
template<class T>
csr_rep<T,distributed>::csr_rep ()
 : base(),
   _ext(),
   _jext2dis_j(0),
   _dis_nnz(0),
   _dis_ext_nnz(0),
   _scatter_initialized(false),
   _from(),
   _to(),
   _buffer()
{
}
template<class T>
csr_rep<T,distributed>::csr_rep (const csr_rep<T,distributed>& a)
 : base(a),
   _ext(a._ext),
   _jext2dis_j(a._jext2dis_j),
   _dis_nnz(a._dis_nnz),
   _dis_ext_nnz(a._dis_ext_nnz),
   _scatter_initialized(a._scatter_initialized),
   _from(a._from),
   _to(a._to),
   _buffer(a._buffer)
{
}
template<class T>
void
csr_rep<T,distributed>::resize (const distributor& row_ownership, const distributor& col_ownership, size_type nnz1)
{
  base::resize (row_ownership, col_ownership, nnz1);
  _ext.resize (row_ownership, col_ownership, 0); // note: the _ext part will be resized elsewhere
  _scatter_initialized = false;
}
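// build_from_asr: split the assembled asr entries into the dia block, whose
// columns lie in the locally owned range [first_index, last_index), and the
// ext block; external column indices are compressed through the sorted table
// _jext2dis_j.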
template<class T>
template<class A>
void
csr_rep<T,distributed>::build_from_asr (const asr<T,distributed,A>& a)
{
  base::resize (a.row_ownership(), a.col_ownership(), 0);
  _ext.resize (a.row_ownership(), a.col_ownership(), 0);

  typedef typename asr<T>::row_type row_type;
  typedef typename row_type::value_type const_pair_type;
  typedef pair<size_type,T> pair_type;
  is_dia_t<size_type, const_pair_type>
      is_dia(col_ownership().first_index(), col_ownership().last_index());
  set<size_type> colext;

  size_type nnzext
    = asr_to_csr_dist_logical (a.begin(), a.end(), is_dia, colext);
  size_type nnzdia = a.nnz() - nnzext;
  size_type ncoldia = col_ownership().size();
  size_type ncolext = colext.size();
  base::resize(a.row_ownership(), a.col_ownership(), nnzdia);
  _ext.resize (a.row_ownership(), a.col_ownership(), nnzext);
  _jext2dis_j.resize (ncolext);
  copy (colext.begin(), colext.end(), _jext2dis_j.begin());
  op_dia_t<const_pair_type,pair_type> op_dia(- col_ownership().first_index());
  asr_to_csr (
    a.begin(),
    a.end(),
    is_dia,
    op_dia,
    base::begin(),
    base::_data.begin());

  unary_negate<is_dia_t<size_type, const_pair_type> >
      is_ext(is_dia);
  op_dis_j2jext_t<const_pair_type, pair_type, typename std::vector<size_type>::iterator>
      op_dis_j2jext(_jext2dis_j.begin(), _jext2dis_j.end());
  asr_to_csr (
    a.begin(), a.end(), is_ext, op_dis_j2jext,
    _ext.begin(), _ext._data.begin());

  _dis_nnz = a.dis_nnz();
  _dis_ext_nnz = mpi::all_reduce (comm(), _ext.nnz(), std::plus<size_type>());
  _scatter_initialized = false;
#ifdef TO_CLEAN
  _dis_nnz = mpi::all_reduce (comm(), nnz() + _ext.nnz(), std::plus<size_type>());
  check_macro (_dis_nnz == a.dis_nnz(), "build_from_asr: mistake: asr::dis_nnz="<<a.dis_nnz()
    << " while _dis_nnz="<<_dis_nnz);
#endif // TO_CLEAN
}
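// _scatter_init: build the _from/_to message patterns that gather, for each
// external column listed in _jext2dis_j, the matching vector entry from its
// owning process into the local _buffer.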
template<class T>
void
csr_rep<T,distributed>::_scatter_init () const
{
  vector<size_type> id (_jext2dis_j.size());
  for (size_type i = 0, n = id.size(); i < n; i++) id[i] = i;

  size_type tag = distributor::get_new_tag();

  mpi_scatter_init(
    _jext2dis_j.size(),
    _jext2dis_j.begin().operator->(),
    id.size(),
    id.begin().operator->(),
    dis_ncol(),
    col_ownership().begin().operator->(),
    tag,
    row_ownership().comm(),
    _from,
    _to);

  _buffer.resize(_jext2dis_j.size());
}
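// put: collect the whole matrix, in global (dis_i, dis_j, value) coordinates,
// onto the io process via a redistributed asr, then write it sequentially.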
template<class T>
odiststream&
csr_rep<T,distributed>::put (odiststream& ops) const
{
  size_type io_proc = odiststream::io_proc();
  size_type my_proc = comm().rank();
  distributor a_row_ownership (dis_nrow(), comm(), (my_proc == io_proc ? dis_nrow() : 0));
  distributor a_col_ownership (dis_ncol(), comm(), (my_proc == io_proc ? dis_ncol() : 0));
  typedef std::allocator<T> A; // TODO: use heap_alloc for asr
  asr<T,distributed,A> a (a_row_ownership, a_col_ownership);
  size_type first_dis_i = row_ownership().first_index();
  size_type first_dis_j = col_ownership().first_index();
  if (nnz() != 0) {
    const_iterator ia = begin();
    for (size_type i = 0; i < nrow(); i++) {
      for (const_data_iterator p = ia[i]; p < ia[i+1]; p++) {
        const size_type& j = (*p).first;
        const T& val = (*p).second;
        a.dis_entry (i+first_dis_i, j+first_dis_j) += val;
      }
    }
  }
  if (_ext.nnz() != 0) {
    const_iterator ext_ia = _ext.begin();
    for (size_type i = 0; i < nrow(); i++) {
      for (const_data_iterator p = ext_ia[i]; p < ext_ia[i+1]; p++) {
        const size_type& j = (*p).first;
        const T& val = (*p).second;
        a.dis_entry (i+first_dis_i, _jext2dis_j[j]) += val;
      }
    }
  }
  a.dis_entry_assembly();
  if (my_proc == io_proc) {
    a.put_seq (ops);
  }
  return ops;
}
template<class T>
void
csr_rep<T,distributed>::dump (const string& name) const
{
  odiststream ops;
  std::string filename = name + itos(comm().rank());
  ops.open (filename, "mtx", comm());
  check_macro(ops.good(), "\"" << filename << "[.mtx]\" cannot be created.");
  ops << "%%MatrixMarket matrix coordinate real general" << std::endl
      << dis_nrow() << " " << dis_ncol() << " " << dis_nnz() << std::endl;
  put(ops);
}
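// mult: y = A*x. The scatter of the needed external x entries is started
// first, the dia block product is computed while messages are in flight, and
// the ext block contribution is accumulated after the scatter completes.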
template<class T>
void
csr_rep<T,distributed>::mult (
    const vec<T,distributed>& x,
          vec<T,distributed>& y)
  const
{
  check_macro (x.size() == ncol(), "csr*vec: incompatible csr("<<nrow()<<","<<ncol()<<") and vec("<<x.size()<<")");
  y.resize (row_ownership());

  size_type tag = distributor::get_new_tag();

  _scatter_init_guard();

  mpi_scatter_begin (
    x.begin().operator->(),
    _buffer.begin().operator->(),
    _from,
    _to,
    set_op<T,T>(),
    tag,
    row_ownership().comm());

  csr_amux (
    base::begin(),
    base::end(),
    x.begin(),
    set_op<T,T>(),
    y.begin());

  mpi_scatter_end (
    x.begin(),
    _buffer.begin(),
    _from,
    _to,
    set_op<T,T>(),
    tag,
    row_ownership().comm());

  csr_amux (_ext.begin(), _ext.end(), _buffer.begin(), set_add_op<T,T>(), y.begin());
}
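// trans_mult: y = A^T*x. Both blocks are applied locally; the ext block
// result, indexed by external columns, is then sent back in reverse scatter
// mode and accumulated (+=) into the owning processes' entries of y.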
template<class T>
void
csr_rep<T,distributed>::trans_mult (
    const vec<T,distributed>& x,
          vec<T,distributed>& y)
  const
{
  check_macro (x.size() == nrow(), "csr.trans_mult(vec): incompatible csr("<<nrow()<<","<<ncol()<<") and vec("<<x.size()<<")");

  _scatter_init_guard();

  y.resize (col_ownership());

  std::fill (y.begin(), y.end(), T(0));
  csr_cumul_trans_mult (
    base::begin(),
    base::end(),
    x.begin(),
    set_add_op<T,T>(),
    y.begin());

  std::fill (_buffer.begin(), _buffer.end(), T(0));
  csr_cumul_trans_mult (
    _ext.begin(),
    _ext.end(),
    x.begin(),
    set_add_op<T,T>(),
    _buffer.begin());

  size_type tag = distributor::get_new_tag();
  mpi_scatter_begin (
    _buffer.begin().operator->(),
    y.begin().operator->(),
    _to, // reverse mode
    _from,
    set_add_op<T,T>(), // +=
    tag,
    col_ownership().comm());

  mpi_scatter_end (
    _buffer.begin(),
    y.begin(),
    _to, // reverse mode
    _from,
    set_add_op<T,T>(), // +=
    tag,
    col_ownership().comm());
}
template<class T>
csr_rep<T,distributed>&
csr_rep<T,distributed>::operator*= (const T& lambda)
{
  base::operator*= (lambda);
  _ext.operator*= (lambda);
  return *this;
}
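// csr_ext_add: c = a binop b on ext blocks. A first pass merges the two
// sorted column patterns row by row, counting nonzeros and collecting the
// union of external columns; a second pass fills c, renumbering each global
// column into c's compressed numbering via op_dis_j2jext_t.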
template<class T, class BinaryOp>
void
csr_ext_add (
    const csr_rep<T,sequential>& a, const std::vector<typename csr<T>::size_type>& jext_a2dis_j,
    const csr_rep<T,sequential>& b, const std::vector<typename csr<T>::size_type>& jext_b2dis_j,
          csr_rep<T,sequential>& c, std::vector<typename csr<T>::size_type>& jext_c2dis_j,
    BinaryOp binop)
{
  typedef typename csr_rep<T,distributed>::size_type size_type;
  typedef typename csr_rep<T,distributed>::iterator iterator;
  typedef typename csr_rep<T,distributed>::const_iterator const_iterator;
  typedef typename csr_rep<T,distributed>::data_iterator data_iterator;
  typedef typename csr_rep<T,distributed>::const_data_iterator const_data_iterator;
  typedef std::pair<size_type,T> pair_type;
  size_type nnz_ext_c = 0;
  const size_type infty = std::numeric_limits<size_type>::max();
  const_iterator ia = a.begin();
  const_iterator ib = b.begin();
  std::set<size_type> jext_c_set;
  for (size_type i = 0, n = a.nrow(); i < n; i++) {
    for (const_data_iterator iter_jva = ia[i], last_jva = ia[i+1],
                             iter_jvb = ib[i], last_jvb = ib[i+1];
         iter_jva != last_jva || iter_jvb != last_jvb; ) {

      size_type dis_ja = iter_jva == last_jva ? infty : jext_a2dis_j [(*iter_jva).first];
      size_type dis_jb = iter_jvb == last_jvb ? infty : jext_b2dis_j [(*iter_jvb).first];
      if (dis_ja == dis_jb) {
        jext_c_set.insert (dis_ja);
        iter_jva++;
        iter_jvb++;
      } else if (dis_ja < dis_jb) {
        jext_c_set.insert (dis_ja);
        iter_jva++;
      } else {
        jext_c_set.insert (dis_jb);
        iter_jvb++;
      }
      nnz_ext_c++;
    }
  }
  c.resize (a.nrow(), b.ncol(), nnz_ext_c);
  jext_c2dis_j.resize (jext_c_set.size());
  std::copy (jext_c_set.begin(), jext_c_set.end(), jext_c2dis_j.begin());
  op_dis_j2jext_t<pair_type, pair_type, typename std::vector<size_type>::iterator>
      op_dis_j2jext_c (jext_c2dis_j.begin(), jext_c2dis_j.end());
  data_iterator iter_jvc = c._data.begin().operator->();
  iterator ic = c.begin();
  *ic++ = iter_jvc;
  for (size_type i = 0, n = a.nrow(); i < n; i++) {
    for (const_data_iterator iter_jva = ia[i], last_jva = ia[i+1],
                             iter_jvb = ib[i], last_jvb = ib[i+1];
         iter_jva != last_jva || iter_jvb != last_jvb; ) {

      size_type dis_ja = iter_jva == last_jva ? infty : jext_a2dis_j [(*iter_jva).first];
      size_type dis_jb = iter_jvb == last_jvb ? infty : jext_b2dis_j [(*iter_jvb).first];
      if (dis_ja == dis_jb) {
        *iter_jvc++ = op_dis_j2jext_c (pair_type(dis_ja, binop((*iter_jva).second, (*iter_jvb).second)));
        iter_jva++;
        iter_jvb++;
      } else if (dis_ja < dis_jb) {
        *iter_jvc++ = op_dis_j2jext_c (pair_type(dis_ja, binop((*iter_jva).second, T(0))));
        iter_jva++;
      } else {
        *iter_jvc++ = op_dis_j2jext_c (pair_type(dis_jb, binop(T(0),(*iter_jvb).second)));
        iter_jvb++;
      }
    }
    *ic++ = iter_jvc;
  }
}
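// assign_add: *this = a binop b, blockwise: the base class combines the dia
// blocks while csr_ext_add merges the ext blocks and their column tables.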
template<class T>
template<class BinaryOp>
void
csr_rep<T,distributed>::assign_add (
    const csr_rep<T,distributed>& a,
    const csr_rep<T,distributed>& b,
    BinaryOp binop)
{
  check_macro (a.dis_nrow() == b.dis_nrow() && a.dis_ncol() == b.dis_ncol(),
    "a+b: invalid matrix a("<<a.dis_nrow()<<","<<a.dis_ncol()<<") and b("
    <<b.dis_nrow()<<","<<b.dis_ncol()<<")");
  check_macro (a.nrow() == b.nrow() && a.ncol() == b.ncol(),
    "a+b: matrix local distribution mismatch: a("<<a.nrow()<<","<<a.ncol()<<") and b("
    <<b.nrow()<<","<<b.ncol()<<")");

  base::assign_add (a, b, binop);

  csr_ext_add (
    a._ext, a._jext2dis_j,
    b._ext, b._jext2dis_j,
    _ext, _jext2dis_j,
    binop);

  _dis_nnz = mpi::all_reduce (comm(), nnz() + _ext.nnz(), std::plus<size_type>());
  _dis_ext_nnz = mpi::all_reduce (comm(), _ext.nnz(), std::plus<size_type>());
  _scatter_initialized = false;

#ifdef TO_CLEAN
  vector<size_type> id(_jext2dis_j.size());
  for (size_type i = 0; i < id.size(); i++) id[i] = i;
  size_type tag = distributor::get_new_tag();

  _buffer.resize (_jext2dis_j.size());
  mpi_scatter_init(
    _jext2dis_j.size(),
    _jext2dis_j.begin().operator->(),
    id.size(),
    id.begin().operator->(),
    dis_ncol(),
    col_ownership().begin().operator->(),
    tag,
    row_ownership().comm(),
    _from,
    _to);
#endif // TO_CLEAN
}
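// build_transpose: b = A^T. Transposed ext entries change owner, so they are
// redistributed through an asr built with swapped row/column ownerships; the
// dia block transpose is delegated to the sequential base class.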
template<class T>
void
csr_rep<T,distributed>::build_transpose (csr_rep<T,distributed>& b) const
{
  asr<T> b_ext (col_ownership(), row_ownership());
  size_type first_i = row_ownership().first_index();
  const_iterator ext_ia = ext_begin();
  for (size_type i = 0, n = nrow(); i < n; i++) {
    size_type dis_i = first_i + i;
    for (const_data_iterator p = ext_ia[i], last_p = ext_ia[i+1]; p < last_p; p++) {
      size_type dis_j = jext2dis_j ((*p).first);
      const T& val = (*p).second;
      b_ext.dis_entry (dis_j, dis_i) += val;
    }
  }
  b_ext.dis_entry_assembly();
  b.build_from_asr (b_ext);
  base::build_transpose (b);
  b._dis_nnz = mpi::all_reduce (comm(), b.nnz() + b._ext.nnz(), std::plus<size_type>());
  b._dis_ext_nnz = mpi::all_reduce (comm(), b._ext.nnz(), std::plus<size_type>());
  b._scatter_initialized = false;
}
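// set_symmetry_by_check: mark the matrix symmetric when max|A - A^T| <= tol.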
template<class T>
void
csr_rep<T,distributed>::set_symmetry_by_check (const T& tol) const
{
  if (dis_nrow() != dis_ncol()) {
    set_symmetry(false);
    return;
  }
  csr_rep<T,distributed> at, d;
  build_transpose (at);
  d.assign_add (*this, at, std::minus<T>());
  set_symmetry (d.max_abs() <= tol);
}
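// assign_mult: *this = a*b. The columns of a (owned dia columns plus
// compressed ext columns) are remapped to one contiguous "zip" numbering,
// ordered by global index. The rows of b matching a's ext columns are
// fetched (set_dis_indexes), both operands are rebuilt as sequential zipped
// matrices, multiplied locally, and the result is redistributed.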
template<class T>
void
csr_rep<T,distributed>::assign_mult (
    const csr_rep<T,distributed>& a,
    const csr_rep<T,distributed>& b)
{
  check_macro (a.dis_ncol() == b.dis_nrow(),
    "incompatible csr([0:"<<a.nrow()<<"|"<<a.dis_nrow()<<"[x"
    <<"[0:"<<a.ncol()<<"|"<<a.dis_ncol()<<"[)"
    "*csr([0:"<<b.nrow()<<"|"<<b.dis_nrow()<<"[x"
    <<"[0:"<<b.ncol()<<"|"<<b.dis_ncol()<<"[)");
  size_type a_dia_ncol = a.col_ownership().size();
  size_type a_ext_ncol = a._jext2dis_j.size(); // number of external columns; compressed
  size_type first_a_dis_j = a.col_ownership().first_index();
  std::map<size_type,size_type> a_dis_j2a_zip_j;
  size_type a_zip_j = 0;
  for (size_type jext = 0; jext < a_ext_ncol && a._jext2dis_j [jext] < first_a_dis_j; jext++, a_zip_j++) {
    // external columns with dis_j below the locally owned column range
    size_type dis_j = a._jext2dis_j [jext];
    a_dis_j2a_zip_j [dis_j] = a_zip_j;
  }
  size_type jext_up = a_zip_j;
  for (size_type j = 0; j < a_dia_ncol; j++, a_zip_j++) {
    size_type dis_j = first_a_dis_j + j;
    a_dis_j2a_zip_j [dis_j] = a_zip_j;
  }
  for (size_type jext = jext_up; jext < a_ext_ncol; jext++, a_zip_j++) {
    // external columns with dis_j above the locally owned column range
    size_type dis_j = a._jext2dis_j [jext];
    a_dis_j2a_zip_j [dis_j] = a_zip_j;
  }
  size_type a_zip_ncol = a_dia_ncol + a_ext_ncol;
  typedef std::allocator<T> A; // TODO: use heap_alloc for asr
  A alloc;
  asr<T,distributed,A> b_asr (b, alloc);
  b_asr.set_dis_indexes (a._jext2dis_j);
  distributor a_zip_col_ownership (distributor::decide, b.comm(), a_zip_ncol);
  distributor b_zip_row_ownership = a_zip_col_ownership;
  asr<T,sequential,A> b_asr_zip (b_zip_row_ownership, b.col_ownership());
  size_type first_dis_j = b.row_ownership().first_index();
  size_type j = 0;
  for (typename asr<T,distributed,A>::const_iterator
    iter = b_asr.begin(),
    last = b_asr.end(); iter != last; ++iter, ++j) {
    typedef typename asr<T,distributed,A>::row_type row_type;
    size_type dis_j = first_dis_j + j;
    size_type zip_j = a_dis_j2a_zip_j [dis_j];
    const row_type& row = *iter;
    for (typename row_type::const_iterator
      row_iter = row.begin(),
      row_last = row.end(); row_iter != row_last; ++row_iter) {
      size_type dis_k = (*row_iter).first;
      const T& value = (*row_iter).second;
      b_asr_zip.semi_dis_entry (zip_j, dis_k) = value;
    }
  }
  typedef typename asr<T,distributed,A>::scatter_map_type ext_row_type;
  const ext_row_type& b_ext_row_map = b_asr.get_dis_map_entries();
  for (typename ext_row_type::const_iterator
    iter = b_ext_row_map.begin(),
    last = b_ext_row_map.end(); iter != last; ++iter) {
    typedef typename asr<T,distributed,A>::row_type row_type;
    size_type dis_j = (*iter).first;
    size_type zip_j = a_dis_j2a_zip_j [dis_j];
    const row_type& row = (*iter).second;
    for (typename row_type::const_iterator
      row_iter = row.begin(),
      row_last = row.end(); row_iter != row_last; ++row_iter) {
      size_type dis_k = (*row_iter).first;
      const T& value = (*row_iter).second;
      b_asr_zip.semi_dis_entry (zip_j, dis_k) = value;
    }
  }
  b_asr_zip.dis_entry_assembly(); // recount nnz
  csr<T,sequential> b_zip (b_asr_zip);
  asr<T,sequential> a_asr_zip (a.row_ownership(), a_zip_col_ownership);
  size_type first_dis_i = a.row_ownership().first_index();
  const_iterator dia_ia = a.begin();
  const_iterator ext_ia = a.ext_begin();
  for (size_type i = 0, n = a.nrow(); i < n; ++i) {
    size_type dis_i = first_dis_i + i;
    for (const_data_iterator p = dia_ia[i], last_p = dia_ia[i+1]; p != last_p; ++p) {
      size_type j = (*p).first;
      const T& value = (*p).second;
      size_type dis_j = first_dis_j + j;
      size_type zip_j = a_dis_j2a_zip_j [dis_j];
      a_asr_zip.semi_dis_entry (i, zip_j) += value;
    }
    if (a.ext_nnz() == 0) continue;
    for (const_data_iterator p = ext_ia[i], last_p = ext_ia[i+1]; p != last_p; ++p) {
      size_type jext = (*p).first;
      const T& value = (*p).second;
      size_type dis_j = a._jext2dis_j [jext];
      size_type zip_j = a_dis_j2a_zip_j [dis_j];
      a_asr_zip.semi_dis_entry (i, zip_j) += value;
    }
  }
  a_asr_zip.dis_entry_assembly();
  csr<T,sequential> a_zip (a_asr_zip);
  csr<T,sequential> c_zip = a_zip*b_zip;
  asr<T,distributed> c_asr_zip (a.row_ownership(), b.col_ownership());
  const_iterator ic = c_zip.begin();
  for (size_type i = 0, n = c_zip.nrow(); i < n; ++i) {
    size_type dis_i = first_dis_i + i;
    for (const_data_iterator p = ic[i], last_p = ic[i+1]; p != last_p; ++p) {
      size_type dis_k = (*p).first;
      const T& value = (*p).second;
      c_asr_zip.semi_dis_entry (i, dis_k) += value;
    }
  }
  c_asr_zip.dis_entry_assembly();
  build_from_asr (c_asr_zip);
}
#define _RHEOLEF_istanciate(T) \
template class csr_rep<T,distributed>; \
template void csr_rep<T,distributed>::assign_add ( \
  const csr_rep<T,distributed>&, \
  const csr_rep<T,distributed>&, \
  std::plus<T>); \
template void csr_rep<T,distributed>::assign_add ( \
  const csr_rep<T,distributed>&, \
  const csr_rep<T,distributed>&, \
  std::minus<T>); \
template void csr_rep<T,distributed>::build_from_asr (const asr<T,distributed,std::allocator<T> >&); \
template void csr_rep<T,distributed>::build_from_asr (const asr<T,distributed,heap_allocator<T> >&);

_RHEOLEF_istanciate(Float)

} // namespace rheolef
# endif // _RHEOLEF_HAVE_MPI