MBDyn-1.7.3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups
parsuperluwrap.cc
Go to the documentation of this file.
1 /* $Header: /var/cvs/mbdyn/mbdyn/mbdyn-1.0/libraries/libmbwrap/parsuperluwrap.cc,v 1.37 2017/01/12 14:44:25 masarati Exp $ */
2 /*
3  * MBDyn (C) is a multibody analysis code.
4  * http://www.mbdyn.org
5  *
6  * Copyright (C) 1996-2017
7  *
8  * Pierangelo Masarati <masarati@aero.polimi.it>
9  * Paolo Mantegazza <mantegazza@aero.polimi.it>
10  *
11  * Dipartimento di Ingegneria Aerospaziale - Politecnico di Milano
12  * via La Masa, 34 - 20156 Milano, Italy
13  * http://www.aero.polimi.it
14  *
15  * Changing this copyright notice is forbidden.
16  *
17  * This program is free software; you can redistribute it and/or modify
18  * it under the terms of the GNU General Public License as published by
19  * the Free Software Foundation (version 2 of the License).
20  *
21  *
22  * This program is distributed in the hope that it will be useful,
23  * but WITHOUT ANY WARRANTY; without even the implied warranty of
24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25  * GNU General Public License for more details.
26  *
27  * You should have received a copy of the GNU General Public License
28  * along with this program; if not, write to the Free Software
29  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30  */
31 
32 /*****************************************************************************
33  * *
34  * SuperLU C++ WRAPPER *
35  * *
36  *****************************************************************************/
37 
38 #include "mbconfig.h" /* This goes first in every *.c,*.cc file */
39 
40 #ifdef USE_SUPERLU_MT
41 
42 #include <sys/types.h>
43 #include <unistd.h>
44 #include <signal.h>
45 
46 #include "spmh.h"
47 #include "spmapmh.h"
48 #include "dirccmh.h"
49 #include "ccmh.h"
50 
51 extern "C" {
52 #include <pdsp_defs.h>
53 #include <util.h>
54 
55 extern void *pdgstrf_thread(void *);
56 extern void pdgstrf_finalize(pdgstrf_options_t *, SuperMatrix*);
57 }
58 
59 #include "parsuperluwrap.h"
60 
61 
62 struct ParSuperLUSolverData {
63  SuperMatrix A,
64  AC,
65  L,
66  U,
67  B;
68  Gstat_t Gstat;
69  std::vector<int> perm_c, /* column permutation vector */
70  perm_r; /* row permutations from partial pivoting */
71  pdgstrf_options_t pdgstrf_options;
72  pxgstrf_shared_t pxgstrf_shared;
73  pdgstrf_threadarg_t *pdgstrf_threadarg;
74 };
75 
76 /* ParSuperLUSolver - begin */
77 
78 /* Costruttore: si limita ad allocare la memoria */
79 ParSuperLUSolver::ParSuperLUSolver(unsigned nt, integer iMatOrd,
80  const doublereal &dPivot)
81 : Aip(0),
82 App(0),
83 Axp(0),
84 iN(iMatOrd),
85 iNonZeroes(0),
86 dPivotFactor(dPivot),
87 bFirstSol(true),
88 bRegenerateMatrix(true),
89 sld(0),
90 nThreads(nt),
91 thread_data(0)
92 {
93  ASSERT(iN > 0);
94 
95  SAFENEW(sld, ParSuperLUSolverData);
96 
97  /*
98  * This means it's the first run
99  * FIXME: create a dependence on the library's internals
100  */
101  sld->A.Store = NULL;
102 
103  pthread_mutex_init(&thread_mutex, NULL);
104  pthread_cond_init(&thread_cond, NULL);
105 
106  SAFENEWARRNOFILL(thread_data, thread_data_t, nThreads);
107 
108  for (unsigned t = 0; t < nThreads; t++) {
109  thread_data[t].pSLUS = this;
110  thread_data[t].threadNumber = t;
111 
112  if (t == 0) {
113  continue;
114  }
115 
116  sem_init(&thread_data[t].sem, 0, 0);
117  if (pthread_create(&thread_data[t].thread, NULL, thread_op, &thread_data[t]) != 0) {
118  silent_cerr("ParSuperLUSolver: pthread_create() failed "
119  "for thread " << t
120  << " of " << nThreads << std::endl);
122  }
123  }
124 }
125 
126 /* Distruttore */
127 ParSuperLUSolver::~ParSuperLUSolver(void)
128 {
129 #if 0
130  /* ------------------------------------------------------------
131  * Clean up and free storage after multithreaded factorization.
132  * ------------------------------------------------------------*/
133  pdgstrf_thread_finalize(sld->pdgstrf_threadarg, &sld->pxgstrf_shared,
134  &sld->A, &sld->perm_r[0], &sld->L, &sld->U);
135 
136  /* ------------------------------------------------------------
137  * Deallocate storage after factorization.
138  * ------------------------------------------------------------*/
139  pdgstrf_finalize(&sld->pdgstrf_options, &sld->AC);
140 #endif
141 
142  thread_operation = ParSuperLUSolver::EXIT;
143  thread_count = nThreads - 1;
144 
145  for (unsigned i = 1; i < nThreads; i++) {
146  sem_post(&thread_data[i].sem);
147  }
148 
149  /* thread cleanup func? */
150 
151  pthread_mutex_lock(&thread_mutex);
152  if (thread_count > 0) {
153  pthread_cond_wait(&thread_cond, &thread_mutex);
154  }
155  pthread_mutex_unlock(&thread_mutex);
156 
157  for (unsigned i = 1; i < nThreads; i++) {
158  sem_destroy(&thread_data[i].sem);
159  }
160 
161  pthread_mutex_destroy(&thread_mutex);
162  pthread_cond_destroy(&thread_cond);
163 
164  /* other cleanup... */
165 }
166 
167 void *
168 ParSuperLUSolver::thread_op(void *arg)
169 {
170  thread_data_t *td = (thread_data_t *)arg;
171 
172  silent_cout("ParSuperLUSolver: thread " << td->threadNumber
173  << " [self=" << pthread_self()
174  << ",pid=" << getpid() << "]"
175  << " starting..." << std::endl);
176 
177  /* deal with signals ... */
178  sigset_t newset /* , oldset */ ;
179  sigemptyset(&newset);
180  sigaddset(&newset, SIGTERM);
181  sigaddset(&newset, SIGINT);
182  sigaddset(&newset, SIGHUP);
183  pthread_sigmask(SIG_BLOCK, &newset, /* &oldset */ NULL);
184 
185  bool bKeepGoing(true);
186 
187  while (bKeepGoing) {
188  sem_wait(&td->sem);
189 
190  switch (td->pSLUS->thread_operation) {
191  case ParSuperLUSolver::FACTOR:
192  (void)pdgstrf_thread(td->pdgstrf_threadarg);
193  break;
194 
195  case ParSuperLUSolver::EXIT:
196  bKeepGoing = false;
197  break;
198 
199  default:
200  silent_cerr("ParSuperLUSolver: unhandled op"
201  << std::endl);
203  }
204 
205  td->pSLUS->EndOfOp();
206  }
207 
208  pthread_exit(NULL);
209 }
210 
211 void
212 ParSuperLUSolver::EndOfOp(void)
213 {
214  bool last;
215 
216  /* decrement the thread counter */
217  pthread_mutex_lock(&thread_mutex);
218  thread_count--;
219  last = (thread_count == 0);
220 
221  /* if last thread, signal to restart */
222  if (last) {
223 #if 0
224  pthread_cond_broadcast(&thread_cond);
225 #else
226  pthread_cond_signal(&thread_cond);
227 #endif
228  }
229  pthread_mutex_unlock(&thread_mutex);
230 }
231 
232 #ifdef DEBUG
233 void
234 ParSuperLUSolver::IsValid(void) const
235 {
236  ASSERT(Aip != NULL);
237  ASSERT(App != NULL);
238  ASSERT(Axp != NULL);
239  ASSERT(iN > 0);
240 }
241 #endif /* DEBUG */
242 
243 /* Fattorizza la matrice */
244 void
245 ParSuperLUSolver::Factor(void)
246 {
247 #ifdef DEBUG
248  IsValid();
249 #endif /* DEBUG */
250 
251  ASSERT(iNonZeroes > 0);
252 
253  yes_no_t refact = YES,
254  usepr = NO;
255  doublereal u = dPivotFactor,
256  drop_tol = 0.0;
257  void *work = NULL;
258  int info = 0, lwork = 0;
259  int panel_size = sp_ienv(1),
260  relax = sp_ienv(2);
261 
262  if (bRegenerateMatrix) {
263  refact = NO;
264 
265  /* NOTE: we could use sld->A.Store == NULL */
266  if (bFirstSol) {
267  ASSERT(Astore == NULL);
268 
269  /* ---------------------------------------------------
270  * Allocate storage and initialize statistics variables.
271  * ---------------------------------------------------*/
272  /* Set up the dense matrix data structure for B. */
273  dCreate_Dense_Matrix(&sld->B, iN, 1,
275  iN, DN, _D, GE);
276 
277  /* Set up the sparse matrix data structure for A. */
278  dCreate_CompCol_Matrix(&sld->A, iN, iN, iNonZeroes,
279  Axp, Aip, App, NC, _D, GE);
280 
281  StatAlloc(iN, nThreads, panel_size, relax, &sld->Gstat);
282  StatInit(iN, nThreads, &sld->Gstat);
283 
284  sld->perm_c.resize(iN);
285  sld->perm_r.resize(iN);
286 
287  bFirstSol = false; /* never change this again */
288 
289  } else {
290  NCformat *Astore = (NCformat *) sld->A.Store;
291 
292  ASSERT(Astore);
293 
294  Astore->nnz = iNonZeroes;
295  Astore->nzval = Axp;
296  Astore->rowind = Aip;
297  Astore->colptr = App;
298  }
299 
300  /* --------------------------------------------------
301  * Get column permutation vector perm_c[], according
302  * to permc_spec:
303  * permc_spec = 0: use the natural ordering
304  * permc_spec = 1: use minimum degree ordering
305  * on structure of A'*A
306  * permc_spec = 2: use minimum degree ordering
307  * on structure of A'+A
308  * !!! permc_spec = 3: use approximate minimum
309  * degree column order !!!
310  * --------------------------------------------------*/
311 
312  /*
313  * According to Umfpack's use of AMD:
314  *
315  * symmetric matrix:
316  * AMD: A^T + A permutation
317  *
318  * Non symmetric matrix:
319  * COLAMD: A^T * A permutation
320  *
321  * so we use permc_spec = 1
322  */
323 
324  int permc_spec = 1;
325  int *pc = &(sld->perm_c[0]);
326  get_perm_c(permc_spec, &sld->A, pc);
327 
328  bRegenerateMatrix = false;
329  }
330 
331  int *pr = &(sld->perm_r[0]),
332  *pc = &(sld->perm_c[0]);
333 
334  /* ------------------------------------------------------------
335  * Initialize the option structure pdgstrf_options using the
336  * user-input parameters;
337  * Apply perm_c to the columns of original A to form AC.
338  * ------------------------------------------------------------*/
339  pdgstrf_init(nThreads, refact, panel_size, relax,
340  u, usepr, drop_tol, pc, pr,
341  work, lwork, &sld->A, &sld->AC,
342  &sld->pdgstrf_options, &sld->Gstat);
343 
344 
345  /* --------------------------------------------------------------
346  * Initializes the parallel data structures for pdgstrf_thread().
347  * --------------------------------------------------------------*/
348  sld->pdgstrf_threadarg = pdgstrf_thread_init(&sld->AC,
349  &sld->L, &sld->U, &sld->pdgstrf_options,
350  &sld->pxgstrf_shared, &sld->Gstat, &info);
351 
352  if (info != 0) {
353  silent_cerr("ParSuperLUSolver::Factor: pdgstrf_thread_init failed"
354  << std::endl);
355  }
356 
357  for (unsigned t = 0; t < nThreads; t++) {
358  thread_data[t].pdgstrf_threadarg = &sld->pdgstrf_threadarg[t];
359  }
360 
361  thread_operation = ParSuperLUSolver::FACTOR;
362  thread_count = nThreads - 1;
363 
364  for (unsigned t = 1; t < nThreads; t++) {
365  sem_post(&thread_data[t].sem);
366  }
367 
368  (void)pdgstrf_thread(thread_data[0].pdgstrf_threadarg);
369 
370  pthread_mutex_lock(&thread_mutex);
371  if (thread_count > 0) {
372  pthread_cond_wait(&thread_cond, &thread_mutex);
373  }
374  pthread_mutex_unlock(&thread_mutex);
375 
376  /* ------------------------------------------------------------
377  * Clean up and free storage after multithreaded factorization.
378  * ------------------------------------------------------------*/
379  pdgstrf_thread_finalize(sld->pdgstrf_threadarg, &sld->pxgstrf_shared,
380  &sld->A, pr, &sld->L, &sld->U);
381 }
382 
383 /* Risolve */
384 void
385 ParSuperLUSolver::Solve(void) const
386 {
387 #ifdef DEBUG
388  IsValid();
389 #endif /* DEBUG */
390 
391  if (bHasBeenReset) {
392  const_cast<ParSuperLUSolver *>(this)->Factor();
393  bHasBeenReset = false;
394  }
395 
396  /* ------------------------------------------------------------
397  * Solve the system A*X=B, overwriting B with X.
398  * ------------------------------------------------------------*/
399  trans_t trans = NOTRANS;
400  int info = 0;
401 
402  int *pr = &(sld->perm_r[0]),
403  *pc = &(sld->perm_c[0]);
404 
405  dgstrs(trans, &sld->L, &sld->U, pr, pc,
406  &sld->B, &sld->Gstat, &info);
407 }
408 
409 /* Index Form */
410 void
411 ParSuperLUSolver::MakeCompactForm(SparseMatrixHandler& mh,
412  std::vector<doublereal>& Ax,
413  std::vector<integer>& Ar, std::vector<integer>& Ac,
414  std::vector<integer>& Ap) const
415 {
416  /* no need to rebuild matrix */
417  if (!bHasBeenReset) {
418  return;
419  }
420 
421  iNonZeroes = mh.MakeCompressedColumnForm(Ax, Ar, Ap, 0);
422  ASSERT(iNonZeroes > 0);
423 
424  Axp = &Ax[0];
425  Aip = &Ar[0];
426  App = &Ap[0];
427 
428  /* rebuild matrix ... (CC is broken) */
429  bRegenerateMatrix = true;
430 
431 #if 0
432  Destroy_CompCol_Matrix(&sld->A);
433 #endif
434 }
435 
436 /* ParSuperLUSolver - end */
437 
438 
439 /* ParSuperLUSparseSolutionManager - begin: code */
440 
441 /* Costruttore */
442 ParSuperLUSparseSolutionManager::ParSuperLUSparseSolutionManager(unsigned nt,
443  integer iSize, const doublereal& dPivotFactor)
444 : iMatSize(iSize),
445 Ap(iSize + 1),
446 xb(iSize),
447 MH(iSize),
448 VH(iSize, &xb[0])
449 {
450  ASSERT(iSize > 0);
451  ASSERT((dPivotFactor >= 0.0) && (dPivotFactor <= 1.0));
452 
454  ParSuperLUSolver,
455  ParSuperLUSolver(nt, iMatSize, dPivotFactor));
456 
457  pLS->pdSetResVec(&(xb[0]));
458  pLS->pdSetSolVec(&(xb[0]));
459  pLS->SetSolutionManager(this);
460 
461 #ifdef DEBUG
462  IsValid();
463 #endif /* DEBUG */
464 }
465 
466 
467 /* Distruttore; verificare la distruzione degli oggetti piu' complessi */
468 ParSuperLUSparseSolutionManager::~ParSuperLUSparseSolutionManager(void)
469 {
470 #ifdef DEBUG
471  IsValid();
472 #endif /* DEBUG */
473 
474  /* Dealloca roba, tra cui i thread */
475 }
476 
477 #ifdef DEBUG
478 /* Test di validita' del manager */
479 void
480 ParSuperLUSparseSolutionManager::IsValid(void) const
481 {
482  ASSERT(iMatSize > 0);
483 
484 #ifdef DEBUG_MEMMANAGER
485  ASSERT(defaultMemoryManager.fIsPointerToBlock(VH.pdGetVec()));
486  ASSERT(defaultMemoryManager.fIsPointerToBlock(pLS));
487 #endif /* DEBUG_MEMMANAGER */
488 
489  ASSERT((VH.IsValid(), 1));
490  ASSERT((pLS->IsValid(), 1));
491 }
492 #endif /* DEBUG */
493 
494 /* Inizializza il gestore delle matrici */
495 void
496 ParSuperLUSparseSolutionManager::MatrReset(void)
497 {
498 #ifdef DEBUG
499  IsValid();
500 #endif /* DEBUG */
501 
502  pLS->Reset();
503 }
504 
505 void
506 ParSuperLUSparseSolutionManager::MakeCompressedColumnForm(void)
507 {
508 #ifdef DEBUG
509  IsValid();
510 #endif /* DEBUG */
511 
512  /* FIXME: move this to the matrix handler! */
513  pLS->MakeCompactForm(MH, Ax, Ai, Adummy, Ap);
514 }
515 
516 /* Risolve il problema */
517 void
518 ParSuperLUSparseSolutionManager::Solve(void)
519 {
520 #ifdef DEBUG
521  IsValid();
522 #endif /* DEBUG */
523 
524  /* FIXME: move this to the matrix handler! */
525  MakeCompressedColumnForm();
526 
527 #if 0
528  std::cerr << "### after MakeIndexForm:" << std::endl
529  << "{col Ap[col]}={" << std::endl;
530  for (unsigned i = 0; i < Ap.size(); i++) {
531  std::cerr << i << " " << Ap[i] << std::endl;
532  }
533  std::cerr << "}" << std::endl;
534 
535  std::cerr << "{idx Ai[idx] col Ax[idx]}={" << std::endl;
536  unsigned c = 0;
537  for (unsigned i = 0; i < Ax.size(); i++) {
538  std::cerr << i << " " << Ai[i] << " " << c << " " << Ax[i] << std::endl;
539  if (i == Ap[c]) {
540  c++;
541  }
542  }
543  std::cerr << "}" << std::endl;
544 #endif
545 
546  pLS->Solve();
547 
548 #if 0
549  std::cerr << "### after Solve:" << std::endl
550  << "{col Ap[col]}={" << std::endl;
551  for (unsigned i = 0; i < Ap.size(); i++) {
552  std::cerr << i << " " << Ap[i] << std::endl;
553  }
554  std::cerr << "}" << std::endl;
555 
556  std::cerr << "{idx Ai[idx] col Ax[idx]}={" << std::endl;
557  c = 0;
558  for (unsigned i = 0; i < Ax.size(); i++) {
559  std::cerr << i << " " << Ai[i] << " " << c << " " << Ax[i] << std::endl;
560  if (i == Ap[c]) {
561  c++;
562  }
563  }
564  std::cerr << "}" << std::endl;
565 #endif
566 }
567 
568 /* ParSuperLUSparseSolutionManager - end */
569 
570 /* ParSuperLUSparseCCSolutionManager - begin */
571 
572 template <class CC>
573 ParSuperLUSparseCCSolutionManager<CC>::ParSuperLUSparseCCSolutionManager(unsigned nt,
574  integer Dim, const doublereal &dPivot)
575 : ParSuperLUSparseSolutionManager(nt, Dim, dPivot),
576 CCReady(false),
577 Ac(0)
578 {
579  NO_OP;
580 }
581 
582 template <class CC>
583 ParSuperLUSparseCCSolutionManager<CC>::~ParSuperLUSparseCCSolutionManager(void)
584 {
585  if (Ac) {
586  SAFEDELETE(Ac);
587  }
588 }
589 
590 template <class CC>
591 void
592 ParSuperLUSparseCCSolutionManager<CC>::MatrReset(void)
593 {
594  pLS->Reset();
595 }
596 
597 /* Risolve il sistema Fattorizzazione + Backward Substitution */
598 template <class CC>
599 void
600 ParSuperLUSparseCCSolutionManager<CC>::MakeCompressedColumnForm(void)
601 {
602  if (!CCReady) {
603  pLS->MakeCompactForm(MH, Ax, Ai, Adummy, Ap);
604 
605  if (Ac == 0) {
606  SAFENEWWITHCONSTRUCTOR(Ac, CC, CC(Ax, Ai, Ap));
607  }
608 
609  CCReady = true;
610  }
611 }
612 
613 /* Inizializzatore "speciale" */
614 template <class CC>
615 void
616 ParSuperLUSparseCCSolutionManager<CC>::MatrInitialize()
617 {
618  CCReady = false;
619 
620  MatrReset();
621 }
622 
623 /* Rende disponibile l'handler per la matrice */
624 template <class CC>
626 ParSuperLUSparseCCSolutionManager<CC>::pMatHdl(void) const
627 {
628  if (!CCReady) {
629  return &MH;
630  }
631 
632  ASSERT(Ac != 0);
633  return Ac;
634 }
635 
636 template class ParSuperLUSparseCCSolutionManager<CColMatrixHandler<0> >;
637 template class ParSuperLUSparseCCSolutionManager<DirCColMatrixHandler<0> >;
638 
639 /* ParSuperLUSparseCCSolutionManager - end */
640 
641 #endif /* USE_SUPERLU_MT */
642 
virtual integer MakeCompressedColumnForm(doublereal *const Ax, integer *const Ai, integer *const Ap, int offset=0) const =0
#define MBDYN_EXCEPT_ARGS
Definition: except.h:63
#define NO_OP
Definition: myassert.h:74
#define SAFENEW(pnt, item)
Definition: mynewmem.h:695
Definition: mbdyn.h:76
Definition: mbdyn.h:77
#define ASSERT(expression)
Definition: colamd.c:977
#define SAFENEWWITHCONSTRUCTOR(pnt, item, constructor)
Definition: mynewmem.h:698
#define defaultMemoryManager
Definition: mynewmem.h:691
static std::stack< cleanup * > c
Definition: cleanup.cc:59
#define SAFENEWARRNOFILL(pnt, item, sz)
Definition: mynewmem.h:704
doublereal * pdRhs
Definition: ls.h:74
LinearSolver * pLS
Definition: solman.h:151
double doublereal
Definition: colamd.c:52
long int integer
Definition: colamd.c:51
#define SAFEDELETE(pnt)
Definition: mynewmem.h:710