|
Blender
V2.59
|
00001 00005 /* 00006 * -- SuperLU routine (version 3.0) -- 00007 * Univ. of California Berkeley, Xerox Palo Alto Research Center, 00008 * and Lawrence Berkeley National Lab. 00009 * October 15, 2003 00010 * 00011 */ 00012 /* 00013 Copyright (c) 1994 by Xerox Corporation. All rights reserved. 00014 00015 THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY 00016 EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. 00017 00018 Permission is hereby granted to use or copy this program for any 00019 purpose, provided the above notices are retained on all copies. 00020 Permission to modify the code and to distribute modified code is 00021 granted, provided the above notices are retained, and a notice that 00022 the code was modified is included with the above copyright notice. 00023 */ 00024 00025 #include <math.h> 00026 #include "ssp_defs.h" 00027 00028 /* prototypes */ 00029 void sprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu); 00030 void scheck_tempv(int n, float *tempv); 00031 void sPrintPerf(SuperMatrix *, SuperMatrix *, mem_usage_t *,float , float , 00032 float *, float *, char *, SuperLUStat_t *); 00033 int print_float_vec(char *what, int n, float *vec); 00034 /* ********** */ 00035 00036 void 00037 sCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, 00038 float *nzval, int *rowind, int *colptr, 00039 Stype_t stype, Dtype_t dtype, Mtype_t mtype) 00040 { 00041 NCformat *Astore; 00042 00043 A->Stype = stype; 00044 A->Dtype = dtype; 00045 A->Mtype = mtype; 00046 A->nrow = m; 00047 A->ncol = n; 00048 A->Store = (void *) SUPERLU_MALLOC( sizeof(NCformat) ); 00049 if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store"); 00050 Astore = A->Store; 00051 Astore->nnz = nnz; 00052 Astore->nzval = nzval; 00053 Astore->rowind = rowind; 00054 Astore->colptr = colptr; 00055 } 00056 00057 void 00058 sCreate_CompRow_Matrix(SuperMatrix *A, int m, int n, int nnz, 00059 float *nzval, int *colind, int *rowptr, 00060 Stype_t stype, Dtype_t dtype, Mtype_t mtype) 00061 { 00062 NRformat *Astore; 00063 00064 A->Stype = stype; 00065 A->Dtype = dtype; 00066 A->Mtype = mtype; 00067 A->nrow = m; 00068 A->ncol = n; 00069 A->Store = (void *) SUPERLU_MALLOC( sizeof(NRformat) ); 00070 if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store"); 00071 Astore = A->Store; 00072 Astore->nnz = nnz; 00073 Astore->nzval = nzval; 00074 Astore->colind = colind; 00075 Astore->rowptr = rowptr; 00076 } 00077 00078 /* Copy matrix A into matrix B. */ 00079 void 00080 sCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B) 00081 { 00082 NCformat *Astore, *Bstore; 00083 int ncol, nnz, i; 00084 00085 B->Stype = A->Stype; 00086 B->Dtype = A->Dtype; 00087 B->Mtype = A->Mtype; 00088 B->nrow = A->nrow;; 00089 B->ncol = ncol = A->ncol; 00090 Astore = (NCformat *) A->Store; 00091 Bstore = (NCformat *) B->Store; 00092 Bstore->nnz = nnz = Astore->nnz; 00093 for (i = 0; i < nnz; ++i) 00094 ((float *)Bstore->nzval)[i] = ((float *)Astore->nzval)[i]; 00095 for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i]; 00096 for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i]; 00097 } 00098 00099 00100 void 00101 sCreate_Dense_Matrix(SuperMatrix *X, int m, int n, float *x, int ldx, 00102 Stype_t stype, Dtype_t dtype, Mtype_t mtype) 00103 { 00104 DNformat *Xstore; 00105 00106 X->Stype = stype; 00107 X->Dtype = dtype; 00108 X->Mtype = mtype; 00109 X->nrow = m; 00110 X->ncol = n; 00111 X->Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) ); 00112 if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store"); 00113 Xstore = (DNformat *) X->Store; 00114 Xstore->lda = ldx; 00115 Xstore->nzval = (float *) x; 00116 } 00117 00118 void 00119 sCopy_Dense_Matrix(int M, int N, float *X, int ldx, 00120 float *Y, int ldy) 00121 { 00122 /* 00123 * 00124 * Purpose 00125 * ======= 00126 * 00127 * Copies a two-dimensional matrix X to another matrix Y. 00128 */ 00129 int i, j; 00130 00131 for (j = 0; j < N; ++j) 00132 for (i = 0; i < M; ++i) 00133 Y[i + j*ldy] = X[i + j*ldx]; 00134 } 00135 00136 void 00137 sCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz, 00138 float *nzval, int *nzval_colptr, int *rowind, 00139 int *rowind_colptr, int *col_to_sup, int *sup_to_col, 00140 Stype_t stype, Dtype_t dtype, Mtype_t mtype) 00141 { 00142 SCformat *Lstore; 00143 00144 L->Stype = stype; 00145 L->Dtype = dtype; 00146 L->Mtype = mtype; 00147 L->nrow = m; 00148 L->ncol = n; 00149 L->Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) ); 00150 if ( !(L->Store) ) ABORT("SUPERLU_MALLOC fails for L->Store"); 00151 Lstore = L->Store; 00152 Lstore->nnz = nnz; 00153 Lstore->nsuper = col_to_sup[n]; 00154 Lstore->nzval = nzval; 00155 Lstore->nzval_colptr = nzval_colptr; 00156 Lstore->rowind = rowind; 00157 Lstore->rowind_colptr = rowind_colptr; 00158 Lstore->col_to_sup = col_to_sup; 00159 Lstore->sup_to_col = sup_to_col; 00160 00161 } 00162 00163 00164 /* 00165 * Convert a row compressed storage into a column compressed storage. 00166 */ 00167 void 00168 sCompRow_to_CompCol(int m, int n, int nnz, 00169 float *a, int *colind, int *rowptr, 00170 float **at, int **rowind, int **colptr) 00171 { 00172 register int i, j, col, relpos; 00173 int *marker; 00174 00175 /* Allocate storage for another copy of the matrix. */ 00176 *at = (float *) floatMalloc(nnz); 00177 *rowind = (int *) intMalloc(nnz); 00178 *colptr = (int *) intMalloc(n+1); 00179 marker = (int *) intCalloc(n); 00180 00181 /* Get counts of each column of A, and set up column pointers */ 00182 for (i = 0; i < m; ++i) 00183 for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]]; 00184 (*colptr)[0] = 0; 00185 for (j = 0; j < n; ++j) { 00186 (*colptr)[j+1] = (*colptr)[j] + marker[j]; 00187 marker[j] = (*colptr)[j]; 00188 } 00189 00190 /* Transfer the matrix into the compressed column storage. */ 00191 for (i = 0; i < m; ++i) { 00192 for (j = rowptr[i]; j < rowptr[i+1]; ++j) { 00193 col = colind[j]; 00194 relpos = marker[col]; 00195 (*rowind)[relpos] = i; 00196 (*at)[relpos] = a[j]; 00197 ++marker[col]; 00198 } 00199 } 00200 00201 SUPERLU_FREE(marker); 00202 } 00203 00204 00205 void 00206 sPrint_CompCol_Matrix(char *what, SuperMatrix *A) 00207 { 00208 NCformat *Astore; 00209 register int i,n; 00210 float *dp; 00211 00212 printf("\nCompCol matrix %s:\n", what); 00213 printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype); 00214 n = A->ncol; 00215 Astore = (NCformat *) A->Store; 00216 dp = (float *) Astore->nzval; 00217 printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz); 00218 printf("nzval: "); 00219 for (i = 0; i < Astore->colptr[n]; ++i) printf("%f ", dp[i]); 00220 printf("\nrowind: "); 00221 for (i = 0; i < Astore->colptr[n]; ++i) printf("%d ", Astore->rowind[i]); 00222 printf("\ncolptr: "); 00223 for (i = 0; i <= n; ++i) printf("%d ", Astore->colptr[i]); 00224 printf("\n"); 00225 fflush(stdout); 00226 } 00227 00228 void 00229 sPrint_SuperNode_Matrix(char *what, SuperMatrix *A) 00230 { 00231 SCformat *Astore; 00232 register int i, j, k, c, d, n, nsup; 00233 float *dp; 00234 int *col_to_sup, *sup_to_col, *rowind, *rowind_colptr; 00235 00236 printf("\nSuperNode matrix %s:\n", what); 00237 printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype); 00238 n = A->ncol; 00239 Astore = (SCformat *) A->Store; 00240 dp = (float *) Astore->nzval; 00241 col_to_sup = Astore->col_to_sup; 00242 sup_to_col = Astore->sup_to_col; 00243 rowind_colptr = Astore->rowind_colptr; 00244 rowind = Astore->rowind; 00245 printf("nrow %d, ncol %d, nnz %d, nsuper %d\n", 00246 A->nrow,A->ncol,Astore->nnz,Astore->nsuper); 00247 printf("nzval:\n"); 00248 for (k = 0; k <= Astore->nsuper; ++k) { 00249 c = sup_to_col[k]; 00250 nsup = sup_to_col[k+1] - c; 00251 for (j = c; j < c + nsup; ++j) { 00252 d = Astore->nzval_colptr[j]; 00253 for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) { 00254 printf("%d\t%d\t%e\n", rowind[i], j, dp[d++]); 00255 } 00256 } 00257 } 00258 #if 0 00259 for (i = 0; i < Astore->nzval_colptr[n]; ++i) printf("%f ", dp[i]); 00260 #endif 00261 printf("\nnzval_colptr: "); 00262 for (i = 0; i <= n; ++i) printf("%d ", Astore->nzval_colptr[i]); 00263 printf("\nrowind: "); 00264 for (i = 0; i < Astore->rowind_colptr[n]; ++i) 00265 printf("%d ", Astore->rowind[i]); 00266 printf("\nrowind_colptr: "); 00267 for (i = 0; i <= n; ++i) printf("%d ", Astore->rowind_colptr[i]); 00268 printf("\ncol_to_sup: "); 00269 for (i = 0; i < n; ++i) printf("%d ", col_to_sup[i]); 00270 printf("\nsup_to_col: "); 00271 for (i = 0; i <= Astore->nsuper+1; ++i) 00272 printf("%d ", sup_to_col[i]); 00273 printf("\n"); 00274 fflush(stdout); 00275 } 00276 00277 void 00278 sPrint_Dense_Matrix(char *what, SuperMatrix *A) 00279 { 00280 DNformat *Astore; 00281 register int i; 00282 float *dp; 00283 00284 printf("\nDense matrix %s:\n", what); 00285 printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype); 00286 Astore = (DNformat *) A->Store; 00287 dp = (float *) Astore->nzval; 00288 printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda); 00289 printf("\nnzval: "); 00290 for (i = 0; i < A->nrow; ++i) printf("%f ", dp[i]); 00291 printf("\n"); 00292 fflush(stdout); 00293 } 00294 00295 /* 00296 * Diagnostic print of column "jcol" in the U/L factor. 00297 */ 00298 void 00299 sprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu) 00300 { 00301 int i, k, fsupc; 00302 int *xsup, *supno; 00303 int *xlsub, *lsub; 00304 float *lusup; 00305 int *xlusup; 00306 float *ucol; 00307 int *usub, *xusub; 00308 00309 xsup = Glu->xsup; 00310 supno = Glu->supno; 00311 lsub = Glu->lsub; 00312 xlsub = Glu->xlsub; 00313 lusup = Glu->lusup; 00314 xlusup = Glu->xlusup; 00315 ucol = Glu->ucol; 00316 usub = Glu->usub; 00317 xusub = Glu->xusub; 00318 00319 printf("%s", msg); 00320 printf("col %d: pivrow %d, supno %d, xprune %d\n", 00321 jcol, pivrow, supno[jcol], xprune[jcol]); 00322 00323 printf("\tU-col:\n"); 00324 for (i = xusub[jcol]; i < xusub[jcol+1]; i++) 00325 printf("\t%d%10.4f\n", usub[i], ucol[i]); 00326 printf("\tL-col in rectangular snode:\n"); 00327 fsupc = xsup[supno[jcol]]; /* first col of the snode */ 00328 i = xlsub[fsupc]; 00329 k = xlusup[jcol]; 00330 while ( i < xlsub[fsupc+1] && k < xlusup[jcol+1] ) { 00331 printf("\t%d\t%10.4f\n", lsub[i], lusup[k]); 00332 i++; k++; 00333 } 00334 fflush(stdout); 00335 } 00336 00337 00338 /* 00339 * Check whether tempv[] == 0. This should be true before and after 00340 * calling any numeric routines, i.e., "panel_bmod" and "column_bmod". 00341 */ 00342 void scheck_tempv(int n, float *tempv) 00343 { 00344 int i; 00345 00346 for (i = 0; i < n; i++) { 00347 if (tempv[i] != 0.0) 00348 { 00349 fprintf(stderr,"tempv[%d] = %f\n", i,tempv[i]); 00350 ABORT("scheck_tempv"); 00351 } 00352 } 00353 } 00354 00355 00356 void 00357 sGenXtrue(int n, int nrhs, float *x, int ldx) 00358 { 00359 int i, j; 00360 for (j = 0; j < nrhs; ++j) 00361 for (i = 0; i < n; ++i) { 00362 x[i + j*ldx] = 1.0;/* + (float)(i+1.)/n;*/ 00363 } 00364 } 00365 00366 /* 00367 * Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's 00368 */ 00369 void 00370 sFillRHS(trans_t trans, int nrhs, float *x, int ldx, 00371 SuperMatrix *A, SuperMatrix *B) 00372 { 00373 NCformat *Astore; 00374 float *Aval; 00375 DNformat *Bstore; 00376 float *rhs; 00377 float one = 1.0; 00378 float zero = 0.0; 00379 int ldc; 00380 char transc[1]; 00381 00382 Astore = A->Store; 00383 Aval = (float *) Astore->nzval; 00384 Bstore = B->Store; 00385 rhs = Bstore->nzval; 00386 ldc = Bstore->lda; 00387 00388 if ( trans == NOTRANS ) *(unsigned char *)transc = 'N'; 00389 else *(unsigned char *)transc = 'T'; 00390 00391 sp_sgemm(transc, nrhs, one, A, 00392 x, ldx, zero, rhs, ldc); 00393 00394 } 00395 00396 /* 00397 * Fills a float precision array with a given value. 00398 */ 00399 void 00400 sfill(float *a, int alen, float dval) 00401 { 00402 register int i; 00403 for (i = 0; i < alen; i++) a[i] = dval; 00404 } 00405 00406 00407 00408 /* 00409 * Check the inf-norm of the error vector 00410 */ 00411 void sinf_norm_error(int nrhs, SuperMatrix *X, float *xtrue) 00412 { 00413 DNformat *Xstore; 00414 float err, xnorm; 00415 float *Xmat, *soln_work; 00416 int i, j; 00417 00418 Xstore = X->Store; 00419 Xmat = Xstore->nzval; 00420 00421 for (j = 0; j < nrhs; j++) { 00422 soln_work = &Xmat[j*Xstore->lda]; 00423 err = xnorm = 0.0; 00424 for (i = 0; i < X->nrow; i++) { 00425 err = SUPERLU_MAX(err, fabs(soln_work[i] - xtrue[i])); 00426 xnorm = SUPERLU_MAX(xnorm, fabs(soln_work[i])); 00427 } 00428 err = err / xnorm; 00429 printf("||X - Xtrue||/||X|| = %e\n", err); 00430 } 00431 } 00432 00433 00434 00435 /* Print performance of the code. */ 00436 void 00437 sPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage, 00438 float rpg, float rcond, float *ferr, 00439 float *berr, char *equed, SuperLUStat_t *stat) 00440 { 00441 SCformat *Lstore; 00442 NCformat *Ustore; 00443 double *utime; 00444 flops_t *ops; 00445 00446 utime = stat->utime; 00447 ops = stat->ops; 00448 00449 if ( utime[FACT] != 0. ) 00450 printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT], 00451 ops[FACT]*1e-6/utime[FACT]); 00452 printf("Identify relaxed snodes = %8.2f\n", utime[RELAX]); 00453 if ( utime[SOLVE] != 0. ) 00454 printf("Solve flops = %.0f, Mflops = %8.2f\n", ops[SOLVE], 00455 ops[SOLVE]*1e-6/utime[SOLVE]); 00456 00457 Lstore = (SCformat *) L->Store; 00458 Ustore = (NCformat *) U->Store; 00459 printf("\tNo of nonzeros in factor L = %d\n", Lstore->nnz); 00460 printf("\tNo of nonzeros in factor U = %d\n", Ustore->nnz); 00461 printf("\tNo of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz); 00462 00463 printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", 00464 mem_usage->for_lu/1e6, mem_usage->total_needed/1e6, 00465 mem_usage->expansions); 00466 00467 printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n"); 00468 printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n", 00469 utime[FACT], ops[FACT]*1e-6/utime[FACT], 00470 utime[SOLVE], ops[SOLVE]*1e-6/utime[SOLVE], 00471 utime[ETREE], utime[EQUIL], utime[RCOND], utime[REFINE]); 00472 00473 printf("\tRpg\t\tRcond\t\tFerr\t\tBerr\t\tEquil?\n"); 00474 printf("NUM:\t%e\t%e\t%e\t%e\t%s\n", 00475 rpg, rcond, ferr[0], berr[0], equed); 00476 00477 } 00478 00479 00480 00481 00482 int print_float_vec(char *what, int n, float *vec) 00483 { 00484 int i; 00485 printf("%s: n %d\n", what, n); 00486 for (i = 0; i < n; ++i) printf("%d\t%f\n", i, vec[i]); 00487 return 0; 00488 } 00489