Blender  V2.59
ssnode_bmod.c
Go to the documentation of this file.
00001 
00005 /*
00006  * -- SuperLU routine (version 3.0) --
00007  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
00008  * and Lawrence Berkeley National Lab.
00009  * October 15, 2003
00010  *
00011  */
00012 /*
00013   Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
00014  
00015   THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
00016   EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
00017  
00018   Permission is hereby granted to use or copy this program for any
00019   purpose, provided the above notices are retained on all copies.
00020   Permission to modify the code and to distribute modified code is
00021   granted, provided the above notices are retained, and a notice that
00022   the code was modified is included with the above copyright notice.
00023 */
00024 
00025 #include "ssp_defs.h"
00026 
/*
 * Dense kernels defined elsewhere in the library.  ssnode_bmod() calls them
 * only when USE_VENDOR_BLAS is not defined.
 * NOTE(review): the parameter names are inferred from the call site in
 * ssnode_bmod() -- confirm against the actual definitions.
 */
void slsolve(int ldm, int ncol, float *M, float *rhs);
void smatvec(int ldm, int nrow, int ncol, float *M, float *vec, float *Mxvec);
00029 
00030 /*
00031  * Performs numeric block updates within the relaxed snode. 
00032  */
00033 int
00034 ssnode_bmod (
00035             const int  jcol,      /* in */
00036             const int  fsupc,     /* in */
00037             float     *dense,    /* in */
00038             float     *tempv,    /* working array */
00039             GlobalLU_t *Glu,      /* modified */
00040             SuperLUStat_t *stat   /* output */
00041             )
00042 {
00043 #ifdef USE_VENDOR_BLAS
00044 #ifdef _CRAY
00045     _fcd ftcs1 = _cptofcd("L", strlen("L")),
00046          ftcs2 = _cptofcd("N", strlen("N")),
00047          ftcs3 = _cptofcd("U", strlen("U"));
00048 #endif
00049     int            incx = 1, incy = 1;
00050     float         alpha = -1.0, beta = 1.0;
00051 #endif
00052 
00053     int            luptr, nsupc, nsupr, nrow;
00054     int            isub, irow, i, iptr; 
00055     register int   ufirst, nextlu;
00056     int            *lsub, *xlsub;
00057     float         *lusup;
00058     int            *xlusup;
00059     flops_t *ops = stat->ops;
00060 
00061     lsub    = Glu->lsub;
00062     xlsub   = Glu->xlsub;
00063     lusup   = Glu->lusup;
00064     xlusup  = Glu->xlusup;
00065 
00066     nextlu = xlusup[jcol];
00067     
00068     /*
00069      *  Process the supernodal portion of L\U[*,j]
00070      */
00071     for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
00072         irow = lsub[isub];
00073         lusup[nextlu] = dense[irow];
00074         dense[irow] = 0;
00075         ++nextlu;
00076     }
00077 
00078     xlusup[jcol + 1] = nextlu;  /* Initialize xlusup for next column */
00079     
00080     if ( fsupc < jcol ) {
00081 
00082         luptr = xlusup[fsupc];
00083         nsupr = xlsub[fsupc+1] - xlsub[fsupc];
00084         nsupc = jcol - fsupc;   /* Excluding jcol */
00085         ufirst = xlusup[jcol];  /* Points to the beginning of column
00086                                    jcol in supernode L\U(jsupno). */
00087         nrow = nsupr - nsupc;
00088 
00089         ops[TRSV] += nsupc * (nsupc - 1);
00090         ops[GEMV] += 2 * nrow * nsupc;
00091 
00092 #ifdef USE_VENDOR_BLAS
00093 #ifdef _CRAY
00094         STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], &nsupr, 
00095               &lusup[ufirst], &incx );
00096         SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
00097                 &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
00098 #else
00099         strsv_( "L", "N", "U", &nsupc, &lusup[luptr], &nsupr, 
00100               &lusup[ufirst], &incx );
00101         sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
00102                 &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
00103 #endif
00104 #else
00105         slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
00106         smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc], 
00107                         &lusup[ufirst], &tempv[0] );
00108 
00109         /* Scatter tempv[*] into lusup[*] */
00110         iptr = ufirst + nsupc;
00111         for (i = 0; i < nrow; i++) {
00112             lusup[iptr++] -= tempv[i];
00113             tempv[i] = 0.0;
00114         }
00115 #endif
00116 
00117     }
00118 
00119     return 0;
00120 }