Blender  V2.59
opennl.c
Go to the documentation of this file.
00001 
00004 /*
00005  *  $Id: opennl.c 35142 2011-02-25 10:24:29Z jesterking $
00006  *
00007  *  OpenNL: Numerical Library
00008  *  Copyright (C) 2004 Bruno Levy
00009  *
00010  *  This program is free software; you can redistribute it and/or modify
00011  *  it under the terms of the GNU General Public License as published by
00012  *  the Free Software Foundation; either version 2 of the License, or
00013  *  (at your option) any later version.
00014  *
00015  *  This program is distributed in the hope that it will be useful,
00016  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00017  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018  *  GNU General Public License for more details.
00019  *
00020  *  You should have received a copy of the GNU General Public License
00021  *  along with this program; if not, write to the Free Software
00022  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00023  *
00024  *  If you modify this software, you should include a notice giving the
00025  *  name of the person performing the modification, the date of modification,
00026  *  and the reason for such modification.
00027  *
00028  *  Contact: Bruno Levy
00029  *
00030  *       levy@loria.fr
00031  *
00032  *       ISA Project
00033  *       LORIA, INRIA Lorraine, 
00034  *       Campus Scientifique, BP 239
00035  *       54506 VANDOEUVRE LES NANCY CEDEX 
00036  *       FRANCE
00037  *
00038  *  Note that the GNU General Public License does not permit incorporating
00039  *  the Software into proprietary programs. 
00040  */
00041 
00042 #include "ONL_opennl.h"
00043 
00044 #include <stdlib.h>
00045 #include <stdio.h>
00046 #include <string.h>
00047 #include <math.h>
00048 
00049 #ifdef NL_PARANOID
00050 #ifndef NL_DEBUG
00051 #define NL_DEBUG
00052 #endif
00053 #endif
00054 
00055 /* SuperLU includes */
00056 #include <ssp_defs.h>
00057 #include <util.h>
00058 
00059 /************************************************************************************/
00060 /* Assertions */
00061 
00062 
00063 static void __nl_assertion_failed(char* cond, char* file, int line) {
00064         fprintf(
00065                 stderr, 
00066                 "OpenNL assertion failed: %s, file:%s, line:%d\n",
00067                 cond,file,line
00068         );
00069         abort();
00070 }
00071 
00072 static void __nl_range_assertion_failed(
00073         float x, float min_val, float max_val, char* file, int line
00074 ) {
00075         fprintf(
00076                 stderr, 
00077                 "OpenNL range assertion failed: %f in [ %f ... %f ], file:%s, line:%d\n",
00078                 x, min_val, max_val, file,line
00079         );
00080         abort();
00081 }
00082 
00083 static void __nl_should_not_have_reached(char* file, int line) {
00084         fprintf(
00085                 stderr, 
00086                 "OpenNL should not have reached this point: file:%s, line:%d\n",
00087                 file,line
00088         );
00089         abort();
00090 }
00091 
00092 
00093 #define __nl_assert(x) {                                                                                \
00094         if(!(x)) {                                                                                                \
00095                 __nl_assertion_failed(#x,__FILE__, __LINE__);             \
00096         }                                                                                                                  \
00097 } 
00098 
00099 #define __nl_range_assert(x,min_val,max_val) {                            \
00100         if(((x) < (min_val)) || ((x) > (max_val))) {                            \
00101                 __nl_range_assertion_failed(x, min_val, max_val,                \
00102                         __FILE__, __LINE__                                                                \
00103                 );                                                                                                       \
00104         }                                                                                                                  \
00105 }
00106 
00107 #define __nl_assert_not_reached {                                                          \
00108         __nl_should_not_have_reached(__FILE__, __LINE__);                 \
00109 }
00110 
00111 #ifdef NL_DEBUG
00112 #define __nl_debug_assert(x) __nl_assert(x)
00113 #define __nl_debug_range_assert(x,min_val,max_val) __nl_range_assert(x,min_val,max_val)
00114 #else
00115 #define __nl_debug_assert(x) 
00116 #define __nl_debug_range_assert(x,min_val,max_val) 
00117 #endif
00118 
00119 #ifdef NL_PARANOID
00120 #define __nl_parano_assert(x) __nl_assert(x)
00121 #define __nl_parano_range_assert(x,min_val,max_val) __nl_range_assert(x,min_val,max_val)
00122 #else
00123 #define __nl_parano_assert(x) 
00124 #define __nl_parano_range_assert(x,min_val,max_val) 
00125 #endif
00126 
00127 /************************************************************************************/
00128 /* classic macros */
00129 
00130 #ifndef MIN
00131 #define MIN(x,y) (((x) < (y)) ? (x) : (y)) 
00132 #endif
00133 
00134 #ifndef MAX
00135 #define MAX(x,y) (((x) > (y)) ? (x) : (y)) 
00136 #endif
00137 
00138 /************************************************************************************/
00139 /* memory management */
00140 
00141 #define __NL_NEW(T)                             (T*)(calloc(1, sizeof(T))) 
00142 #define __NL_NEW_ARRAY(T,NB)       (T*)(calloc((NB),sizeof(T))) 
00143 #define __NL_RENEW_ARRAY(T,x,NB)   (T*)(realloc(x,(NB)*sizeof(T))) 
00144 #define __NL_DELETE(x)                   free(x); x = NULL 
00145 #define __NL_DELETE_ARRAY(x)       free(x); x = NULL
00146 
00147 #define __NL_CLEAR(T, x)                   memset(x, 0, sizeof(T)) 
00148 #define __NL_CLEAR_ARRAY(T,x,NB)   memset(x, 0, (NB)*sizeof(T)) 
00149 
00150 /************************************************************************************/
00151 /* Dynamic arrays for sparse row/columns */
00152 
00153 typedef struct {
00154         NLuint   index;
00155         NLfloat value;
00156 } __NLCoeff;
00157 
00158 typedef struct {
00159         NLuint size;
00160         NLuint capacity;
00161         __NLCoeff* coeff;
00162 } __NLRowColumn;
00163 
00164 static void __nlRowColumnConstruct(__NLRowColumn* c) {
00165         c->size  = 0;
00166         c->capacity = 0;
00167         c->coeff        = NULL;
00168 }
00169 
00170 static void __nlRowColumnDestroy(__NLRowColumn* c) {
00171         __NL_DELETE_ARRAY(c->coeff);
00172 #ifdef NL_PARANOID
00173         __NL_CLEAR(__NLRowColumn, c); 
00174 #endif
00175 }
00176 
00177 static void __nlRowColumnGrow(__NLRowColumn* c) {
00178         if(c->capacity != 0) {
00179                 c->capacity = 2 * c->capacity;
00180                 c->coeff = __NL_RENEW_ARRAY(__NLCoeff, c->coeff, c->capacity);
00181         } else {
00182                 c->capacity = 4;
00183                 c->coeff = __NL_NEW_ARRAY(__NLCoeff, c->capacity);
00184         }
00185 }
00186 
00187 static void __nlRowColumnAdd(__NLRowColumn* c, NLint index, NLfloat value) {
00188         NLuint i;
00189         for(i=0; i<c->size; i++) {
00190                 if(c->coeff[i].index == (NLuint)index) {
00191                         c->coeff[i].value += value;
00192                         return;
00193                 }
00194         }
00195         if(c->size == c->capacity) {
00196                 __nlRowColumnGrow(c);
00197         }
00198         c->coeff[c->size].index = index;
00199         c->coeff[c->size].value = value;
00200         c->size++;
00201 }
00202 
00203 /* Does not check whether the index already exists */
00204 static void __nlRowColumnAppend(__NLRowColumn* c, NLint index, NLfloat value) {
00205         if(c->size == c->capacity) {
00206                 __nlRowColumnGrow(c);
00207         }
00208         c->coeff[c->size].index = index;
00209         c->coeff[c->size].value = value;
00210         c->size++;
00211 }
00212 
00213 static void __nlRowColumnClear(__NLRowColumn* c) {
00214         c->size  = 0;
00215         c->capacity = 0;
00216         __NL_DELETE_ARRAY(c->coeff);
00217 }
00218 
00219 /************************************************************************************/
00220 /* SparseMatrix data structure */
00221 
00222 #define __NL_ROWS         1
00223 #define __NL_COLUMNS   2
00224 #define __NL_SYMMETRIC 4
00225 
00226 typedef struct {
00227         NLuint m;
00228         NLuint n;
00229         NLuint diag_size;
00230         NLenum storage;
00231         __NLRowColumn* row;
00232         __NLRowColumn* column;
00233         NLfloat*          diag;
00234 } __NLSparseMatrix;
00235 
00236 
00237 static void __nlSparseMatrixConstruct(
00238         __NLSparseMatrix* M, NLuint m, NLuint n, NLenum storage
00239 ) {
00240         NLuint i;
00241         M->m = m;
00242         M->n = n;
00243         M->storage = storage;
00244         if(storage & __NL_ROWS) {
00245                 M->row = __NL_NEW_ARRAY(__NLRowColumn, m);
00246                 for(i=0; i<m; i++) {
00247                         __nlRowColumnConstruct(&(M->row[i]));
00248                 }
00249         } else {
00250                 M->row = NULL;
00251         }
00252 
00253         if(storage & __NL_COLUMNS) {
00254                 M->column = __NL_NEW_ARRAY(__NLRowColumn, n);
00255                 for(i=0; i<n; i++) {
00256                         __nlRowColumnConstruct(&(M->column[i]));
00257                 }
00258         } else {
00259                 M->column = NULL;
00260         }
00261 
00262         M->diag_size = MIN(m,n);
00263         M->diag = __NL_NEW_ARRAY(NLfloat, M->diag_size);
00264 }
00265 
00266 static void __nlSparseMatrixDestroy(__NLSparseMatrix* M) {
00267         NLuint i;
00268         __NL_DELETE_ARRAY(M->diag);
00269         if(M->storage & __NL_ROWS) {
00270                 for(i=0; i<M->m; i++) {
00271                         __nlRowColumnDestroy(&(M->row[i]));
00272                 }
00273                 __NL_DELETE_ARRAY(M->row);
00274         }
00275         if(M->storage & __NL_COLUMNS) {
00276                 for(i=0; i<M->n; i++) {
00277                         __nlRowColumnDestroy(&(M->column[i]));
00278                 }
00279                 __NL_DELETE_ARRAY(M->column);
00280         }
00281 #ifdef NL_PARANOID
00282         __NL_CLEAR(__NLSparseMatrix,M);
00283 #endif
00284 }
00285 
00286 static void __nlSparseMatrixAdd(
00287         __NLSparseMatrix* M, NLuint i, NLuint j, NLfloat value
00288 ) {
00289         __nl_parano_range_assert(i, 0, M->m - 1);
00290         __nl_parano_range_assert(j, 0, M->n - 1);
00291         if((M->storage & __NL_SYMMETRIC) && (j > i)) {
00292                 return;
00293         }
00294         if(i == j) {
00295                 M->diag[i] += value;
00296         }
00297         if(M->storage & __NL_ROWS) {
00298                 __nlRowColumnAdd(&(M->row[i]), j, value);
00299         }
00300         if(M->storage & __NL_COLUMNS) {
00301                 __nlRowColumnAdd(&(M->column[j]), i, value);
00302         }
00303 }
00304 
00305 static void __nlSparseMatrixClear( __NLSparseMatrix* M) {
00306         NLuint i;
00307         if(M->storage & __NL_ROWS) {
00308                 for(i=0; i<M->m; i++) {
00309                         __nlRowColumnClear(&(M->row[i]));
00310                 }
00311         }
00312         if(M->storage & __NL_COLUMNS) {
00313                 for(i=0; i<M->n; i++) {
00314                         __nlRowColumnClear(&(M->column[i]));
00315                 }
00316         }
00317         __NL_CLEAR_ARRAY(NLfloat, M->diag, M->diag_size);       
00318 }
00319 
00320 /* Returns the number of non-zero coefficients */
00321 static NLuint __nlSparseMatrixNNZ( __NLSparseMatrix* M) {
00322         NLuint nnz = 0;
00323         NLuint i;
00324         if(M->storage & __NL_ROWS) {
00325                 for(i = 0; i<M->m; i++) {
00326                         nnz += M->row[i].size;
00327                 }
00328         } else if (M->storage & __NL_COLUMNS) {
00329                 for(i = 0; i<M->n; i++) {
00330                         nnz += M->column[i].size;
00331                 }
00332         } else {
00333                 __nl_assert_not_reached;
00334         }
00335         return nnz;
00336 }
00337 
00338 /************************************************************************************/
00339 /* SparseMatrix x Vector routines, internal helper routines */
00340 
00341 static void __nlSparseMatrix_mult_rows_symmetric(
00342         __NLSparseMatrix* A, NLfloat* x, NLfloat* y
00343 ) {
00344         NLuint m = A->m;
00345         NLuint i,ij;
00346         __NLRowColumn* Ri = NULL;
00347         __NLCoeff* c = NULL;
00348         for(i=0; i<m; i++) {
00349                 y[i] = 0;
00350                 Ri = &(A->row[i]);
00351                 for(ij=0; ij<Ri->size; ij++) {
00352                         c = &(Ri->coeff[ij]);
00353                         y[i] += c->value * x[c->index];
00354                         if(i != c->index) {
00355                                 y[c->index] += c->value * x[i];
00356                         }
00357                 }
00358         }
00359 }
00360 
00361 static void __nlSparseMatrix_mult_rows(
00362         __NLSparseMatrix* A, NLfloat* x, NLfloat* y
00363 ) {
00364         NLuint m = A->m;
00365         NLuint i,ij;
00366         __NLRowColumn* Ri = NULL;
00367         __NLCoeff* c = NULL;
00368         for(i=0; i<m; i++) {
00369                 y[i] = 0;
00370                 Ri = &(A->row[i]);
00371                 for(ij=0; ij<Ri->size; ij++) {
00372                         c = &(Ri->coeff[ij]);
00373                         y[i] += c->value * x[c->index];
00374                 }
00375         }
00376 }
00377 
00378 static void __nlSparseMatrix_mult_cols_symmetric(
00379         __NLSparseMatrix* A, NLfloat* x, NLfloat* y
00380 ) {
00381         NLuint n = A->n;
00382         NLuint j,ii;
00383         __NLRowColumn* Cj = NULL;
00384         __NLCoeff* c = NULL;
00385         for(j=0; j<n; j++) {
00386                 y[j] = 0;
00387                 Cj = &(A->column[j]);
00388                 for(ii=0; ii<Cj->size; ii++) {
00389                         c = &(Cj->coeff[ii]);
00390                         y[c->index] += c->value * x[j];
00391                         if(j != c->index) {
00392                                 y[j] += c->value * x[c->index];
00393                         }
00394                 }
00395         }
00396 }
00397 
00398 static void __nlSparseMatrix_mult_cols(
00399         __NLSparseMatrix* A, NLfloat* x, NLfloat* y
00400 ) {
00401         NLuint n = A->n;
00402         NLuint j,ii; 
00403         __NLRowColumn* Cj = NULL;
00404         __NLCoeff* c = NULL;
00405         __NL_CLEAR_ARRAY(NLfloat, y, A->m);
00406         for(j=0; j<n; j++) {
00407                 Cj = &(A->column[j]);
00408                 for(ii=0; ii<Cj->size; ii++) {
00409                         c = &(Cj->coeff[ii]);
00410                         y[c->index] += c->value * x[j];
00411                 }
00412         }
00413 }
00414 
00415 /************************************************************************************/
00416 /* SparseMatrix x Vector routines, main driver routine */
00417 
00418 static void __nlSparseMatrixMult(__NLSparseMatrix* A, NLfloat* x, NLfloat* y) {
00419         if(A->storage & __NL_ROWS) {
00420                 if(A->storage & __NL_SYMMETRIC) {
00421                         __nlSparseMatrix_mult_rows_symmetric(A, x, y);
00422                 } else {
00423                         __nlSparseMatrix_mult_rows(A, x, y);
00424                 }
00425         } else {
00426                 if(A->storage & __NL_SYMMETRIC) {
00427                         __nlSparseMatrix_mult_cols_symmetric(A, x, y);
00428                 } else {
00429                         __nlSparseMatrix_mult_cols(A, x, y);
00430                 }
00431         }
00432 }
00433 
00434 /* ****************** Routines for least squares ******************* */
00435 
00436 static void __nlSparseMatrix_square(
00437         __NLSparseMatrix* AtA, __NLSparseMatrix *A
00438 ) {
00439         NLuint m = A->m;
00440         NLuint n = A->n;
00441         NLuint i, j0, j1;
00442         __NLRowColumn *Ri = NULL;
00443         __NLCoeff *c0 = NULL, *c1 = NULL;
00444         float value;
00445 
00446         __nlSparseMatrixConstruct(AtA, n, n, A->storage);
00447 
00448         for(i=0; i<m; i++) {
00449                 Ri = &(A->row[i]);
00450 
00451                 for(j0=0; j0<Ri->size; j0++) {
00452                         c0 = &(Ri->coeff[j0]);
00453                         for(j1=0; j1<Ri->size; j1++) {
00454                                 c1 = &(Ri->coeff[j1]);
00455 
00456                                 value = c0->value*c1->value;
00457                                 __nlSparseMatrixAdd(AtA, c0->index, c1->index, value);
00458                         }
00459                 }
00460         }
00461 }
00462 
00463 static void __nlSparseMatrix_transpose_mult_rows(
00464         __NLSparseMatrix* A, NLfloat* x, NLfloat* y
00465 ) {
00466         NLuint m = A->m;
00467         NLuint n = A->n;
00468         NLuint i,ij;
00469         __NLRowColumn* Ri = NULL;
00470         __NLCoeff* c = NULL;
00471 
00472         __NL_CLEAR_ARRAY(NLfloat, y, n);
00473 
00474         for(i=0; i<m; i++) {
00475                 Ri = &(A->row[i]);
00476                 for(ij=0; ij<Ri->size; ij++) {
00477                         c = &(Ri->coeff[ij]);
00478                         y[c->index] += c->value * x[i];
00479                 }
00480         }
00481 }
00482 
00483 /************************************************************************************/
00484 /* NLContext data structure */
00485 
00486 typedef void(*__NLMatrixFunc)(float* x, float* y);
00487 
00488 typedef struct {
00489         NLfloat  value[4];
00490         NLboolean locked;
00491         NLuint  index;
00492         __NLRowColumn *a;
00493 } __NLVariable;
00494 
00495 #define __NL_STATE_INITIAL                              0
00496 #define __NL_STATE_SYSTEM                               1
00497 #define __NL_STATE_MATRIX                               2
00498 #define __NL_STATE_MATRIX_CONSTRUCTED   3
00499 #define __NL_STATE_SYSTEM_CONSTRUCTED   4
00500 #define __NL_STATE_SYSTEM_SOLVED                5
00501 
00502 typedef struct {
00503         NLenum                  state;
00504         NLuint                  n;
00505         NLuint                  m;
00506         __NLVariable*   variable;
00507         NLfloat*                b;
00508         NLfloat*                Mtb;
00509         __NLSparseMatrix M;
00510         __NLSparseMatrix MtM;
00511         NLfloat*                x;
00512         NLuint                  nb_variables;
00513         NLuint                  nb_rows;
00514         NLboolean               least_squares;
00515         NLboolean               symmetric;
00516         NLuint                  nb_rhs;
00517         NLboolean               solve_again;
00518         NLboolean               alloc_M;
00519         NLboolean               alloc_MtM;
00520         NLboolean               alloc_variable;
00521         NLboolean               alloc_x;
00522         NLboolean               alloc_b;
00523         NLboolean               alloc_Mtb;
00524         NLfloat                 error;
00525         __NLMatrixFunc  matrix_vector_prod;
00526 
00527         struct __NLSuperLUContext {
00528                 NLboolean alloc_slu;
00529                 SuperMatrix L, U;
00530                 NLint *perm_c, *perm_r;
00531                 SuperLUStat_t stat;
00532         } slu;
00533 } __NLContext;
00534 
00535 static __NLContext* __nlCurrentContext = NULL;
00536 
00537 static void __nlMatrixVectorProd_default(NLfloat* x, NLfloat* y) {
00538         __nlSparseMatrixMult(&(__nlCurrentContext->M), x, y);
00539 }
00540 
00541 
00542 NLContext nlNewContext(void) {
00543         __NLContext* result       = __NL_NEW(__NLContext);
00544         result->state                   = __NL_STATE_INITIAL;
00545         result->matrix_vector_prod = __nlMatrixVectorProd_default;
00546         result->nb_rhs = 1;
00547         nlMakeCurrent(result);
00548         return result;
00549 }
00550 
00551 static void __nlFree_SUPERLU(__NLContext *context);
00552 
00553 void nlDeleteContext(NLContext context_in) {
00554         __NLContext* context = (__NLContext*)(context_in);
00555         int i;
00556 
00557         if(__nlCurrentContext == context) {
00558                 __nlCurrentContext = NULL;
00559         }
00560         if(context->alloc_M) {
00561                 __nlSparseMatrixDestroy(&context->M);
00562         }
00563         if(context->alloc_MtM) {
00564                 __nlSparseMatrixDestroy(&context->MtM);
00565         }
00566         if(context->alloc_variable) {
00567                 for(i=0; i<context->nb_variables; i++) {
00568                         if(context->variable[i].a) {
00569                                 __nlRowColumnDestroy(context->variable[i].a);
00570                                 __NL_DELETE(context->variable[i].a);
00571                         }
00572                 }
00573         }
00574         if(context->alloc_b) {
00575                 __NL_DELETE_ARRAY(context->b);
00576         }
00577         if(context->alloc_Mtb) {
00578                 __NL_DELETE_ARRAY(context->Mtb);
00579         }
00580         if(context->alloc_x) {
00581                 __NL_DELETE_ARRAY(context->x);
00582         }
00583         if (context->slu.alloc_slu) {
00584                 __nlFree_SUPERLU(context);
00585         }
00586 
00587 #ifdef NL_PARANOID
00588         __NL_CLEAR(__NLContext, context);
00589 #endif
00590         __NL_DELETE(context);
00591 }
00592 
00593 void nlMakeCurrent(NLContext context) {
00594         __nlCurrentContext = (__NLContext*)(context);
00595 }
00596 
00597 NLContext nlGetCurrent(void) {
00598         return __nlCurrentContext;
00599 }
00600 
00601 static void __nlCheckState(NLenum state) {
00602         __nl_assert(__nlCurrentContext->state == state);
00603 }
00604 
00605 static void __nlTransition(NLenum from_state, NLenum to_state) {
00606         __nlCheckState(from_state);
00607         __nlCurrentContext->state = to_state;
00608 }
00609 
00610 /************************************************************************************/
00611 /* Get/Set parameters */
00612 
00613 void nlSolverParameterf(NLenum pname, NLfloat param) {
00614         __nlCheckState(__NL_STATE_INITIAL);
00615         switch(pname) {
00616         case NL_NB_VARIABLES: {
00617                 __nl_assert(param > 0);
00618                 __nlCurrentContext->nb_variables = (NLuint)param;
00619         } break;
00620         case NL_NB_ROWS: {
00621                 __nl_assert(param > 0);
00622                 __nlCurrentContext->nb_rows = (NLuint)param;
00623         } break;
00624         case NL_LEAST_SQUARES: {
00625                 __nlCurrentContext->least_squares = (NLboolean)param;
00626         } break;
00627         case NL_SYMMETRIC: {
00628                 __nlCurrentContext->symmetric = (NLboolean)param;               
00629         } break;
00630         case NL_NB_RIGHT_HAND_SIDES: {
00631                 __nlCurrentContext->nb_rhs = (NLuint)param;
00632         } break;
00633         default: {
00634                 __nl_assert_not_reached;
00635         } break;
00636         }
00637 }
00638 
00639 void nlSolverParameteri(NLenum pname, NLint param) {
00640         __nlCheckState(__NL_STATE_INITIAL);
00641         switch(pname) {
00642         case NL_NB_VARIABLES: {
00643                 __nl_assert(param > 0);
00644                 __nlCurrentContext->nb_variables = (NLuint)param;
00645         } break;
00646         case NL_NB_ROWS: {
00647                 __nl_assert(param > 0);
00648                 __nlCurrentContext->nb_rows = (NLuint)param;
00649         } break;
00650         case NL_LEAST_SQUARES: {
00651                 __nlCurrentContext->least_squares = (NLboolean)param;
00652         } break;
00653         case NL_SYMMETRIC: {
00654                 __nlCurrentContext->symmetric = (NLboolean)param;               
00655         } break;
00656         case NL_NB_RIGHT_HAND_SIDES: {
00657                 __nlCurrentContext->nb_rhs = (NLuint)param;
00658         } break;
00659         default: {
00660                 __nl_assert_not_reached;
00661         } break;
00662         }
00663 }
00664 
00665 void nlGetBooleanv(NLenum pname, NLboolean* params) {
00666         switch(pname) {
00667         case NL_LEAST_SQUARES: {
00668                 *params = __nlCurrentContext->least_squares;
00669         } break;
00670         case NL_SYMMETRIC: {
00671                 *params = __nlCurrentContext->symmetric;
00672         } break;
00673         default: {
00674                 __nl_assert_not_reached;
00675         } break;
00676         }
00677 }
00678 
00679 void nlGetFloatv(NLenum pname, NLfloat* params) {
00680         switch(pname) {
00681         case NL_NB_VARIABLES: {
00682                 *params = (NLfloat)(__nlCurrentContext->nb_variables);
00683         } break;
00684         case NL_NB_ROWS: {
00685                 *params = (NLfloat)(__nlCurrentContext->nb_rows);
00686         } break;
00687         case NL_LEAST_SQUARES: {
00688                 *params = (NLfloat)(__nlCurrentContext->least_squares);
00689         } break;
00690         case NL_SYMMETRIC: {
00691                 *params = (NLfloat)(__nlCurrentContext->symmetric);
00692         } break;
00693         case NL_ERROR: {
00694                 *params = (NLfloat)(__nlCurrentContext->error);
00695         } break;
00696         default: {
00697                 __nl_assert_not_reached;
00698         } break;
00699         }
00700 }
00701 
00702 void nlGetIntergerv(NLenum pname, NLint* params) {
00703         switch(pname) {
00704         case NL_NB_VARIABLES: {
00705                 *params = (NLint)(__nlCurrentContext->nb_variables);
00706         } break;
00707         case NL_NB_ROWS: {
00708                 *params = (NLint)(__nlCurrentContext->nb_rows);
00709         } break;
00710         case NL_LEAST_SQUARES: {
00711                 *params = (NLint)(__nlCurrentContext->least_squares);
00712         } break;
00713         case NL_SYMMETRIC: {
00714                 *params = (NLint)(__nlCurrentContext->symmetric);
00715         } break;
00716         default: {
00717                 __nl_assert_not_reached;
00718         } break;
00719         }
00720 }
00721 
00722 /************************************************************************************/
00723 /* Enable / Disable */
00724 
00725 void nlEnable(NLenum pname) {
00726         switch(pname) {
00727         default: {
00728                 __nl_assert_not_reached;
00729         }
00730         }
00731 }
00732 
00733 void nlDisable(NLenum pname) {
00734         switch(pname) {
00735         default: {
00736                 __nl_assert_not_reached;
00737         }
00738         }
00739 }
00740 
00741 NLboolean nlIsEnabled(NLenum pname) {
00742         switch(pname) {
00743         default: {
00744                 __nl_assert_not_reached;
00745         }
00746         }
00747         return NL_FALSE;
00748 }
00749 
00750 /************************************************************************************/
00751 /* Get/Set Lock/Unlock variables */
00752 
00753 void nlSetVariable(NLuint rhsindex, NLuint index, NLfloat value) {
00754         __nlCheckState(__NL_STATE_SYSTEM);
00755         __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
00756         __nlCurrentContext->variable[index].value[rhsindex] = value;    
00757 }
00758 
00759 NLfloat nlGetVariable(NLuint rhsindex, NLuint index) {
00760         __nl_assert(__nlCurrentContext->state != __NL_STATE_INITIAL);
00761         __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
00762         return __nlCurrentContext->variable[index].value[rhsindex];
00763 }
00764 
00765 void nlLockVariable(NLuint index) {
00766         __nlCheckState(__NL_STATE_SYSTEM);
00767         __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
00768         __nlCurrentContext->variable[index].locked = NL_TRUE;
00769 }
00770 
00771 void nlUnlockVariable(NLuint index) {
00772         __nlCheckState(__NL_STATE_SYSTEM);
00773         __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
00774         __nlCurrentContext->variable[index].locked = NL_FALSE;
00775 }
00776 
00777 NLboolean nlVariableIsLocked(NLuint index) {
00778         __nl_assert(__nlCurrentContext->state != __NL_STATE_INITIAL);
00779         __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
00780         return __nlCurrentContext->variable[index].locked;
00781 }
00782 
00783 /************************************************************************************/
00784 /* System construction */
00785 
00786 static void __nlVariablesToVector() {
00787         __NLContext *context = __nlCurrentContext;
00788         NLuint i, j, nb_rhs;
00789 
00790         __nl_assert(context->alloc_x);
00791         __nl_assert(context->alloc_variable);
00792 
00793         nb_rhs= context->nb_rhs;
00794 
00795         for(i=0; i<context->nb_variables; i++) {
00796                 __NLVariable* v = &(context->variable[i]);
00797                 if(!v->locked) {
00798                         __nl_assert(v->index < context->n);
00799 
00800                         for(j=0; j<nb_rhs; j++)
00801                                 context->x[context->n*j + v->index] = v->value[j];
00802                 }
00803         }
00804 }
00805 
00806 static void __nlVectorToVariables() {
00807         __NLContext *context = __nlCurrentContext;
00808         NLuint i, j, nb_rhs;
00809 
00810         __nl_assert(context->alloc_x);
00811         __nl_assert(context->alloc_variable);
00812 
00813         nb_rhs= context->nb_rhs;
00814 
00815         for(i=0; i<context->nb_variables; i++) {
00816                 __NLVariable* v = &(context->variable[i]);
00817                 if(!v->locked) {
00818                         __nl_assert(v->index < context->n);
00819 
00820                         for(j=0; j<nb_rhs; j++)
00821                                 v->value[j] = context->x[context->n*j + v->index];
00822                 }
00823         }
00824 }
00825 
00826 static void __nlBeginSystem() {
00827         __nl_assert(__nlCurrentContext->nb_variables > 0);
00828 
00829         if (__nlCurrentContext->solve_again)
00830                 __nlTransition(__NL_STATE_SYSTEM_SOLVED, __NL_STATE_SYSTEM);
00831         else {
00832                 __nlTransition(__NL_STATE_INITIAL, __NL_STATE_SYSTEM);
00833 
00834                 __nlCurrentContext->variable = __NL_NEW_ARRAY(
00835                         __NLVariable, __nlCurrentContext->nb_variables);
00836                 
00837                 __nlCurrentContext->alloc_variable = NL_TRUE;
00838         }
00839 }
00840 
00841 static void __nlEndSystem() {
00842         __nlTransition(__NL_STATE_MATRIX_CONSTRUCTED, __NL_STATE_SYSTEM_CONSTRUCTED);   
00843 }
00844 
00845 static void __nlBeginMatrix() {
00846         NLuint i;
00847         NLuint m = 0, n = 0;
00848         NLenum storage = __NL_ROWS;
00849         __NLContext *context = __nlCurrentContext;
00850 
00851         __nlTransition(__NL_STATE_SYSTEM, __NL_STATE_MATRIX);
00852 
00853         if (!context->solve_again) {
00854                 for(i=0; i<context->nb_variables; i++) {
00855                         if(context->variable[i].locked) {
00856                                 context->variable[i].index = ~0;
00857                                 context->variable[i].a = __NL_NEW(__NLRowColumn);
00858                                 __nlRowColumnConstruct(context->variable[i].a);
00859                         }
00860                         else
00861                                 context->variable[i].index = n++;
00862                 }
00863 
00864                 m = (context->nb_rows == 0)? n: context->nb_rows;
00865 
00866                 context->m = m;
00867                 context->n = n;
00868 
00869                 __nlSparseMatrixConstruct(&context->M, m, n, storage);
00870                 context->alloc_M = NL_TRUE;
00871 
00872                 context->b = __NL_NEW_ARRAY(NLfloat, m*context->nb_rhs);
00873                 context->alloc_b = NL_TRUE;
00874 
00875                 context->x = __NL_NEW_ARRAY(NLfloat, n*context->nb_rhs);
00876                 context->alloc_x = NL_TRUE;
00877         }
00878         else {
00879                 /* need to recompute b only, A is not constructed anymore */
00880                 __NL_CLEAR_ARRAY(NLfloat, context->b, context->m*context->nb_rhs);
00881         }
00882 
00883         __nlVariablesToVector();
00884 }
00885 
00886 static void __nlEndMatrixRHS(NLuint rhs) {
00887         __NLContext *context = __nlCurrentContext;
00888         __NLVariable *variable;
00889         __NLRowColumn *a;
00890         NLfloat *b, *Mtb;
00891         NLuint i, j;
00892 
00893         b = context->b + context->m*rhs;
00894         Mtb = context->Mtb + context->n*rhs;
00895 
00896         for(i=0; i<__nlCurrentContext->nb_variables; i++) {
00897                 variable = &(context->variable[i]);
00898 
00899                 if(variable->locked) {
00900                         a = variable->a;
00901 
00902                         for(j=0; j<a->size; j++) {
00903                                 b[a->coeff[j].index] -= a->coeff[j].value*variable->value[rhs];
00904                         }
00905                 }
00906         }
00907 
00908         if(context->least_squares)
00909                 __nlSparseMatrix_transpose_mult_rows(&context->M, b, Mtb);
00910 }
00911 
00912 static void __nlEndMatrix() {
00913         __NLContext *context = __nlCurrentContext;
00914         NLuint i;
00915 
00916         __nlTransition(__NL_STATE_MATRIX, __NL_STATE_MATRIX_CONSTRUCTED);       
00917         
00918         if(context->least_squares) {
00919                 if(!__nlCurrentContext->solve_again) {
00920                         __nlSparseMatrix_square(&context->MtM, &context->M);
00921                         context->alloc_MtM = NL_TRUE;
00922 
00923                         context->Mtb =
00924                                 __NL_NEW_ARRAY(NLfloat, context->n*context->nb_rhs);
00925                         context->alloc_Mtb = NL_TRUE;
00926                 }
00927         }
00928 
00929         for(i=0; i<context->nb_rhs; i++)
00930                 __nlEndMatrixRHS(i);
00931 }
00932 
00933 void nlMatrixAdd(NLuint row, NLuint col, NLfloat value)
00934 {
00935         __NLContext *context = __nlCurrentContext;
00936 
00937         __nlCheckState(__NL_STATE_MATRIX);
00938 
00939         if(context->solve_again)
00940                 return;
00941 
00942         if (!context->least_squares && context->variable[row].locked);
00943         else if (context->variable[col].locked) {
00944                 if(!context->least_squares)
00945                         row = context->variable[row].index;
00946                 __nlRowColumnAppend(context->variable[col].a, row, value);
00947         }
00948         else {
00949                 __NLSparseMatrix* M  = &context->M;
00950                 
00951                 if(!context->least_squares)
00952                         row = context->variable[row].index;
00953                 col = context->variable[col].index;
00954                 
00955                 __nl_range_assert(row, 0, context->m - 1);
00956                 __nl_range_assert(col, 0, context->n - 1);
00957 
00958                 __nlSparseMatrixAdd(M, row, col, value);
00959         }
00960 }
00961 
00962 void nlRightHandSideAdd(NLuint rhsindex, NLuint index, NLfloat value)
00963 {
00964         __NLContext *context = __nlCurrentContext;
00965         NLfloat* b = context->b;
00966 
00967         __nlCheckState(__NL_STATE_MATRIX);
00968 
00969         if(context->least_squares) {
00970                 __nl_range_assert(index, 0, context->m - 1);
00971                 b[rhsindex*context->m + index] += value;
00972         }
00973         else {
00974                 if(!context->variable[index].locked) {
00975                         index = context->variable[index].index;
00976                         __nl_range_assert(index, 0, context->m - 1);
00977 
00978                         b[rhsindex*context->m + index] += value;
00979                 }
00980         }
00981 }
00982 
00983 void nlRightHandSideSet(NLuint rhsindex, NLuint index, NLfloat value)
00984 {
00985         __NLContext *context = __nlCurrentContext;
00986         NLfloat* b = context->b;
00987 
00988         __nlCheckState(__NL_STATE_MATRIX);
00989 
00990         if(context->least_squares) {
00991                 __nl_range_assert(index, 0, context->m - 1);
00992                 b[rhsindex*context->m + index] = value;
00993         }
00994         else {
00995                 if(!context->variable[index].locked) {
00996                         index = context->variable[index].index;
00997                         __nl_range_assert(index, 0, context->m - 1);
00998 
00999                         b[rhsindex*context->m + index] = value;
01000                 }
01001         }
01002 }
01003 
01004 void nlBegin(NLenum prim) {
01005         switch(prim) {
01006         case NL_SYSTEM: {
01007                 __nlBeginSystem();
01008         } break;
01009         case NL_MATRIX: {
01010                 __nlBeginMatrix();
01011         } break;
01012         default: {
01013                 __nl_assert_not_reached;
01014         }
01015         }
01016 }
01017 
01018 void nlEnd(NLenum prim) {
01019         switch(prim) {
01020         case NL_SYSTEM: {
01021                 __nlEndSystem();
01022         } break;
01023         case NL_MATRIX: {
01024                 __nlEndMatrix();
01025         } break;
01026         default: {
01027                 __nl_assert_not_reached;
01028         }
01029         }
01030 }
01031 
01032 /************************************************************************/
01033 /* SuperLU wrapper */
01034 
01035 /* Note: SuperLU is difficult to call, but it is worth it.      */
01036 /* Here is a driver inspired by A. Sheffer's "cow flattener". */
01037 static NLboolean __nlFactorize_SUPERLU(__NLContext *context, NLint *permutation) {
01038 
01039         /* OpenNL Context */
01040         __NLSparseMatrix* M = (context->least_squares)? &context->MtM: &context->M;
01041         NLuint n = context->n;
01042         NLuint nnz = __nlSparseMatrixNNZ(M); /* number of non-zero coeffs */
01043 
01044         /* Compressed Row Storage matrix representation */
01045         NLint   *xa             = __NL_NEW_ARRAY(NLint, n+1);
01046         NLfloat *rhs    = __NL_NEW_ARRAY(NLfloat, n);
01047         NLfloat *a              = __NL_NEW_ARRAY(NLfloat, nnz);
01048         NLint   *asub   = __NL_NEW_ARRAY(NLint, nnz);
01049         NLint   *etree  = __NL_NEW_ARRAY(NLint, n);
01050 
01051         /* SuperLU variables */
01052         SuperMatrix At, AtP;
01053         NLint info, panel_size, relax;
01054         superlu_options_t options;
01055 
01056         /* Temporary variables */
01057         NLuint i, jj, count;
01058         
01059         __nl_assert(!(M->storage & __NL_SYMMETRIC));
01060         __nl_assert(M->storage & __NL_ROWS);
01061         __nl_assert(M->m == M->n);
01062         
01063         /* Convert M to compressed column format */
01064         for(i=0, count=0; i<n; i++) {
01065                 __NLRowColumn *Ri = M->row + i;
01066                 xa[i] = count;
01067 
01068                 for(jj=0; jj<Ri->size; jj++, count++) {
01069                         a[count] = Ri->coeff[jj].value;
01070                         asub[count] = Ri->coeff[jj].index;
01071                 }
01072         }
01073         xa[n] = nnz;
01074 
01075         /* Free M, don't need it anymore at this point */
01076         __nlSparseMatrixClear(M);
01077 
01078         /* Create superlu A matrix transposed */
01079         sCreate_CompCol_Matrix(
01080                 &At, n, n, nnz, a, asub, xa, 
01081                 SLU_NC,         /* Colum wise, no supernode */
01082                 SLU_S,          /* floats */ 
01083                 SLU_GE          /* general storage */
01084         );
01085 
01086         /* Set superlu options */
01087         set_default_options(&options);
01088         options.ColPerm = MY_PERMC;
01089         options.Fact = DOFACT;
01090 
01091         StatInit(&(context->slu.stat));
01092 
01093         panel_size = sp_ienv(1); /* sp_ienv give us the defaults */
01094         relax = sp_ienv(2);
01095 
01096         /* Compute permutation and permuted matrix */
01097         context->slu.perm_r = __NL_NEW_ARRAY(NLint, n);
01098         context->slu.perm_c = __NL_NEW_ARRAY(NLint, n);
01099 
01100         if ((permutation == NULL) || (*permutation == -1)) {
01101                 get_perm_c(3, &At, context->slu.perm_c);
01102 
01103                 if (permutation)
01104                         memcpy(permutation, context->slu.perm_c, sizeof(NLint)*n);
01105         }
01106         else
01107                 memcpy(context->slu.perm_c, permutation, sizeof(NLint)*n);
01108 
01109         sp_preorder(&options, &At, context->slu.perm_c, etree, &AtP);
01110 
01111         /* Decompose into L and U */
01112         sgstrf(&options, &AtP, relax, panel_size,
01113                 etree, NULL, 0, context->slu.perm_c, context->slu.perm_r,
01114                 &(context->slu.L), &(context->slu.U), &(context->slu.stat), &info);
01115 
01116         /* Cleanup */
01117 
01118         Destroy_SuperMatrix_Store(&At);
01119         Destroy_CompCol_Permuted(&AtP);
01120 
01121         __NL_DELETE_ARRAY(etree);
01122         __NL_DELETE_ARRAY(xa);
01123         __NL_DELETE_ARRAY(rhs);
01124         __NL_DELETE_ARRAY(a);
01125         __NL_DELETE_ARRAY(asub);
01126 
01127         context->slu.alloc_slu = NL_TRUE;
01128 
01129         return (info == 0);
01130 }
01131 
01132 static NLboolean __nlInvert_SUPERLU(__NLContext *context) {
01133 
01134         /* OpenNL Context */
01135         NLfloat* b = (context->least_squares)? context->Mtb: context->b;
01136         NLfloat* x = context->x;
01137         NLuint n = context->n, j;
01138 
01139         /* SuperLU variables */
01140         SuperMatrix B;
01141         NLint info;
01142 
01143         for(j=0; j<context->nb_rhs; j++, b+=n, x+=n) {
01144                 /* Create superlu array for B */
01145                 sCreate_Dense_Matrix(
01146                         &B, n, 1, b, n, 
01147                         SLU_DN, /* Fortran-type column-wise storage */
01148                         SLU_S,  /* floats                                                 */
01149                         SLU_GE  /* general                                                */
01150                 );
01151 
01152                 /* Forward/Back substitution to compute x */
01153                 sgstrs(TRANS, &(context->slu.L), &(context->slu.U),
01154                         context->slu.perm_c, context->slu.perm_r, &B,
01155                         &(context->slu.stat), &info);
01156 
01157                 if(info == 0)
01158                         memcpy(x, ((DNformat*)B.Store)->nzval, sizeof(*x)*n);
01159 
01160                 Destroy_SuperMatrix_Store(&B);
01161         }
01162 
01163         return (info == 0);
01164 }
01165 
01166 static void __nlFree_SUPERLU(__NLContext *context) {
01167 
01168         Destroy_SuperNode_Matrix(&(context->slu.L));
01169         Destroy_CompCol_Matrix(&(context->slu.U));
01170 
01171         StatFree(&(context->slu.stat));
01172 
01173         __NL_DELETE_ARRAY(context->slu.perm_r);
01174         __NL_DELETE_ARRAY(context->slu.perm_c);
01175 
01176         context->slu.alloc_slu = NL_FALSE;
01177 }
01178 
01179 void nlPrintMatrix(void) {
01180         __NLContext *context = __nlCurrentContext;
01181         __NLSparseMatrix* M  = &(context->M);
01182         __NLSparseMatrix* MtM  = &(context->MtM);
01183         float *b = context->b;
01184         NLuint i, jj, k;
01185         NLuint m = context->m;
01186         NLuint n = context->n;
01187         __NLRowColumn* Ri = NULL;
01188         float *value = malloc(sizeof(*value)*(n+m));
01189 
01190         printf("A:\n");
01191         for(i=0; i<m; i++) {
01192                 Ri = &(M->row[i]);
01193 
01194                 memset(value, 0.0, sizeof(*value)*n);
01195                 for(jj=0; jj<Ri->size; jj++)
01196                         value[Ri->coeff[jj].index] = Ri->coeff[jj].value;
01197 
01198                 for (k = 0; k<n; k++)
01199                         printf("%.3f ", value[k]);
01200                 printf("\n");
01201         }
01202 
01203         for(k=0; k<context->nb_rhs; k++) {
01204                 printf("b (%d):\n", k);
01205                 for(i=0; i<n; i++)
01206                         printf("%f ", b[context->n*k + i]);
01207                 printf("\n");
01208         }
01209 
01210         if(context->alloc_MtM) {
01211                 printf("AtA:\n");
01212                 for(i=0; i<n; i++) {
01213                         Ri = &(MtM->row[i]);
01214 
01215                         memset(value, 0.0, sizeof(*value)*m);
01216                         for(jj=0; jj<Ri->size; jj++)
01217                                 value[Ri->coeff[jj].index] = Ri->coeff[jj].value;
01218 
01219                         for (k = 0; k<n; k++)
01220                                 printf("%.3f ", value[k]);
01221                         printf("\n");
01222                 }
01223 
01224                 for(k=0; k<context->nb_rhs; k++) {
01225                         printf("Mtb (%d):\n", k);
01226                         for(i=0; i<n; i++)
01227                                 printf("%f ", context->Mtb[context->n*k + i]);
01228                         printf("\n");
01229                 }
01230                 printf("\n");
01231         }
01232 
01233         free(value);
01234 }
01235 
01236 /************************************************************************/
01237 /* nlSolve() driver routine */
01238 
01239 NLboolean nlSolveAdvanced(NLint *permutation, NLboolean solveAgain) {
01240         NLboolean result = NL_TRUE;
01241 
01242         __nlCheckState(__NL_STATE_SYSTEM_CONSTRUCTED);
01243 
01244         if (!__nlCurrentContext->solve_again)
01245                 result = __nlFactorize_SUPERLU(__nlCurrentContext, permutation);
01246 
01247         if (result) {
01248                 result = __nlInvert_SUPERLU(__nlCurrentContext);
01249 
01250                 if (result) {
01251                         __nlVectorToVariables();
01252 
01253                         if (solveAgain)
01254                                 __nlCurrentContext->solve_again = NL_TRUE;
01255 
01256                         __nlTransition(__NL_STATE_SYSTEM_CONSTRUCTED, __NL_STATE_SYSTEM_SOLVED);
01257                 }
01258         }
01259 
01260         return result;
01261 }
01262 
01263 NLboolean nlSolve() {
01264         return nlSolveAdvanced(NULL, NL_FALSE);
01265 }
01266