|
Blender
V2.59
|
00001 00004 /* 00005 * $Id: opennl.c 35142 2011-02-25 10:24:29Z jesterking $ 00006 * 00007 * OpenNL: Numerical Library 00008 * Copyright (C) 2004 Bruno Levy 00009 * 00010 * This program is free software; you can redistribute it and/or modify 00011 * it under the terms of the GNU General Public License as published by 00012 * the Free Software Foundation; either version 2 of the License, or 00013 * (at your option) any later version. 00014 * 00015 * This program is distributed in the hope that it will be useful, 00016 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00017 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00018 * GNU General Public License for more details. 00019 * 00020 * You should have received a copy of the GNU General Public License 00021 * along with this program; if not, write to the Free Software 00022 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 00023 * 00024 * If you modify this software, you should include a notice giving the 00025 * name of the person performing the modification, the date of modification, 00026 * and the reason for such modification. 00027 * 00028 * Contact: Bruno Levy 00029 * 00030 * levy@loria.fr 00031 * 00032 * ISA Project 00033 * LORIA, INRIA Lorraine, 00034 * Campus Scientifique, BP 239 00035 * 54506 VANDOEUVRE LES NANCY CEDEX 00036 * FRANCE 00037 * 00038 * Note that the GNU General Public License does not permit incorporating 00039 * the Software into proprietary programs. 00040 */ 00041 00042 #include "ONL_opennl.h" 00043 00044 #include <stdlib.h> 00045 #include <stdio.h> 00046 #include <string.h> 00047 #include <math.h> 00048 00049 #ifdef NL_PARANOID 00050 #ifndef NL_DEBUG 00051 #define NL_DEBUG 00052 #endif 00053 #endif 00054 00055 /* SuperLU includes */ 00056 #include <ssp_defs.h> 00057 #include <util.h> 00058 00059 /************************************************************************************/ 00060 /* Assertions */ 00061 00062 00063 static void __nl_assertion_failed(char* cond, char* file, int line) { 00064 fprintf( 00065 stderr, 00066 "OpenNL assertion failed: %s, file:%s, line:%d\n", 00067 cond,file,line 00068 ); 00069 abort(); 00070 } 00071 00072 static void __nl_range_assertion_failed( 00073 float x, float min_val, float max_val, char* file, int line 00074 ) { 00075 fprintf( 00076 stderr, 00077 "OpenNL range assertion failed: %f in [ %f ... %f ], file:%s, line:%d\n", 00078 x, min_val, max_val, file,line 00079 ); 00080 abort(); 00081 } 00082 00083 static void __nl_should_not_have_reached(char* file, int line) { 00084 fprintf( 00085 stderr, 00086 "OpenNL should not have reached this point: file:%s, line:%d\n", 00087 file,line 00088 ); 00089 abort(); 00090 } 00091 00092 00093 #define __nl_assert(x) { \ 00094 if(!(x)) { \ 00095 __nl_assertion_failed(#x,__FILE__, __LINE__); \ 00096 } \ 00097 } 00098 00099 #define __nl_range_assert(x,min_val,max_val) { \ 00100 if(((x) < (min_val)) || ((x) > (max_val))) { \ 00101 __nl_range_assertion_failed(x, min_val, max_val, \ 00102 __FILE__, __LINE__ \ 00103 ); \ 00104 } \ 00105 } 00106 00107 #define __nl_assert_not_reached { \ 00108 __nl_should_not_have_reached(__FILE__, __LINE__); \ 00109 } 00110 00111 #ifdef NL_DEBUG 00112 #define __nl_debug_assert(x) __nl_assert(x) 00113 #define __nl_debug_range_assert(x,min_val,max_val) __nl_range_assert(x,min_val,max_val) 00114 #else 00115 #define __nl_debug_assert(x) 00116 #define __nl_debug_range_assert(x,min_val,max_val) 00117 #endif 00118 00119 #ifdef NL_PARANOID 00120 #define __nl_parano_assert(x) __nl_assert(x) 00121 #define __nl_parano_range_assert(x,min_val,max_val) __nl_range_assert(x,min_val,max_val) 00122 #else 00123 #define __nl_parano_assert(x) 00124 #define __nl_parano_range_assert(x,min_val,max_val) 00125 #endif 00126 00127 /************************************************************************************/ 00128 /* classic macros */ 00129 00130 #ifndef MIN 00131 #define MIN(x,y) (((x) < (y)) ? (x) : (y)) 00132 #endif 00133 00134 #ifndef MAX 00135 #define MAX(x,y) (((x) > (y)) ? (x) : (y)) 00136 #endif 00137 00138 /************************************************************************************/ 00139 /* memory management */ 00140 00141 #define __NL_NEW(T) (T*)(calloc(1, sizeof(T))) 00142 #define __NL_NEW_ARRAY(T,NB) (T*)(calloc((NB),sizeof(T))) 00143 #define __NL_RENEW_ARRAY(T,x,NB) (T*)(realloc(x,(NB)*sizeof(T))) 00144 #define __NL_DELETE(x) free(x); x = NULL 00145 #define __NL_DELETE_ARRAY(x) free(x); x = NULL 00146 00147 #define __NL_CLEAR(T, x) memset(x, 0, sizeof(T)) 00148 #define __NL_CLEAR_ARRAY(T,x,NB) memset(x, 0, (NB)*sizeof(T)) 00149 00150 /************************************************************************************/ 00151 /* Dynamic arrays for sparse row/columns */ 00152 00153 typedef struct { 00154 NLuint index; 00155 NLfloat value; 00156 } __NLCoeff; 00157 00158 typedef struct { 00159 NLuint size; 00160 NLuint capacity; 00161 __NLCoeff* coeff; 00162 } __NLRowColumn; 00163 00164 static void __nlRowColumnConstruct(__NLRowColumn* c) { 00165 c->size = 0; 00166 c->capacity = 0; 00167 c->coeff = NULL; 00168 } 00169 00170 static void __nlRowColumnDestroy(__NLRowColumn* c) { 00171 __NL_DELETE_ARRAY(c->coeff); 00172 #ifdef NL_PARANOID 00173 __NL_CLEAR(__NLRowColumn, c); 00174 #endif 00175 } 00176 00177 static void __nlRowColumnGrow(__NLRowColumn* c) { 00178 if(c->capacity != 0) { 00179 c->capacity = 2 * c->capacity; 00180 c->coeff = __NL_RENEW_ARRAY(__NLCoeff, c->coeff, c->capacity); 00181 } else { 00182 c->capacity = 4; 00183 c->coeff = __NL_NEW_ARRAY(__NLCoeff, c->capacity); 00184 } 00185 } 00186 00187 static void __nlRowColumnAdd(__NLRowColumn* c, NLint index, NLfloat value) { 00188 NLuint i; 00189 for(i=0; i<c->size; i++) { 00190 if(c->coeff[i].index == (NLuint)index) { 00191 c->coeff[i].value += value; 00192 return; 00193 } 00194 } 00195 if(c->size == c->capacity) { 00196 __nlRowColumnGrow(c); 00197 } 00198 c->coeff[c->size].index = index; 00199 c->coeff[c->size].value = value; 00200 c->size++; 00201 } 00202 00203 /* Does not check whether the index already exists */ 00204 static void __nlRowColumnAppend(__NLRowColumn* c, NLint index, NLfloat value) { 00205 if(c->size == c->capacity) { 00206 __nlRowColumnGrow(c); 00207 } 00208 c->coeff[c->size].index = index; 00209 c->coeff[c->size].value = value; 00210 c->size++; 00211 } 00212 00213 static void __nlRowColumnClear(__NLRowColumn* c) { 00214 c->size = 0; 00215 c->capacity = 0; 00216 __NL_DELETE_ARRAY(c->coeff); 00217 } 00218 00219 /************************************************************************************/ 00220 /* SparseMatrix data structure */ 00221 00222 #define __NL_ROWS 1 00223 #define __NL_COLUMNS 2 00224 #define __NL_SYMMETRIC 4 00225 00226 typedef struct { 00227 NLuint m; 00228 NLuint n; 00229 NLuint diag_size; 00230 NLenum storage; 00231 __NLRowColumn* row; 00232 __NLRowColumn* column; 00233 NLfloat* diag; 00234 } __NLSparseMatrix; 00235 00236 00237 static void __nlSparseMatrixConstruct( 00238 __NLSparseMatrix* M, NLuint m, NLuint n, NLenum storage 00239 ) { 00240 NLuint i; 00241 M->m = m; 00242 M->n = n; 00243 M->storage = storage; 00244 if(storage & __NL_ROWS) { 00245 M->row = __NL_NEW_ARRAY(__NLRowColumn, m); 00246 for(i=0; i<m; i++) { 00247 __nlRowColumnConstruct(&(M->row[i])); 00248 } 00249 } else { 00250 M->row = NULL; 00251 } 00252 00253 if(storage & __NL_COLUMNS) { 00254 M->column = __NL_NEW_ARRAY(__NLRowColumn, n); 00255 for(i=0; i<n; i++) { 00256 __nlRowColumnConstruct(&(M->column[i])); 00257 } 00258 } else { 00259 M->column = NULL; 00260 } 00261 00262 M->diag_size = MIN(m,n); 00263 M->diag = __NL_NEW_ARRAY(NLfloat, M->diag_size); 00264 } 00265 00266 static void __nlSparseMatrixDestroy(__NLSparseMatrix* M) { 00267 NLuint i; 00268 __NL_DELETE_ARRAY(M->diag); 00269 if(M->storage & __NL_ROWS) { 00270 for(i=0; i<M->m; i++) { 00271 __nlRowColumnDestroy(&(M->row[i])); 00272 } 00273 __NL_DELETE_ARRAY(M->row); 00274 } 00275 if(M->storage & __NL_COLUMNS) { 00276 for(i=0; i<M->n; i++) { 00277 __nlRowColumnDestroy(&(M->column[i])); 00278 } 00279 __NL_DELETE_ARRAY(M->column); 00280 } 00281 #ifdef NL_PARANOID 00282 __NL_CLEAR(__NLSparseMatrix,M); 00283 #endif 00284 } 00285 00286 static void __nlSparseMatrixAdd( 00287 __NLSparseMatrix* M, NLuint i, NLuint j, NLfloat value 00288 ) { 00289 __nl_parano_range_assert(i, 0, M->m - 1); 00290 __nl_parano_range_assert(j, 0, M->n - 1); 00291 if((M->storage & __NL_SYMMETRIC) && (j > i)) { 00292 return; 00293 } 00294 if(i == j) { 00295 M->diag[i] += value; 00296 } 00297 if(M->storage & __NL_ROWS) { 00298 __nlRowColumnAdd(&(M->row[i]), j, value); 00299 } 00300 if(M->storage & __NL_COLUMNS) { 00301 __nlRowColumnAdd(&(M->column[j]), i, value); 00302 } 00303 } 00304 00305 static void __nlSparseMatrixClear( __NLSparseMatrix* M) { 00306 NLuint i; 00307 if(M->storage & __NL_ROWS) { 00308 for(i=0; i<M->m; i++) { 00309 __nlRowColumnClear(&(M->row[i])); 00310 } 00311 } 00312 if(M->storage & __NL_COLUMNS) { 00313 for(i=0; i<M->n; i++) { 00314 __nlRowColumnClear(&(M->column[i])); 00315 } 00316 } 00317 __NL_CLEAR_ARRAY(NLfloat, M->diag, M->diag_size); 00318 } 00319 00320 /* Returns the number of non-zero coefficients */ 00321 static NLuint __nlSparseMatrixNNZ( __NLSparseMatrix* M) { 00322 NLuint nnz = 0; 00323 NLuint i; 00324 if(M->storage & __NL_ROWS) { 00325 for(i = 0; i<M->m; i++) { 00326 nnz += M->row[i].size; 00327 } 00328 } else if (M->storage & __NL_COLUMNS) { 00329 for(i = 0; i<M->n; i++) { 00330 nnz += M->column[i].size; 00331 } 00332 } else { 00333 __nl_assert_not_reached; 00334 } 00335 return nnz; 00336 } 00337 00338 /************************************************************************************/ 00339 /* SparseMatrix x Vector routines, internal helper routines */ 00340 00341 static void __nlSparseMatrix_mult_rows_symmetric( 00342 __NLSparseMatrix* A, NLfloat* x, NLfloat* y 00343 ) { 00344 NLuint m = A->m; 00345 NLuint i,ij; 00346 __NLRowColumn* Ri = NULL; 00347 __NLCoeff* c = NULL; 00348 for(i=0; i<m; i++) { 00349 y[i] = 0; 00350 Ri = &(A->row[i]); 00351 for(ij=0; ij<Ri->size; ij++) { 00352 c = &(Ri->coeff[ij]); 00353 y[i] += c->value * x[c->index]; 00354 if(i != c->index) { 00355 y[c->index] += c->value * x[i]; 00356 } 00357 } 00358 } 00359 } 00360 00361 static void __nlSparseMatrix_mult_rows( 00362 __NLSparseMatrix* A, NLfloat* x, NLfloat* y 00363 ) { 00364 NLuint m = A->m; 00365 NLuint i,ij; 00366 __NLRowColumn* Ri = NULL; 00367 __NLCoeff* c = NULL; 00368 for(i=0; i<m; i++) { 00369 y[i] = 0; 00370 Ri = &(A->row[i]); 00371 for(ij=0; ij<Ri->size; ij++) { 00372 c = &(Ri->coeff[ij]); 00373 y[i] += c->value * x[c->index]; 00374 } 00375 } 00376 } 00377 00378 static void __nlSparseMatrix_mult_cols_symmetric( 00379 __NLSparseMatrix* A, NLfloat* x, NLfloat* y 00380 ) { 00381 NLuint n = A->n; 00382 NLuint j,ii; 00383 __NLRowColumn* Cj = NULL; 00384 __NLCoeff* c = NULL; 00385 for(j=0; j<n; j++) { 00386 y[j] = 0; 00387 Cj = &(A->column[j]); 00388 for(ii=0; ii<Cj->size; ii++) { 00389 c = &(Cj->coeff[ii]); 00390 y[c->index] += c->value * x[j]; 00391 if(j != c->index) { 00392 y[j] += c->value * x[c->index]; 00393 } 00394 } 00395 } 00396 } 00397 00398 static void __nlSparseMatrix_mult_cols( 00399 __NLSparseMatrix* A, NLfloat* x, NLfloat* y 00400 ) { 00401 NLuint n = A->n; 00402 NLuint j,ii; 00403 __NLRowColumn* Cj = NULL; 00404 __NLCoeff* c = NULL; 00405 __NL_CLEAR_ARRAY(NLfloat, y, A->m); 00406 for(j=0; j<n; j++) { 00407 Cj = &(A->column[j]); 00408 for(ii=0; ii<Cj->size; ii++) { 00409 c = &(Cj->coeff[ii]); 00410 y[c->index] += c->value * x[j]; 00411 } 00412 } 00413 } 00414 00415 /************************************************************************************/ 00416 /* SparseMatrix x Vector routines, main driver routine */ 00417 00418 static void __nlSparseMatrixMult(__NLSparseMatrix* A, NLfloat* x, NLfloat* y) { 00419 if(A->storage & __NL_ROWS) { 00420 if(A->storage & __NL_SYMMETRIC) { 00421 __nlSparseMatrix_mult_rows_symmetric(A, x, y); 00422 } else { 00423 __nlSparseMatrix_mult_rows(A, x, y); 00424 } 00425 } else { 00426 if(A->storage & __NL_SYMMETRIC) { 00427 __nlSparseMatrix_mult_cols_symmetric(A, x, y); 00428 } else { 00429 __nlSparseMatrix_mult_cols(A, x, y); 00430 } 00431 } 00432 } 00433 00434 /* ****************** Routines for least squares ******************* */ 00435 00436 static void __nlSparseMatrix_square( 00437 __NLSparseMatrix* AtA, __NLSparseMatrix *A 00438 ) { 00439 NLuint m = A->m; 00440 NLuint n = A->n; 00441 NLuint i, j0, j1; 00442 __NLRowColumn *Ri = NULL; 00443 __NLCoeff *c0 = NULL, *c1 = NULL; 00444 float value; 00445 00446 __nlSparseMatrixConstruct(AtA, n, n, A->storage); 00447 00448 for(i=0; i<m; i++) { 00449 Ri = &(A->row[i]); 00450 00451 for(j0=0; j0<Ri->size; j0++) { 00452 c0 = &(Ri->coeff[j0]); 00453 for(j1=0; j1<Ri->size; j1++) { 00454 c1 = &(Ri->coeff[j1]); 00455 00456 value = c0->value*c1->value; 00457 __nlSparseMatrixAdd(AtA, c0->index, c1->index, value); 00458 } 00459 } 00460 } 00461 } 00462 00463 static void __nlSparseMatrix_transpose_mult_rows( 00464 __NLSparseMatrix* A, NLfloat* x, NLfloat* y 00465 ) { 00466 NLuint m = A->m; 00467 NLuint n = A->n; 00468 NLuint i,ij; 00469 __NLRowColumn* Ri = NULL; 00470 __NLCoeff* c = NULL; 00471 00472 __NL_CLEAR_ARRAY(NLfloat, y, n); 00473 00474 for(i=0; i<m; i++) { 00475 Ri = &(A->row[i]); 00476 for(ij=0; ij<Ri->size; ij++) { 00477 c = &(Ri->coeff[ij]); 00478 y[c->index] += c->value * x[i]; 00479 } 00480 } 00481 } 00482 00483 /************************************************************************************/ 00484 /* NLContext data structure */ 00485 00486 typedef void(*__NLMatrixFunc)(float* x, float* y); 00487 00488 typedef struct { 00489 NLfloat value[4]; 00490 NLboolean locked; 00491 NLuint index; 00492 __NLRowColumn *a; 00493 } __NLVariable; 00494 00495 #define __NL_STATE_INITIAL 0 00496 #define __NL_STATE_SYSTEM 1 00497 #define __NL_STATE_MATRIX 2 00498 #define __NL_STATE_MATRIX_CONSTRUCTED 3 00499 #define __NL_STATE_SYSTEM_CONSTRUCTED 4 00500 #define __NL_STATE_SYSTEM_SOLVED 5 00501 00502 typedef struct { 00503 NLenum state; 00504 NLuint n; 00505 NLuint m; 00506 __NLVariable* variable; 00507 NLfloat* b; 00508 NLfloat* Mtb; 00509 __NLSparseMatrix M; 00510 __NLSparseMatrix MtM; 00511 NLfloat* x; 00512 NLuint nb_variables; 00513 NLuint nb_rows; 00514 NLboolean least_squares; 00515 NLboolean symmetric; 00516 NLuint nb_rhs; 00517 NLboolean solve_again; 00518 NLboolean alloc_M; 00519 NLboolean alloc_MtM; 00520 NLboolean alloc_variable; 00521 NLboolean alloc_x; 00522 NLboolean alloc_b; 00523 NLboolean alloc_Mtb; 00524 NLfloat error; 00525 __NLMatrixFunc matrix_vector_prod; 00526 00527 struct __NLSuperLUContext { 00528 NLboolean alloc_slu; 00529 SuperMatrix L, U; 00530 NLint *perm_c, *perm_r; 00531 SuperLUStat_t stat; 00532 } slu; 00533 } __NLContext; 00534 00535 static __NLContext* __nlCurrentContext = NULL; 00536 00537 static void __nlMatrixVectorProd_default(NLfloat* x, NLfloat* y) { 00538 __nlSparseMatrixMult(&(__nlCurrentContext->M), x, y); 00539 } 00540 00541 00542 NLContext nlNewContext(void) { 00543 __NLContext* result = __NL_NEW(__NLContext); 00544 result->state = __NL_STATE_INITIAL; 00545 result->matrix_vector_prod = __nlMatrixVectorProd_default; 00546 result->nb_rhs = 1; 00547 nlMakeCurrent(result); 00548 return result; 00549 } 00550 00551 static void __nlFree_SUPERLU(__NLContext *context); 00552 00553 void nlDeleteContext(NLContext context_in) { 00554 __NLContext* context = (__NLContext*)(context_in); 00555 int i; 00556 00557 if(__nlCurrentContext == context) { 00558 __nlCurrentContext = NULL; 00559 } 00560 if(context->alloc_M) { 00561 __nlSparseMatrixDestroy(&context->M); 00562 } 00563 if(context->alloc_MtM) { 00564 __nlSparseMatrixDestroy(&context->MtM); 00565 } 00566 if(context->alloc_variable) { 00567 for(i=0; i<context->nb_variables; i++) { 00568 if(context->variable[i].a) { 00569 __nlRowColumnDestroy(context->variable[i].a); 00570 __NL_DELETE(context->variable[i].a); 00571 } 00572 } 00573 } 00574 if(context->alloc_b) { 00575 __NL_DELETE_ARRAY(context->b); 00576 } 00577 if(context->alloc_Mtb) { 00578 __NL_DELETE_ARRAY(context->Mtb); 00579 } 00580 if(context->alloc_x) { 00581 __NL_DELETE_ARRAY(context->x); 00582 } 00583 if (context->slu.alloc_slu) { 00584 __nlFree_SUPERLU(context); 00585 } 00586 00587 #ifdef NL_PARANOID 00588 __NL_CLEAR(__NLContext, context); 00589 #endif 00590 __NL_DELETE(context); 00591 } 00592 00593 void nlMakeCurrent(NLContext context) { 00594 __nlCurrentContext = (__NLContext*)(context); 00595 } 00596 00597 NLContext nlGetCurrent(void) { 00598 return __nlCurrentContext; 00599 } 00600 00601 static void __nlCheckState(NLenum state) { 00602 __nl_assert(__nlCurrentContext->state == state); 00603 } 00604 00605 static void __nlTransition(NLenum from_state, NLenum to_state) { 00606 __nlCheckState(from_state); 00607 __nlCurrentContext->state = to_state; 00608 } 00609 00610 /************************************************************************************/ 00611 /* Get/Set parameters */ 00612 00613 void nlSolverParameterf(NLenum pname, NLfloat param) { 00614 __nlCheckState(__NL_STATE_INITIAL); 00615 switch(pname) { 00616 case NL_NB_VARIABLES: { 00617 __nl_assert(param > 0); 00618 __nlCurrentContext->nb_variables = (NLuint)param; 00619 } break; 00620 case NL_NB_ROWS: { 00621 __nl_assert(param > 0); 00622 __nlCurrentContext->nb_rows = (NLuint)param; 00623 } break; 00624 case NL_LEAST_SQUARES: { 00625 __nlCurrentContext->least_squares = (NLboolean)param; 00626 } break; 00627 case NL_SYMMETRIC: { 00628 __nlCurrentContext->symmetric = (NLboolean)param; 00629 } break; 00630 case NL_NB_RIGHT_HAND_SIDES: { 00631 __nlCurrentContext->nb_rhs = (NLuint)param; 00632 } break; 00633 default: { 00634 __nl_assert_not_reached; 00635 } break; 00636 } 00637 } 00638 00639 void nlSolverParameteri(NLenum pname, NLint param) { 00640 __nlCheckState(__NL_STATE_INITIAL); 00641 switch(pname) { 00642 case NL_NB_VARIABLES: { 00643 __nl_assert(param > 0); 00644 __nlCurrentContext->nb_variables = (NLuint)param; 00645 } break; 00646 case NL_NB_ROWS: { 00647 __nl_assert(param > 0); 00648 __nlCurrentContext->nb_rows = (NLuint)param; 00649 } break; 00650 case NL_LEAST_SQUARES: { 00651 __nlCurrentContext->least_squares = (NLboolean)param; 00652 } break; 00653 case NL_SYMMETRIC: { 00654 __nlCurrentContext->symmetric = (NLboolean)param; 00655 } break; 00656 case NL_NB_RIGHT_HAND_SIDES: { 00657 __nlCurrentContext->nb_rhs = (NLuint)param; 00658 } break; 00659 default: { 00660 __nl_assert_not_reached; 00661 } break; 00662 } 00663 } 00664 00665 void nlGetBooleanv(NLenum pname, NLboolean* params) { 00666 switch(pname) { 00667 case NL_LEAST_SQUARES: { 00668 *params = __nlCurrentContext->least_squares; 00669 } break; 00670 case NL_SYMMETRIC: { 00671 *params = __nlCurrentContext->symmetric; 00672 } break; 00673 default: { 00674 __nl_assert_not_reached; 00675 } break; 00676 } 00677 } 00678 00679 void nlGetFloatv(NLenum pname, NLfloat* params) { 00680 switch(pname) { 00681 case NL_NB_VARIABLES: { 00682 *params = (NLfloat)(__nlCurrentContext->nb_variables); 00683 } break; 00684 case NL_NB_ROWS: { 00685 *params = (NLfloat)(__nlCurrentContext->nb_rows); 00686 } break; 00687 case NL_LEAST_SQUARES: { 00688 *params = (NLfloat)(__nlCurrentContext->least_squares); 00689 } break; 00690 case NL_SYMMETRIC: { 00691 *params = (NLfloat)(__nlCurrentContext->symmetric); 00692 } break; 00693 case NL_ERROR: { 00694 *params = (NLfloat)(__nlCurrentContext->error); 00695 } break; 00696 default: { 00697 __nl_assert_not_reached; 00698 } break; 00699 } 00700 } 00701 00702 void nlGetIntergerv(NLenum pname, NLint* params) { 00703 switch(pname) { 00704 case NL_NB_VARIABLES: { 00705 *params = (NLint)(__nlCurrentContext->nb_variables); 00706 } break; 00707 case NL_NB_ROWS: { 00708 *params = (NLint)(__nlCurrentContext->nb_rows); 00709 } break; 00710 case NL_LEAST_SQUARES: { 00711 *params = (NLint)(__nlCurrentContext->least_squares); 00712 } break; 00713 case NL_SYMMETRIC: { 00714 *params = (NLint)(__nlCurrentContext->symmetric); 00715 } break; 00716 default: { 00717 __nl_assert_not_reached; 00718 } break; 00719 } 00720 } 00721 00722 /************************************************************************************/ 00723 /* Enable / Disable */ 00724 00725 void nlEnable(NLenum pname) { 00726 switch(pname) { 00727 default: { 00728 __nl_assert_not_reached; 00729 } 00730 } 00731 } 00732 00733 void nlDisable(NLenum pname) { 00734 switch(pname) { 00735 default: { 00736 __nl_assert_not_reached; 00737 } 00738 } 00739 } 00740 00741 NLboolean nlIsEnabled(NLenum pname) { 00742 switch(pname) { 00743 default: { 00744 __nl_assert_not_reached; 00745 } 00746 } 00747 return NL_FALSE; 00748 } 00749 00750 /************************************************************************************/ 00751 /* Get/Set Lock/Unlock variables */ 00752 00753 void nlSetVariable(NLuint rhsindex, NLuint index, NLfloat value) { 00754 __nlCheckState(__NL_STATE_SYSTEM); 00755 __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1); 00756 __nlCurrentContext->variable[index].value[rhsindex] = value; 00757 } 00758 00759 NLfloat nlGetVariable(NLuint rhsindex, NLuint index) { 00760 __nl_assert(__nlCurrentContext->state != __NL_STATE_INITIAL); 00761 __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1); 00762 return __nlCurrentContext->variable[index].value[rhsindex]; 00763 } 00764 00765 void nlLockVariable(NLuint index) { 00766 __nlCheckState(__NL_STATE_SYSTEM); 00767 __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1); 00768 __nlCurrentContext->variable[index].locked = NL_TRUE; 00769 } 00770 00771 void nlUnlockVariable(NLuint index) { 00772 __nlCheckState(__NL_STATE_SYSTEM); 00773 __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1); 00774 __nlCurrentContext->variable[index].locked = NL_FALSE; 00775 } 00776 00777 NLboolean nlVariableIsLocked(NLuint index) { 00778 __nl_assert(__nlCurrentContext->state != __NL_STATE_INITIAL); 00779 __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1); 00780 return __nlCurrentContext->variable[index].locked; 00781 } 00782 00783 /************************************************************************************/ 00784 /* System construction */ 00785 00786 static void __nlVariablesToVector() { 00787 __NLContext *context = __nlCurrentContext; 00788 NLuint i, j, nb_rhs; 00789 00790 __nl_assert(context->alloc_x); 00791 __nl_assert(context->alloc_variable); 00792 00793 nb_rhs= context->nb_rhs; 00794 00795 for(i=0; i<context->nb_variables; i++) { 00796 __NLVariable* v = &(context->variable[i]); 00797 if(!v->locked) { 00798 __nl_assert(v->index < context->n); 00799 00800 for(j=0; j<nb_rhs; j++) 00801 context->x[context->n*j + v->index] = v->value[j]; 00802 } 00803 } 00804 } 00805 00806 static void __nlVectorToVariables() { 00807 __NLContext *context = __nlCurrentContext; 00808 NLuint i, j, nb_rhs; 00809 00810 __nl_assert(context->alloc_x); 00811 __nl_assert(context->alloc_variable); 00812 00813 nb_rhs= context->nb_rhs; 00814 00815 for(i=0; i<context->nb_variables; i++) { 00816 __NLVariable* v = &(context->variable[i]); 00817 if(!v->locked) { 00818 __nl_assert(v->index < context->n); 00819 00820 for(j=0; j<nb_rhs; j++) 00821 v->value[j] = context->x[context->n*j + v->index]; 00822 } 00823 } 00824 } 00825 00826 static void __nlBeginSystem() { 00827 __nl_assert(__nlCurrentContext->nb_variables > 0); 00828 00829 if (__nlCurrentContext->solve_again) 00830 __nlTransition(__NL_STATE_SYSTEM_SOLVED, __NL_STATE_SYSTEM); 00831 else { 00832 __nlTransition(__NL_STATE_INITIAL, __NL_STATE_SYSTEM); 00833 00834 __nlCurrentContext->variable = __NL_NEW_ARRAY( 00835 __NLVariable, __nlCurrentContext->nb_variables); 00836 00837 __nlCurrentContext->alloc_variable = NL_TRUE; 00838 } 00839 } 00840 00841 static void __nlEndSystem() { 00842 __nlTransition(__NL_STATE_MATRIX_CONSTRUCTED, __NL_STATE_SYSTEM_CONSTRUCTED); 00843 } 00844 00845 static void __nlBeginMatrix() { 00846 NLuint i; 00847 NLuint m = 0, n = 0; 00848 NLenum storage = __NL_ROWS; 00849 __NLContext *context = __nlCurrentContext; 00850 00851 __nlTransition(__NL_STATE_SYSTEM, __NL_STATE_MATRIX); 00852 00853 if (!context->solve_again) { 00854 for(i=0; i<context->nb_variables; i++) { 00855 if(context->variable[i].locked) { 00856 context->variable[i].index = ~0; 00857 context->variable[i].a = __NL_NEW(__NLRowColumn); 00858 __nlRowColumnConstruct(context->variable[i].a); 00859 } 00860 else 00861 context->variable[i].index = n++; 00862 } 00863 00864 m = (context->nb_rows == 0)? n: context->nb_rows; 00865 00866 context->m = m; 00867 context->n = n; 00868 00869 __nlSparseMatrixConstruct(&context->M, m, n, storage); 00870 context->alloc_M = NL_TRUE; 00871 00872 context->b = __NL_NEW_ARRAY(NLfloat, m*context->nb_rhs); 00873 context->alloc_b = NL_TRUE; 00874 00875 context->x = __NL_NEW_ARRAY(NLfloat, n*context->nb_rhs); 00876 context->alloc_x = NL_TRUE; 00877 } 00878 else { 00879 /* need to recompute b only, A is not constructed anymore */ 00880 __NL_CLEAR_ARRAY(NLfloat, context->b, context->m*context->nb_rhs); 00881 } 00882 00883 __nlVariablesToVector(); 00884 } 00885 00886 static void __nlEndMatrixRHS(NLuint rhs) { 00887 __NLContext *context = __nlCurrentContext; 00888 __NLVariable *variable; 00889 __NLRowColumn *a; 00890 NLfloat *b, *Mtb; 00891 NLuint i, j; 00892 00893 b = context->b + context->m*rhs; 00894 Mtb = context->Mtb + context->n*rhs; 00895 00896 for(i=0; i<__nlCurrentContext->nb_variables; i++) { 00897 variable = &(context->variable[i]); 00898 00899 if(variable->locked) { 00900 a = variable->a; 00901 00902 for(j=0; j<a->size; j++) { 00903 b[a->coeff[j].index] -= a->coeff[j].value*variable->value[rhs]; 00904 } 00905 } 00906 } 00907 00908 if(context->least_squares) 00909 __nlSparseMatrix_transpose_mult_rows(&context->M, b, Mtb); 00910 } 00911 00912 static void __nlEndMatrix() { 00913 __NLContext *context = __nlCurrentContext; 00914 NLuint i; 00915 00916 __nlTransition(__NL_STATE_MATRIX, __NL_STATE_MATRIX_CONSTRUCTED); 00917 00918 if(context->least_squares) { 00919 if(!__nlCurrentContext->solve_again) { 00920 __nlSparseMatrix_square(&context->MtM, &context->M); 00921 context->alloc_MtM = NL_TRUE; 00922 00923 context->Mtb = 00924 __NL_NEW_ARRAY(NLfloat, context->n*context->nb_rhs); 00925 context->alloc_Mtb = NL_TRUE; 00926 } 00927 } 00928 00929 for(i=0; i<context->nb_rhs; i++) 00930 __nlEndMatrixRHS(i); 00931 } 00932 00933 void nlMatrixAdd(NLuint row, NLuint col, NLfloat value) 00934 { 00935 __NLContext *context = __nlCurrentContext; 00936 00937 __nlCheckState(__NL_STATE_MATRIX); 00938 00939 if(context->solve_again) 00940 return; 00941 00942 if (!context->least_squares && context->variable[row].locked); 00943 else if (context->variable[col].locked) { 00944 if(!context->least_squares) 00945 row = context->variable[row].index; 00946 __nlRowColumnAppend(context->variable[col].a, row, value); 00947 } 00948 else { 00949 __NLSparseMatrix* M = &context->M; 00950 00951 if(!context->least_squares) 00952 row = context->variable[row].index; 00953 col = context->variable[col].index; 00954 00955 __nl_range_assert(row, 0, context->m - 1); 00956 __nl_range_assert(col, 0, context->n - 1); 00957 00958 __nlSparseMatrixAdd(M, row, col, value); 00959 } 00960 } 00961 00962 void nlRightHandSideAdd(NLuint rhsindex, NLuint index, NLfloat value) 00963 { 00964 __NLContext *context = __nlCurrentContext; 00965 NLfloat* b = context->b; 00966 00967 __nlCheckState(__NL_STATE_MATRIX); 00968 00969 if(context->least_squares) { 00970 __nl_range_assert(index, 0, context->m - 1); 00971 b[rhsindex*context->m + index] += value; 00972 } 00973 else { 00974 if(!context->variable[index].locked) { 00975 index = context->variable[index].index; 00976 __nl_range_assert(index, 0, context->m - 1); 00977 00978 b[rhsindex*context->m + index] += value; 00979 } 00980 } 00981 } 00982 00983 void nlRightHandSideSet(NLuint rhsindex, NLuint index, NLfloat value) 00984 { 00985 __NLContext *context = __nlCurrentContext; 00986 NLfloat* b = context->b; 00987 00988 __nlCheckState(__NL_STATE_MATRIX); 00989 00990 if(context->least_squares) { 00991 __nl_range_assert(index, 0, context->m - 1); 00992 b[rhsindex*context->m + index] = value; 00993 } 00994 else { 00995 if(!context->variable[index].locked) { 00996 index = context->variable[index].index; 00997 __nl_range_assert(index, 0, context->m - 1); 00998 00999 b[rhsindex*context->m + index] = value; 01000 } 01001 } 01002 } 01003 01004 void nlBegin(NLenum prim) { 01005 switch(prim) { 01006 case NL_SYSTEM: { 01007 __nlBeginSystem(); 01008 } break; 01009 case NL_MATRIX: { 01010 __nlBeginMatrix(); 01011 } break; 01012 default: { 01013 __nl_assert_not_reached; 01014 } 01015 } 01016 } 01017 01018 void nlEnd(NLenum prim) { 01019 switch(prim) { 01020 case NL_SYSTEM: { 01021 __nlEndSystem(); 01022 } break; 01023 case NL_MATRIX: { 01024 __nlEndMatrix(); 01025 } break; 01026 default: { 01027 __nl_assert_not_reached; 01028 } 01029 } 01030 } 01031 01032 /************************************************************************/ 01033 /* SuperLU wrapper */ 01034 01035 /* Note: SuperLU is difficult to call, but it is worth it. */ 01036 /* Here is a driver inspired by A. Sheffer's "cow flattener". */ 01037 static NLboolean __nlFactorize_SUPERLU(__NLContext *context, NLint *permutation) { 01038 01039 /* OpenNL Context */ 01040 __NLSparseMatrix* M = (context->least_squares)? &context->MtM: &context->M; 01041 NLuint n = context->n; 01042 NLuint nnz = __nlSparseMatrixNNZ(M); /* number of non-zero coeffs */ 01043 01044 /* Compressed Row Storage matrix representation */ 01045 NLint *xa = __NL_NEW_ARRAY(NLint, n+1); 01046 NLfloat *rhs = __NL_NEW_ARRAY(NLfloat, n); 01047 NLfloat *a = __NL_NEW_ARRAY(NLfloat, nnz); 01048 NLint *asub = __NL_NEW_ARRAY(NLint, nnz); 01049 NLint *etree = __NL_NEW_ARRAY(NLint, n); 01050 01051 /* SuperLU variables */ 01052 SuperMatrix At, AtP; 01053 NLint info, panel_size, relax; 01054 superlu_options_t options; 01055 01056 /* Temporary variables */ 01057 NLuint i, jj, count; 01058 01059 __nl_assert(!(M->storage & __NL_SYMMETRIC)); 01060 __nl_assert(M->storage & __NL_ROWS); 01061 __nl_assert(M->m == M->n); 01062 01063 /* Convert M to compressed column format */ 01064 for(i=0, count=0; i<n; i++) { 01065 __NLRowColumn *Ri = M->row + i; 01066 xa[i] = count; 01067 01068 for(jj=0; jj<Ri->size; jj++, count++) { 01069 a[count] = Ri->coeff[jj].value; 01070 asub[count] = Ri->coeff[jj].index; 01071 } 01072 } 01073 xa[n] = nnz; 01074 01075 /* Free M, don't need it anymore at this point */ 01076 __nlSparseMatrixClear(M); 01077 01078 /* Create superlu A matrix transposed */ 01079 sCreate_CompCol_Matrix( 01080 &At, n, n, nnz, a, asub, xa, 01081 SLU_NC, /* Colum wise, no supernode */ 01082 SLU_S, /* floats */ 01083 SLU_GE /* general storage */ 01084 ); 01085 01086 /* Set superlu options */ 01087 set_default_options(&options); 01088 options.ColPerm = MY_PERMC; 01089 options.Fact = DOFACT; 01090 01091 StatInit(&(context->slu.stat)); 01092 01093 panel_size = sp_ienv(1); /* sp_ienv give us the defaults */ 01094 relax = sp_ienv(2); 01095 01096 /* Compute permutation and permuted matrix */ 01097 context->slu.perm_r = __NL_NEW_ARRAY(NLint, n); 01098 context->slu.perm_c = __NL_NEW_ARRAY(NLint, n); 01099 01100 if ((permutation == NULL) || (*permutation == -1)) { 01101 get_perm_c(3, &At, context->slu.perm_c); 01102 01103 if (permutation) 01104 memcpy(permutation, context->slu.perm_c, sizeof(NLint)*n); 01105 } 01106 else 01107 memcpy(context->slu.perm_c, permutation, sizeof(NLint)*n); 01108 01109 sp_preorder(&options, &At, context->slu.perm_c, etree, &AtP); 01110 01111 /* Decompose into L and U */ 01112 sgstrf(&options, &AtP, relax, panel_size, 01113 etree, NULL, 0, context->slu.perm_c, context->slu.perm_r, 01114 &(context->slu.L), &(context->slu.U), &(context->slu.stat), &info); 01115 01116 /* Cleanup */ 01117 01118 Destroy_SuperMatrix_Store(&At); 01119 Destroy_CompCol_Permuted(&AtP); 01120 01121 __NL_DELETE_ARRAY(etree); 01122 __NL_DELETE_ARRAY(xa); 01123 __NL_DELETE_ARRAY(rhs); 01124 __NL_DELETE_ARRAY(a); 01125 __NL_DELETE_ARRAY(asub); 01126 01127 context->slu.alloc_slu = NL_TRUE; 01128 01129 return (info == 0); 01130 } 01131 01132 static NLboolean __nlInvert_SUPERLU(__NLContext *context) { 01133 01134 /* OpenNL Context */ 01135 NLfloat* b = (context->least_squares)? context->Mtb: context->b; 01136 NLfloat* x = context->x; 01137 NLuint n = context->n, j; 01138 01139 /* SuperLU variables */ 01140 SuperMatrix B; 01141 NLint info; 01142 01143 for(j=0; j<context->nb_rhs; j++, b+=n, x+=n) { 01144 /* Create superlu array for B */ 01145 sCreate_Dense_Matrix( 01146 &B, n, 1, b, n, 01147 SLU_DN, /* Fortran-type column-wise storage */ 01148 SLU_S, /* floats */ 01149 SLU_GE /* general */ 01150 ); 01151 01152 /* Forward/Back substitution to compute x */ 01153 sgstrs(TRANS, &(context->slu.L), &(context->slu.U), 01154 context->slu.perm_c, context->slu.perm_r, &B, 01155 &(context->slu.stat), &info); 01156 01157 if(info == 0) 01158 memcpy(x, ((DNformat*)B.Store)->nzval, sizeof(*x)*n); 01159 01160 Destroy_SuperMatrix_Store(&B); 01161 } 01162 01163 return (info == 0); 01164 } 01165 01166 static void __nlFree_SUPERLU(__NLContext *context) { 01167 01168 Destroy_SuperNode_Matrix(&(context->slu.L)); 01169 Destroy_CompCol_Matrix(&(context->slu.U)); 01170 01171 StatFree(&(context->slu.stat)); 01172 01173 __NL_DELETE_ARRAY(context->slu.perm_r); 01174 __NL_DELETE_ARRAY(context->slu.perm_c); 01175 01176 context->slu.alloc_slu = NL_FALSE; 01177 } 01178 01179 void nlPrintMatrix(void) { 01180 __NLContext *context = __nlCurrentContext; 01181 __NLSparseMatrix* M = &(context->M); 01182 __NLSparseMatrix* MtM = &(context->MtM); 01183 float *b = context->b; 01184 NLuint i, jj, k; 01185 NLuint m = context->m; 01186 NLuint n = context->n; 01187 __NLRowColumn* Ri = NULL; 01188 float *value = malloc(sizeof(*value)*(n+m)); 01189 01190 printf("A:\n"); 01191 for(i=0; i<m; i++) { 01192 Ri = &(M->row[i]); 01193 01194 memset(value, 0.0, sizeof(*value)*n); 01195 for(jj=0; jj<Ri->size; jj++) 01196 value[Ri->coeff[jj].index] = Ri->coeff[jj].value; 01197 01198 for (k = 0; k<n; k++) 01199 printf("%.3f ", value[k]); 01200 printf("\n"); 01201 } 01202 01203 for(k=0; k<context->nb_rhs; k++) { 01204 printf("b (%d):\n", k); 01205 for(i=0; i<n; i++) 01206 printf("%f ", b[context->n*k + i]); 01207 printf("\n"); 01208 } 01209 01210 if(context->alloc_MtM) { 01211 printf("AtA:\n"); 01212 for(i=0; i<n; i++) { 01213 Ri = &(MtM->row[i]); 01214 01215 memset(value, 0.0, sizeof(*value)*m); 01216 for(jj=0; jj<Ri->size; jj++) 01217 value[Ri->coeff[jj].index] = Ri->coeff[jj].value; 01218 01219 for (k = 0; k<n; k++) 01220 printf("%.3f ", value[k]); 01221 printf("\n"); 01222 } 01223 01224 for(k=0; k<context->nb_rhs; k++) { 01225 printf("Mtb (%d):\n", k); 01226 for(i=0; i<n; i++) 01227 printf("%f ", context->Mtb[context->n*k + i]); 01228 printf("\n"); 01229 } 01230 printf("\n"); 01231 } 01232 01233 free(value); 01234 } 01235 01236 /************************************************************************/ 01237 /* nlSolve() driver routine */ 01238 01239 NLboolean nlSolveAdvanced(NLint *permutation, NLboolean solveAgain) { 01240 NLboolean result = NL_TRUE; 01241 01242 __nlCheckState(__NL_STATE_SYSTEM_CONSTRUCTED); 01243 01244 if (!__nlCurrentContext->solve_again) 01245 result = __nlFactorize_SUPERLU(__nlCurrentContext, permutation); 01246 01247 if (result) { 01248 result = __nlInvert_SUPERLU(__nlCurrentContext); 01249 01250 if (result) { 01251 __nlVectorToVariables(); 01252 01253 if (solveAgain) 01254 __nlCurrentContext->solve_again = NL_TRUE; 01255 01256 __nlTransition(__NL_STATE_SYSTEM_CONSTRUCTED, __NL_STATE_SYSTEM_SOLVED); 01257 } 01258 } 01259 01260 return result; 01261 } 01262 01263 NLboolean nlSolve() { 01264 return nlSolveAdvanced(NULL, NL_FALSE); 01265 } 01266