|
Blender
V2.59
|
00001 /* 00002 * $Id: occlusion.c 35943 2011-04-01 16:01:29Z blendix $ 00003 * 00004 * ***** BEGIN GPL LICENSE BLOCK ***** 00005 * 00006 * This program is free software; you can redistribute it and/or 00007 * modify it under the terms of the GNU General Public License 00008 * as published by the Free Software Foundation; either version 2 00009 * of the License, or (at your option) any later version. 00010 * 00011 * This program is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 * GNU General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU General Public License 00017 * along with this program; if not, write to the Free Software Foundation, 00018 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 00019 * 00020 * The Original Code is Copyright (C) 2008 Blender Foundation. 00021 * All rights reserved. 00022 * 00023 * The Original Code is: all of this file. 00024 * 00025 * Contributor(s): Brecht Van Lommel. 00026 * 00027 * ***** END GPL LICENSE BLOCK ***** 00028 */ 00029 00035 #include <math.h> 00036 #include <stdio.h> 00037 #include <stdlib.h> 00038 #include <string.h> 00039 00040 #include "MEM_guardedalloc.h" 00041 00042 #include "DNA_material_types.h" 00043 00044 #include "BLI_math.h" 00045 #include "BLI_blenlib.h" 00046 #include "BLI_memarena.h" 00047 #include "BLI_threads.h" 00048 #include "BLI_utildefines.h" 00049 00050 #include "BKE_global.h" 00051 #include "BKE_scene.h" 00052 00053 00054 #include "RE_shader_ext.h" 00055 00056 /* local includes */ 00057 #include "occlusion.h" 00058 #include "render_types.h" 00059 #include "rendercore.h" 00060 #include "renderdatabase.h" 00061 #include "pixelshading.h" 00062 #include "shading.h" 00063 #include "zbuf.h" 00064 00065 /* ------------------------- Declarations --------------------------- */ 00066 00067 #define INVALID_INDEX ((int)(~0)) 00068 #define INVPI 0.31830988618379069f 00069 #define TOTCHILD 8 00070 #define CACHE_STEP 3 00071 00072 typedef struct OcclusionCacheSample { 00073 float co[3], n[3], ao[3], env[3], indirect[3], intensity, dist2; 00074 int x, y, filled; 00075 } OcclusionCacheSample; 00076 00077 typedef struct OcclusionCache { 00078 OcclusionCacheSample *sample; 00079 int x, y, w, h, step; 00080 } OcclusionCache; 00081 00082 typedef struct OccFace { 00083 int obi; 00084 int facenr; 00085 } OccFace; 00086 00087 typedef struct OccNode { 00088 float co[3], area; 00089 float sh[9], dco; 00090 float occlusion, rad[3]; 00091 int childflag; 00092 union { 00093 //OccFace face; 00094 int face; 00095 struct OccNode *node; 00096 } child[TOTCHILD]; 00097 } OccNode; 00098 00099 typedef struct OcclusionTree { 00100 MemArena *arena; 00101 00102 float (*co)[3]; /* temporary during build */ 00103 00104 OccFace *face; /* instance and face indices */ 00105 float *occlusion; /* occlusion for faces */ 00106 float (*rad)[3]; /* radiance for faces */ 00107 00108 OccNode *root; 00109 00110 OccNode **stack[BLENDER_MAX_THREADS]; 00111 int maxdepth; 00112 00113 int totface; 00114 00115 float error; 00116 float distfac; 00117 00118 int dothreadedbuild; 00119 int totbuildthread; 00120 int doindirect; 00121 00122 OcclusionCache *cache; 00123 } OcclusionTree; 00124 00125 typedef struct OcclusionThread { 00126 Render *re; 00127 StrandSurface *mesh; 00128 float (*faceao)[3]; 00129 float (*faceenv)[3]; 00130 float (*faceindirect)[3]; 00131 int begin, end; 00132 int thread; 00133 } OcclusionThread; 00134 00135 typedef struct OcclusionBuildThread { 00136 OcclusionTree *tree; 00137 int begin, end, depth; 00138 OccNode *node; 00139 } OcclusionBuildThread; 00140 00141 /* ------------------------- Shading --------------------------- */ 00142 00143 extern Render R; // meh 00144 00145 static void occ_shade(ShadeSample *ssamp, ObjectInstanceRen *obi, VlakRen *vlr, float *rad) 00146 { 00147 ShadeInput *shi= ssamp->shi; 00148 ShadeResult *shr= ssamp->shr; 00149 float l, u, v, *v1, *v2, *v3; 00150 00151 /* init */ 00152 if(vlr->v4) { 00153 shi->u= u= 0.5f; 00154 shi->v= v= 0.5f; 00155 } 00156 else { 00157 shi->u= u= 1.0f/3.0f; 00158 shi->v= v= 1.0f/3.0f; 00159 } 00160 00161 /* setup render coordinates */ 00162 v1= vlr->v1->co; 00163 v2= vlr->v2->co; 00164 v3= vlr->v3->co; 00165 00166 /* renderco */ 00167 l= 1.0f-u-v; 00168 00169 shi->co[0]= l*v3[0]+u*v1[0]+v*v2[0]; 00170 shi->co[1]= l*v3[1]+u*v1[1]+v*v2[1]; 00171 shi->co[2]= l*v3[2]+u*v1[2]+v*v2[2]; 00172 00173 shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2); 00174 00175 /* set up view vector */ 00176 VECCOPY(shi->view, shi->co); 00177 normalize_v3(shi->view); 00178 00179 /* cache for shadow */ 00180 shi->samplenr++; 00181 00182 shi->xs= 0; // TODO 00183 shi->ys= 0; 00184 00185 shade_input_set_normals(shi); 00186 00187 /* no normal flip */ 00188 if(shi->flippednor) 00189 shade_input_flip_normals(shi); 00190 00191 madd_v3_v3fl(shi->co, shi->vn, 0.0001f); /* ugly.. */ 00192 00193 /* not a pretty solution, but fixes common cases */ 00194 if(shi->obr->ob && shi->obr->ob->transflag & OB_NEG_SCALE) { 00195 negate_v3(shi->vn); 00196 negate_v3(shi->vno); 00197 negate_v3(shi->nmapnorm); 00198 } 00199 00200 /* init material vars */ 00201 // note, keep this synced with render_types.h 00202 memcpy(&shi->r, &shi->mat->r, 23*sizeof(float)); 00203 shi->har= shi->mat->har; 00204 00205 /* render */ 00206 shade_input_set_shade_texco(shi); 00207 shade_material_loop(shi, shr); /* todo: nodes */ 00208 00209 VECCOPY(rad, shr->combined); 00210 } 00211 00212 static void occ_build_shade(Render *re, OcclusionTree *tree) 00213 { 00214 ShadeSample ssamp; 00215 ObjectInstanceRen *obi; 00216 VlakRen *vlr; 00217 int a; 00218 00219 R= *re; 00220 00221 /* setup shade sample with correct passes */ 00222 memset(&ssamp, 0, sizeof(ShadeSample)); 00223 ssamp.shi[0].lay= re->lay; 00224 ssamp.shi[0].passflag= SCE_PASS_DIFFUSE|SCE_PASS_RGBA; 00225 ssamp.shi[0].combinedflag= ~(SCE_PASS_SPEC); 00226 ssamp.tot= 1; 00227 00228 for(a=0; a<tree->totface; a++) { 00229 obi= &R.objectinstance[tree->face[a].obi]; 00230 vlr= RE_findOrAddVlak(obi->obr, tree->face[a].facenr); 00231 00232 occ_shade(&ssamp, obi, vlr, tree->rad[a]); 00233 } 00234 } 00235 00236 /* ------------------------- Spherical Harmonics --------------------------- */ 00237 00238 /* Use 2nd order SH => 9 coefficients, stored in this order: 00239 0 = (0,0), 00240 1 = (1,-1), 2 = (1,0), 3 = (1,1), 00241 4 = (2,-2), 5 = (2,-1), 6 = (2,0), 7 = (2,1), 8 = (2,2) */ 00242 00243 static void sh_copy(float *shresult, float *sh) 00244 { 00245 memcpy(shresult, sh, sizeof(float)*9); 00246 } 00247 00248 static void sh_mul(float *sh, float f) 00249 { 00250 int i; 00251 00252 for(i=0; i<9; i++) 00253 sh[i] *= f; 00254 } 00255 00256 static void sh_add(float *shresult, float *sh1, float *sh2) 00257 { 00258 int i; 00259 00260 for(i=0; i<9; i++) 00261 shresult[i]= sh1[i] + sh2[i]; 00262 } 00263 00264 static void sh_from_disc(float *n, float area, float *shresult) 00265 { 00266 /* See formula (3) in: 00267 "An Efficient Representation for Irradiance Environment Maps" */ 00268 float sh[9], x, y, z; 00269 00270 x= n[0]; 00271 y= n[1]; 00272 z= n[2]; 00273 00274 sh[0]= 0.282095f; 00275 00276 sh[1]= 0.488603f*y; 00277 sh[2]= 0.488603f*z; 00278 sh[3]= 0.488603f*x; 00279 00280 sh[4]= 1.092548f*x*y; 00281 sh[5]= 1.092548f*y*z; 00282 sh[6]= 0.315392f*(3.0f*z*z - 1.0f); 00283 sh[7]= 1.092548f*x*z; 00284 sh[8]= 0.546274f*(x*x - y*y); 00285 00286 sh_mul(sh, area); 00287 sh_copy(shresult, sh); 00288 } 00289 00290 static float sh_eval(float *sh, float *v) 00291 { 00292 /* See formula (13) in: 00293 "An Efficient Representation for Irradiance Environment Maps" */ 00294 static const float c1 = 0.429043f, c2 = 0.511664f, c3 = 0.743125f; 00295 static const float c4 = 0.886227f, c5 = 0.247708f; 00296 float x, y, z, sum; 00297 00298 x= v[0]; 00299 y= v[1]; 00300 z= v[2]; 00301 00302 sum= c1*sh[8]*(x*x - y*y); 00303 sum += c3*sh[6]*z*z; 00304 sum += c4*sh[0]; 00305 sum += -c5*sh[6]; 00306 sum += 2.0f*c1*(sh[4]*x*y + sh[7]*x*z + sh[5]*y*z); 00307 sum += 2.0f*c2*(sh[3]*x + sh[1]*y + sh[2]*z); 00308 00309 return sum; 00310 } 00311 00312 /* ------------------------------ Building --------------------------------- */ 00313 00314 static void occ_face(const OccFace *face, float co[3], float normal[3], float *area) 00315 { 00316 ObjectInstanceRen *obi; 00317 VlakRen *vlr; 00318 float v1[3], v2[3], v3[3], v4[3]; 00319 00320 obi= &R.objectinstance[face->obi]; 00321 vlr= RE_findOrAddVlak(obi->obr, face->facenr); 00322 00323 if(co) { 00324 if(vlr->v4) 00325 interp_v3_v3v3(co, vlr->v1->co, vlr->v3->co, 0.5f); 00326 else 00327 cent_tri_v3(co, vlr->v1->co, vlr->v2->co, vlr->v3->co); 00328 00329 if(obi->flag & R_TRANSFORMED) 00330 mul_m4_v3(obi->mat, co); 00331 } 00332 00333 if(normal) { 00334 normal[0]= -vlr->n[0]; 00335 normal[1]= -vlr->n[1]; 00336 normal[2]= -vlr->n[2]; 00337 00338 if(obi->flag & R_TRANSFORMED) 00339 mul_m3_v3(obi->nmat, normal); 00340 } 00341 00342 if(area) { 00343 VECCOPY(v1, vlr->v1->co); 00344 VECCOPY(v2, vlr->v2->co); 00345 VECCOPY(v3, vlr->v3->co); 00346 if(vlr->v4) VECCOPY(v4, vlr->v4->co); 00347 00348 if(obi->flag & R_TRANSFORMED) { 00349 mul_m4_v3(obi->mat, v1); 00350 mul_m4_v3(obi->mat, v2); 00351 mul_m4_v3(obi->mat, v3); 00352 if(vlr->v4) mul_m4_v3(obi->mat, v4); 00353 } 00354 00355 /* todo: correct area for instances */ 00356 if(vlr->v4) 00357 *area= area_quad_v3(v1, v2, v3, v4); 00358 else 00359 *area= area_tri_v3(v1, v2, v3); 00360 } 00361 } 00362 00363 static void occ_sum_occlusion(OcclusionTree *tree, OccNode *node) 00364 { 00365 OccNode *child; 00366 float occ, area, totarea, rad[3]; 00367 int a, b, indirect= tree->doindirect; 00368 00369 occ= 0.0f; 00370 totarea= 0.0f; 00371 if(indirect) zero_v3(rad); 00372 00373 for(b=0; b<TOTCHILD; b++) { 00374 if(node->childflag & (1<<b)) { 00375 a= node->child[b].face; 00376 occ_face(&tree->face[a], 0, 0, &area); 00377 occ += area*tree->occlusion[a]; 00378 if(indirect) madd_v3_v3fl(rad, tree->rad[a], area); 00379 totarea += area; 00380 } 00381 else if(node->child[b].node) { 00382 child= node->child[b].node; 00383 occ_sum_occlusion(tree, child); 00384 00385 occ += child->area*child->occlusion; 00386 if(indirect) madd_v3_v3fl(rad, child->rad, child->area); 00387 totarea += child->area; 00388 } 00389 } 00390 00391 if(totarea != 0.0f) { 00392 occ /= totarea; 00393 if(indirect) mul_v3_fl(rad, 1.0f/totarea); 00394 } 00395 00396 node->occlusion= occ; 00397 if(indirect) copy_v3_v3(node->rad, rad); 00398 } 00399 00400 static int occ_find_bbox_axis(OcclusionTree *tree, int begin, int end, float *min, float *max) 00401 { 00402 float len, maxlen= -1.0f; 00403 int a, axis = 0; 00404 00405 INIT_MINMAX(min, max); 00406 00407 for(a=begin; a<end; a++) 00408 DO_MINMAX(tree->co[a], min, max) 00409 00410 for(a=0; a<3; a++) { 00411 len= max[a] - min[a]; 00412 00413 if(len > maxlen) { 00414 maxlen= len; 00415 axis= a; 00416 } 00417 } 00418 00419 return axis; 00420 } 00421 00422 static void occ_node_from_face(OccFace *face, OccNode *node) 00423 { 00424 float n[3]; 00425 00426 occ_face(face, node->co, n, &node->area); 00427 node->dco= 0.0f; 00428 sh_from_disc(n, node->area, node->sh); 00429 } 00430 00431 static void occ_build_dco(OcclusionTree *tree, OccNode *node, const float co[3], float *dco) 00432 { 00433 int b; 00434 for(b=0; b<TOTCHILD; b++) { 00435 float dist, d[3], nco[3]; 00436 00437 if(node->childflag & (1<<b)) { 00438 occ_face(tree->face+node->child[b].face, nco, NULL, NULL); 00439 } 00440 else if(node->child[b].node) { 00441 OccNode *child= node->child[b].node; 00442 occ_build_dco(tree, child, co, dco); 00443 VECCOPY(nco, child->co); 00444 } 00445 else { 00446 continue; 00447 } 00448 00449 VECSUB(d, nco, co); 00450 dist= INPR(d, d); 00451 if(dist > *dco) 00452 *dco= dist; 00453 } 00454 } 00455 00456 static void occ_build_split(OcclusionTree *tree, int begin, int end, int *split) 00457 { 00458 float min[3], max[3], mid; 00459 int axis, a, enda; 00460 00461 /* split in middle of boundbox. this seems faster than median split 00462 * on complex scenes, possibly since it avoids two distant faces to 00463 * be in the same node better? */ 00464 axis= occ_find_bbox_axis(tree, begin, end, min, max); 00465 mid= 0.5f*(min[axis]+max[axis]); 00466 00467 a= begin; 00468 enda= end; 00469 while(a<enda) { 00470 if(tree->co[a][axis] > mid) { 00471 enda--; 00472 SWAP(OccFace, tree->face[a], tree->face[enda]); 00473 SWAP(float, tree->co[a][0], tree->co[enda][0]); 00474 SWAP(float, tree->co[a][1], tree->co[enda][1]); 00475 SWAP(float, tree->co[a][2], tree->co[enda][2]); 00476 } 00477 else 00478 a++; 00479 } 00480 00481 *split= enda; 00482 } 00483 00484 static void occ_build_8_split(OcclusionTree *tree, int begin, int end, int *offset, int *count) 00485 { 00486 /* split faces into eight groups */ 00487 int b, splitx, splity[2], splitz[4]; 00488 00489 occ_build_split(tree, begin, end, &splitx); 00490 00491 /* force split if none found, to deal with degenerate geometry */ 00492 if(splitx == begin || splitx == end) 00493 splitx= (begin+end)/2; 00494 00495 occ_build_split(tree, begin, splitx, &splity[0]); 00496 occ_build_split(tree, splitx, end, &splity[1]); 00497 00498 occ_build_split(tree, begin, splity[0], &splitz[0]); 00499 occ_build_split(tree, splity[0], splitx, &splitz[1]); 00500 occ_build_split(tree, splitx, splity[1], &splitz[2]); 00501 occ_build_split(tree, splity[1], end, &splitz[3]); 00502 00503 offset[0]= begin; 00504 offset[1]= splitz[0]; 00505 offset[2]= splity[0]; 00506 offset[3]= splitz[1]; 00507 offset[4]= splitx; 00508 offset[5]= splitz[2]; 00509 offset[6]= splity[1]; 00510 offset[7]= splitz[3]; 00511 00512 for(b=0; b<7; b++) 00513 count[b]= offset[b+1] - offset[b]; 00514 count[7]= end - offset[7]; 00515 } 00516 00517 static void occ_build_recursive(OcclusionTree *tree, OccNode *node, int begin, int end, int depth); 00518 00519 static void *exec_occ_build(void *data) 00520 { 00521 OcclusionBuildThread *othread= (OcclusionBuildThread*)data; 00522 00523 occ_build_recursive(othread->tree, othread->node, othread->begin, othread->end, othread->depth); 00524 00525 return 0; 00526 } 00527 00528 static void occ_build_recursive(OcclusionTree *tree, OccNode *node, int begin, int end, int depth) 00529 { 00530 ListBase threads; 00531 OcclusionBuildThread othreads[BLENDER_MAX_THREADS]; 00532 OccNode *child, tmpnode; 00533 /* OccFace *face; */ 00534 int a, b, totthread=0, offset[TOTCHILD], count[TOTCHILD]; 00535 00536 /* add a new node */ 00537 node->occlusion= 1.0f; 00538 00539 /* leaf node with only children */ 00540 if(end - begin <= TOTCHILD) { 00541 for(a=begin, b=0; a<end; a++, b++) { 00542 /* face= &tree->face[a]; */ 00543 node->child[b].face= a; 00544 node->childflag |= (1<<b); 00545 } 00546 } 00547 else { 00548 /* order faces */ 00549 occ_build_8_split(tree, begin, end, offset, count); 00550 00551 if(depth == 1 && tree->dothreadedbuild) 00552 BLI_init_threads(&threads, exec_occ_build, tree->totbuildthread); 00553 00554 for(b=0; b<TOTCHILD; b++) { 00555 if(count[b] == 0) { 00556 node->child[b].node= NULL; 00557 } 00558 else if(count[b] == 1) { 00559 /* face= &tree->face[offset[b]]; */ 00560 node->child[b].face= offset[b]; 00561 node->childflag |= (1<<b); 00562 } 00563 else { 00564 if(tree->dothreadedbuild) 00565 BLI_lock_thread(LOCK_CUSTOM1); 00566 00567 child= BLI_memarena_alloc(tree->arena, sizeof(OccNode)); 00568 node->child[b].node= child; 00569 00570 /* keep track of maximum depth for stack */ 00571 if(depth+1 > tree->maxdepth) 00572 tree->maxdepth= depth+1; 00573 00574 if(tree->dothreadedbuild) 00575 BLI_unlock_thread(LOCK_CUSTOM1); 00576 00577 if(depth == 1 && tree->dothreadedbuild) { 00578 othreads[totthread].tree= tree; 00579 othreads[totthread].node= child; 00580 othreads[totthread].begin= offset[b]; 00581 othreads[totthread].end= offset[b]+count[b]; 00582 othreads[totthread].depth= depth+1; 00583 BLI_insert_thread(&threads, &othreads[totthread]); 00584 totthread++; 00585 } 00586 else 00587 occ_build_recursive(tree, child, offset[b], offset[b]+count[b], depth+1); 00588 } 00589 } 00590 00591 if(depth == 1 && tree->dothreadedbuild) 00592 BLI_end_threads(&threads); 00593 } 00594 00595 /* combine area, position and sh */ 00596 for(b=0; b<TOTCHILD; b++) { 00597 if(node->childflag & (1<<b)) { 00598 child= &tmpnode; 00599 occ_node_from_face(tree->face+node->child[b].face, &tmpnode); 00600 } 00601 else { 00602 child= node->child[b].node; 00603 } 00604 00605 if(child) { 00606 node->area += child->area; 00607 sh_add(node->sh, node->sh, child->sh); 00608 VECADDFAC(node->co, node->co, child->co, child->area); 00609 } 00610 } 00611 00612 if(node->area != 0.0f) 00613 mul_v3_fl(node->co, 1.0f/node->area); 00614 00615 /* compute maximum distance from center */ 00616 node->dco= 0.0f; 00617 if(node->area > 0.0f) 00618 occ_build_dco(tree, node, node->co, &node->dco); 00619 } 00620 00621 static void occ_build_sh_normalize(OccNode *node) 00622 { 00623 /* normalize spherical harmonics to not include area, so 00624 * we can clamp the dot product and then mutliply by area */ 00625 int b; 00626 00627 if(node->area != 0.0f) 00628 sh_mul(node->sh, 1.0f/node->area); 00629 00630 for(b=0; b<TOTCHILD; b++) { 00631 if(node->childflag & (1<<b)); 00632 else if(node->child[b].node) 00633 occ_build_sh_normalize(node->child[b].node); 00634 } 00635 } 00636 00637 static OcclusionTree *occ_tree_build(Render *re) 00638 { 00639 OcclusionTree *tree; 00640 ObjectInstanceRen *obi; 00641 ObjectRen *obr; 00642 Material *ma; 00643 VlakRen *vlr= NULL; 00644 int a, b, c, totface; 00645 00646 /* count */ 00647 totface= 0; 00648 for(obi=re->instancetable.first; obi; obi=obi->next) { 00649 obr= obi->obr; 00650 for(a=0; a<obr->totvlak; a++) { 00651 if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak; 00652 else vlr++; 00653 00654 ma= vlr->mat; 00655 00656 if((ma->shade_flag & MA_APPROX_OCCLUSION) && (ma->material_type == MA_TYPE_SURFACE)) 00657 totface++; 00658 } 00659 } 00660 00661 if(totface == 0) 00662 return NULL; 00663 00664 tree= MEM_callocN(sizeof(OcclusionTree), "OcclusionTree"); 00665 tree->totface= totface; 00666 00667 /* parameters */ 00668 tree->error= get_render_aosss_error(&re->r, re->wrld.ao_approx_error); 00669 tree->distfac= (re->wrld.aomode & WO_AODIST)? re->wrld.aodistfac: 0.0f; 00670 tree->doindirect= (re->wrld.ao_indirect_energy > 0.0f && re->wrld.ao_indirect_bounces > 0); 00671 00672 /* allocation */ 00673 tree->arena= BLI_memarena_new(0x8000 * sizeof(OccNode), "occ tree arena"); 00674 BLI_memarena_use_calloc(tree->arena); 00675 00676 if(re->wrld.aomode & WO_AOCACHE) 00677 tree->cache= MEM_callocN(sizeof(OcclusionCache)*BLENDER_MAX_THREADS, "OcclusionCache"); 00678 00679 tree->face= MEM_callocN(sizeof(OccFace)*totface, "OcclusionFace"); 00680 tree->co= MEM_callocN(sizeof(float)*3*totface, "OcclusionCo"); 00681 tree->occlusion= MEM_callocN(sizeof(float)*totface, "OcclusionOcclusion"); 00682 00683 if(tree->doindirect) 00684 tree->rad= MEM_callocN(sizeof(float)*3*totface, "OcclusionRad"); 00685 00686 /* make array of face pointers */ 00687 for(b=0, c=0, obi=re->instancetable.first; obi; obi=obi->next, c++) { 00688 obr= obi->obr; 00689 for(a=0; a<obr->totvlak; a++) { 00690 if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak; 00691 else vlr++; 00692 00693 ma= vlr->mat; 00694 00695 if((ma->shade_flag & MA_APPROX_OCCLUSION) && (ma->material_type == MA_TYPE_SURFACE)) { 00696 tree->face[b].obi= c; 00697 tree->face[b].facenr= a; 00698 tree->occlusion[b]= 1.0f; 00699 occ_face(&tree->face[b], tree->co[b], NULL, NULL); 00700 b++; 00701 } 00702 } 00703 } 00704 00705 /* threads */ 00706 tree->totbuildthread= (re->r.threads > 1 && totface > 10000)? 8: 1; 00707 tree->dothreadedbuild= (tree->totbuildthread > 1); 00708 00709 /* recurse */ 00710 tree->root= BLI_memarena_alloc(tree->arena, sizeof(OccNode)); 00711 tree->maxdepth= 1; 00712 occ_build_recursive(tree, tree->root, 0, totface, 1); 00713 00714 if(tree->doindirect) { 00715 occ_build_shade(re, tree); 00716 occ_sum_occlusion(tree, tree->root); 00717 } 00718 00719 MEM_freeN(tree->co); 00720 tree->co= NULL; 00721 00722 occ_build_sh_normalize(tree->root); 00723 00724 for(a=0; a<BLENDER_MAX_THREADS; a++) 00725 tree->stack[a]= MEM_callocN(sizeof(OccNode)*TOTCHILD*(tree->maxdepth+1), "OccStack"); 00726 00727 return tree; 00728 } 00729 00730 static void occ_free_tree(OcclusionTree *tree) 00731 { 00732 int a; 00733 00734 if(tree) { 00735 if(tree->arena) BLI_memarena_free(tree->arena); 00736 for(a=0; a<BLENDER_MAX_THREADS; a++) 00737 if(tree->stack[a]) 00738 MEM_freeN(tree->stack[a]); 00739 if(tree->occlusion) MEM_freeN(tree->occlusion); 00740 if(tree->cache) MEM_freeN(tree->cache); 00741 if(tree->face) MEM_freeN(tree->face); 00742 if(tree->rad) MEM_freeN(tree->rad); 00743 MEM_freeN(tree); 00744 } 00745 } 00746 00747 /* ------------------------- Traversal --------------------------- */ 00748 00749 static float occ_solid_angle(OccNode *node, float *v, float d2, float invd2, float *receivenormal) 00750 { 00751 float dotreceive, dotemit; 00752 float ev[3]; 00753 00754 ev[0]= -v[0]*invd2; 00755 ev[1]= -v[1]*invd2; 00756 ev[2]= -v[2]*invd2; 00757 dotemit= sh_eval(node->sh, ev); 00758 dotreceive= INPR(receivenormal, v)*invd2; 00759 00760 CLAMP(dotemit, 0.0f, 1.0f); 00761 CLAMP(dotreceive, 0.0f, 1.0f); 00762 00763 return ((node->area*dotemit*dotreceive)/(d2 + node->area*INVPI))*INVPI; 00764 } 00765 00766 static void VecAddDir(float *result, float *v1, float *v2, float fac) 00767 { 00768 result[0]= v1[0] + fac*(v2[0] - v1[0]); 00769 result[1]= v1[1] + fac*(v2[1] - v1[1]); 00770 result[2]= v1[2] + fac*(v2[2] - v1[2]); 00771 } 00772 00773 static int occ_visible_quad(float *p, float *n, float *v0, float *v1, float *v2, float *q0, float *q1, float *q2, float *q3) 00774 { 00775 static const float epsilon = 1e-6f; 00776 float c, sd[3]; 00777 00778 c= INPR(n, p); 00779 00780 /* signed distances from the vertices to the plane. */ 00781 sd[0]= INPR(n, v0) - c; 00782 sd[1]= INPR(n, v1) - c; 00783 sd[2]= INPR(n, v2) - c; 00784 00785 if(fabsf(sd[0]) < epsilon) sd[0] = 0.0f; 00786 if(fabsf(sd[1]) < epsilon) sd[1] = 0.0f; 00787 if(fabsf(sd[2]) < epsilon) sd[2] = 0.0f; 00788 00789 if(sd[0] > 0) { 00790 if(sd[1] > 0) { 00791 if(sd[2] > 0) { 00792 // +++ 00793 VECCOPY(q0, v0); 00794 VECCOPY(q1, v1); 00795 VECCOPY(q2, v2); 00796 VECCOPY(q3, q2); 00797 } 00798 else if(sd[2] < 0) { 00799 // ++- 00800 VECCOPY(q0, v0); 00801 VECCOPY(q1, v1); 00802 VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2]))); 00803 VecAddDir(q3, v0, v2, (sd[0]/(sd[0]-sd[2]))); 00804 } 00805 else { 00806 // ++0 00807 VECCOPY(q0, v0); 00808 VECCOPY(q1, v1); 00809 VECCOPY(q2, v2); 00810 VECCOPY(q3, q2); 00811 } 00812 } 00813 else if(sd[1] < 0) { 00814 if(sd[2] > 0) { 00815 // +-+ 00816 VECCOPY(q0, v0); 00817 VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1]))); 00818 VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2]))); 00819 VECCOPY(q3, v2); 00820 } 00821 else if(sd[2] < 0) { 00822 // +-- 00823 VECCOPY(q0, v0); 00824 VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1]))); 00825 VecAddDir(q2, v0, v2, (sd[0]/(sd[0]-sd[2]))); 00826 VECCOPY(q3, q2); 00827 } 00828 else { 00829 // +-0 00830 VECCOPY(q0, v0); 00831 VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1]))); 00832 VECCOPY(q2, v2); 00833 VECCOPY(q3, q2); 00834 } 00835 } 00836 else { 00837 if(sd[2] > 0) { 00838 // +0+ 00839 VECCOPY(q0, v0); 00840 VECCOPY(q1, v1); 00841 VECCOPY(q2, v2); 00842 VECCOPY(q3, q2); 00843 } 00844 else if(sd[2] < 0) { 00845 // +0- 00846 VECCOPY(q0, v0); 00847 VECCOPY(q1, v1); 00848 VecAddDir(q2, v0, v2, (sd[0]/(sd[0]-sd[2]))); 00849 VECCOPY(q3, q2); 00850 } 00851 else { 00852 // +00 00853 VECCOPY(q0, v0); 00854 VECCOPY(q1, v1); 00855 VECCOPY(q2, v2); 00856 VECCOPY(q3, q2); 00857 } 00858 } 00859 } 00860 else if(sd[0] < 0) { 00861 if(sd[1] > 0) { 00862 if(sd[2] > 0) { 00863 // -++ 00864 VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1]))); 00865 VECCOPY(q1, v1); 00866 VECCOPY(q2, v2); 00867 VecAddDir(q3, v0, v2, (sd[0]/(sd[0]-sd[2]))); 00868 } 00869 else if(sd[2] < 0) { 00870 // -+- 00871 VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1]))); 00872 VECCOPY(q1, v1); 00873 VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2]))); 00874 VECCOPY(q3, q2); 00875 } 00876 else { 00877 // -+0 00878 VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1]))); 00879 VECCOPY(q1, v1); 00880 VECCOPY(q2, v2); 00881 VECCOPY(q3, q2); 00882 } 00883 } 00884 else if(sd[1] < 0) { 00885 if(sd[2] > 0) { 00886 // --+ 00887 VecAddDir(q0, v0, v2, (sd[0]/(sd[0]-sd[2]))); 00888 VecAddDir(q1, v1, v2, (sd[1]/(sd[1]-sd[2]))); 00889 VECCOPY(q2, v2); 00890 VECCOPY(q3, q2); 00891 } 00892 else if(sd[2] < 0) { 00893 // --- 00894 return 0; 00895 } 00896 else { 00897 // --0 00898 return 0; 00899 } 00900 } 00901 else { 00902 if(sd[2] > 0) { 00903 // -0+ 00904 VecAddDir(q0, v0, v2, (sd[0]/(sd[0]-sd[2]))); 00905 VECCOPY(q1, v1); 00906 VECCOPY(q2, v2); 00907 VECCOPY(q3, q2); 00908 } 00909 else if(sd[2] < 0) { 00910 // -0- 00911 return 0; 00912 } 00913 else { 00914 // -00 00915 return 0; 00916 } 00917 } 00918 } 00919 else { 00920 if(sd[1] > 0) { 00921 if(sd[2] > 0) { 00922 // 0++ 00923 VECCOPY(q0, v0); 00924 VECCOPY(q1, v1); 00925 VECCOPY(q2, v2); 00926 VECCOPY(q3, q2); 00927 } 00928 else if(sd[2] < 0) { 00929 // 0+- 00930 VECCOPY(q0, v0); 00931 VECCOPY(q1, v1); 00932 VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2]))); 00933 VECCOPY(q3, q2); 00934 } 00935 else { 00936 // 0+0 00937 VECCOPY(q0, v0); 00938 VECCOPY(q1, v1); 00939 VECCOPY(q2, v2); 00940 VECCOPY(q3, q2); 00941 } 00942 } 00943 else if(sd[1] < 0) { 00944 if(sd[2] > 0) { 00945 // 0-+ 00946 VECCOPY(q0, v0); 00947 VecAddDir(q1, v1, v2, (sd[1]/(sd[1]-sd[2]))); 00948 VECCOPY(q2, v2); 00949 VECCOPY(q3, q2); 00950 } 00951 else if(sd[2] < 0) { 00952 // 0-- 00953 return 0; 00954 } 00955 else { 00956 // 0-0 00957 return 0; 00958 } 00959 } 00960 else { 00961 if(sd[2] > 0) { 00962 // 00+ 00963 VECCOPY(q0, v0); 00964 VECCOPY(q1, v1); 00965 VECCOPY(q2, v2); 00966 VECCOPY(q3, q2); 00967 } 00968 else if(sd[2] < 0) { 00969 // 00- 00970 return 0; 00971 } 00972 else { 00973 // 000 00974 return 0; 00975 } 00976 } 00977 } 00978 00979 return 1; 00980 } 00981 00982 /* altivec optimization, this works, but is unused */ 00983 00984 #if 0 00985 #include <Accelerate/Accelerate.h> 00986 00987 typedef union { 00988 vFloat v; 00989 float f[4]; 00990 } vFloatResult; 00991 00992 static vFloat vec_splat_float(float val) 00993 { 00994 return (vFloat){val, val, val, val}; 00995 } 00996 00997 static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3) 00998 { 00999 vFloat vcos, rlen, vrx, vry, vrz, vsrx, vsry, vsrz, gx, gy, gz, vangle; 01000 vUInt8 rotate = (vUInt8){4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3}; 01001 vFloatResult vresult; 01002 float result; 01003 01004 /* compute r* */ 01005 vrx = (vFloat){q0[0], q1[0], q2[0], q3[0]} - vec_splat_float(p[0]); 01006 vry = (vFloat){q0[1], q1[1], q2[1], q3[1]} - vec_splat_float(p[1]); 01007 vrz = (vFloat){q0[2], q1[2], q2[2], q3[2]} - vec_splat_float(p[2]); 01008 01009 /* normalize r* */ 01010 rlen = vec_rsqrte(vrx*vrx + vry*vry + vrz*vrz + vec_splat_float(1e-16f)); 01011 vrx = vrx*rlen; 01012 vry = vry*rlen; 01013 vrz = vrz*rlen; 01014 01015 /* rotate r* for cross and dot */ 01016 vsrx= vec_perm(vrx, vrx, rotate); 01017 vsry= vec_perm(vry, vry, rotate); 01018 vsrz= vec_perm(vrz, vrz, rotate); 01019 01020 /* cross product */ 01021 gx = vsry*vrz - vsrz*vry; 01022 gy = vsrz*vrx - vsrx*vrz; 01023 gz = vsrx*vry - vsry*vrx; 01024 01025 /* normalize */ 01026 rlen = vec_rsqrte(gx*gx + gy*gy + gz*gz + vec_splat_float(1e-16f)); 01027 gx = gx*rlen; 01028 gy = gy*rlen; 01029 gz = gz*rlen; 01030 01031 /* angle */ 01032 vcos = vrx*vsrx + vry*vsry + vrz*vsrz; 01033 vcos= vec_max(vec_min(vcos, vec_splat_float(1.0f)), vec_splat_float(-1.0f)); 01034 vangle= vacosf(vcos); 01035 01036 /* dot */ 01037 vresult.v = (vec_splat_float(n[0])*gx + 01038 vec_splat_float(n[1])*gy + 01039 vec_splat_float(n[2])*gz)*vangle; 01040 01041 result= (vresult.f[0] + vresult.f[1] + vresult.f[2] + vresult.f[3])*(0.5f/(float)M_PI); 01042 result= MAX2(result, 0.0f); 01043 01044 return result; 01045 } 01046 01047 #endif 01048 01049 /* SSE optimization, acos code doesn't work */ 01050 01051 #if 0 01052 01053 #include <xmmintrin.h> 01054 01055 static __m128 sse_approx_acos(__m128 x) 01056 { 01057 /* needs a better approximation than taylor expansion of acos, since that 01058 * gives big erros for near 1.0 values, sqrt(2*x)*acos(1-x) should work 01059 * better, see http://www.tom.womack.net/projects/sse-fast-arctrig.html */ 01060 01061 return _mm_set_ps1(1.0f); 01062 } 01063 01064 static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3) 01065 { 01066 float r0[3], r1[3], r2[3], r3[3], g0[3], g1[3], g2[3], g3[3]; 01067 float a1, a2, a3, a4, dot1, dot2, dot3, dot4, result; 01068 float fresult[4] __attribute__((aligned(16))); 01069 __m128 qx, qy, qz, rx, ry, rz, rlen, srx, sry, srz, gx, gy, gz, glen, rcos, angle, aresult; 01070 01071 /* compute r */ 01072 qx = _mm_set_ps(q3[0], q2[0], q1[0], q0[0]); 01073 qy = _mm_set_ps(q3[1], q2[1], q1[1], q0[1]); 01074 qz = _mm_set_ps(q3[2], q2[2], q1[2], q0[2]); 01075 01076 rx = qx - _mm_set_ps1(p[0]); 01077 ry = qy - _mm_set_ps1(p[1]); 01078 rz = qz - _mm_set_ps1(p[2]); 01079 01080 /* normalize r */ 01081 rlen = _mm_rsqrt_ps(rx*rx + ry*ry + rz*rz + _mm_set_ps1(1e-16f)); 01082 rx = rx*rlen; 01083 ry = ry*rlen; 01084 rz = rz*rlen; 01085 01086 /* cross product */ 01087 srx = _mm_shuffle_ps(rx, rx, _MM_SHUFFLE(0,3,2,1)); 01088 sry = _mm_shuffle_ps(ry, ry, _MM_SHUFFLE(0,3,2,1)); 01089 srz = _mm_shuffle_ps(rz, rz, _MM_SHUFFLE(0,3,2,1)); 01090 01091 gx = sry*rz - srz*ry; 01092 gy = srz*rx - srx*rz; 01093 gz = srx*ry - sry*rx; 01094 01095 /* normalize g */ 01096 glen = _mm_rsqrt_ps(gx*gx + gy*gy + gz*gz + _mm_set_ps1(1e-16f)); 01097 gx = gx*glen; 01098 gy = gy*glen; 01099 gz = gz*glen; 01100 01101 /* compute angle */ 01102 rcos = rx*srx + ry*sry + rz*srz; 01103 rcos= _mm_max_ps(_mm_min_ps(rcos, _mm_set_ps1(1.0f)), _mm_set_ps1(-1.0f)); 01104 01105 angle = sse_approx_cos(rcos); 01106 aresult = (_mm_set_ps1(n[0])*gx + _mm_set_ps1(n[1])*gy + _mm_set_ps1(n[2])*gz)*angle; 01107 01108 /* sum together */ 01109 result= (fresult[0] + fresult[1] + fresult[2] + fresult[3])*(0.5f/(float)M_PI); 01110 result= MAX2(result, 0.0f); 01111 01112 return result; 01113 } 01114 01115 #endif 01116 01117 static void normalizef(float *n) 01118 { 01119 float d; 01120 01121 d= INPR(n, n); 01122 01123 if(d > 1.0e-35F) { 01124 d= 1.0f/sqrtf(d); 01125 01126 n[0] *= d; 01127 n[1] *= d; 01128 n[2] *= d; 01129 } 01130 } 01131 01132 static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3) 01133 { 01134 float r0[3], r1[3], r2[3], r3[3], g0[3], g1[3], g2[3], g3[3]; 01135 float a1, a2, a3, a4, dot1, dot2, dot3, dot4, result; 01136 01137 VECSUB(r0, q0, p); 01138 VECSUB(r1, q1, p); 01139 VECSUB(r2, q2, p); 01140 VECSUB(r3, q3, p); 01141 01142 normalizef(r0); 01143 normalizef(r1); 01144 normalizef(r2); 01145 normalizef(r3); 01146 01147 cross_v3_v3v3(g0, r1, r0); normalizef(g0); 01148 cross_v3_v3v3(g1, r2, r1); normalizef(g1); 01149 cross_v3_v3v3(g2, r3, r2); normalizef(g2); 01150 cross_v3_v3v3(g3, r0, r3); normalizef(g3); 01151 01152 a1= saacosf(INPR(r0, r1)); 01153 a2= saacosf(INPR(r1, r2)); 01154 a3= saacosf(INPR(r2, r3)); 01155 a4= saacosf(INPR(r3, r0)); 01156 01157 dot1= INPR(n, g0); 01158 dot2= INPR(n, g1); 01159 dot3= INPR(n, g2); 01160 dot4= INPR(n, g3); 01161 01162 result= (a1*dot1 + a2*dot2 + a3*dot3 + a4*dot4)*0.5f/(float)M_PI; 01163 result= MAX2(result, 0.0f); 01164 01165 return result; 01166 } 01167 01168 static float occ_form_factor(OccFace *face, float *p, float *n) 01169 { 01170 ObjectInstanceRen *obi; 01171 VlakRen *vlr; 01172 float v1[3], v2[3], v3[3], v4[3], q0[3], q1[3], q2[3], q3[3], contrib= 0.0f; 01173 01174 obi= &R.objectinstance[face->obi]; 01175 vlr= RE_findOrAddVlak(obi->obr, face->facenr); 01176 01177 VECCOPY(v1, vlr->v1->co); 01178 VECCOPY(v2, vlr->v2->co); 01179 VECCOPY(v3, vlr->v3->co); 01180 01181 if(obi->flag & R_TRANSFORMED) { 01182 mul_m4_v3(obi->mat, v1); 01183 mul_m4_v3(obi->mat, v2); 01184 mul_m4_v3(obi->mat, v3); 01185 } 01186 01187 if(occ_visible_quad(p, n, v1, v2, v3, q0, q1, q2, q3)) 01188 contrib += occ_quad_form_factor(p, n, q0, q1, q2, q3); 01189 01190 if(vlr->v4) { 01191 VECCOPY(v4, vlr->v4->co); 01192 if(obi->flag & R_TRANSFORMED) 01193 mul_m4_v3(obi->mat, v4); 01194 01195 if(occ_visible_quad(p, n, v1, v3, v4, q0, q1, q2, q3)) 01196 contrib += occ_quad_form_factor(p, n, q0, q1, q2, q3); 01197 } 01198 01199 return contrib; 01200 } 01201 01202 static void occ_lookup(OcclusionTree *tree, int thread, OccFace *exclude, float *pp, float *pn, float *occ, float rad[3], float bentn[3]) 01203 { 01204 OccNode *node, **stack; 01205 OccFace *face; 01206 float resultocc, resultrad[3], v[3], p[3], n[3], co[3], invd2; 01207 float distfac, fac, error, d2, weight, emitarea; 01208 int b, f, totstack; 01209 01210 /* init variables */ 01211 VECCOPY(p, pp); 01212 VECCOPY(n, pn); 01213 VECADDFAC(p, p, n, 1e-4f); 01214 01215 if(bentn) 01216 copy_v3_v3(bentn, n); 01217 01218 error= tree->error; 01219 distfac= tree->distfac; 01220 01221 resultocc= 0.0f; 01222 zero_v3(resultrad); 01223 01224 /* init stack */ 01225 stack= tree->stack[thread]; 01226 stack[0]= tree->root; 01227 totstack= 1; 01228 01229 while(totstack) { 01230 /* pop point off the stack */ 01231 node= stack[--totstack]; 01232 01233 VECSUB(v, node->co, p); 01234 d2= INPR(v, v) + 1e-16f; 01235 emitarea= MAX2(node->area, node->dco); 01236 01237 if(d2*error > emitarea) { 01238 if(distfac != 0.0f) { 01239 fac= 1.0f/(1.0f + distfac*d2); 01240 if(fac < 0.01f) 01241 continue; 01242 } 01243 else 01244 fac= 1.0f; 01245 01246 /* accumulate occlusion from spherical harmonics */ 01247 invd2 = 1.0f/sqrtf(d2); 01248 weight= occ_solid_angle(node, v, d2, invd2, n); 01249 01250 if(rad) 01251 madd_v3_v3fl(resultrad, node->rad, weight*fac); 01252 01253 weight *= node->occlusion; 01254 01255 if(bentn) { 01256 bentn[0] -= weight*invd2*v[0]; 01257 bentn[1] -= weight*invd2*v[1]; 01258 bentn[2] -= weight*invd2*v[2]; 01259 } 01260 01261 resultocc += weight*fac; 01262 } 01263 else { 01264 /* traverse into children */ 01265 for(b=0; b<TOTCHILD; b++) { 01266 if(node->childflag & (1<<b)) { 01267 f= node->child[b].face; 01268 face= &tree->face[f]; 01269 01270 /* accumulate occlusion with face form factor */ 01271 if(!exclude || !(face->obi == exclude->obi && face->facenr == exclude->facenr)) { 01272 if(bentn || distfac != 0.0f) { 01273 occ_face(face, co, NULL, NULL); 01274 VECSUB(v, co, p); 01275 d2= INPR(v, v) + 1e-16f; 01276 01277 fac= (distfac == 0.0f)? 1.0f: 1.0f/(1.0f + distfac*d2); 01278 if(fac < 0.01f) 01279 continue; 01280 } 01281 else 01282 fac= 1.0f; 01283 01284 weight= occ_form_factor(face, p, n); 01285 01286 if(rad) 01287 madd_v3_v3fl(resultrad, tree->rad[f], weight*fac); 01288 01289 weight *= tree->occlusion[f]; 01290 01291 if(bentn) { 01292 invd2= 1.0f/sqrtf(d2); 01293 bentn[0] -= weight*invd2*v[0]; 01294 bentn[1] -= weight*invd2*v[1]; 01295 bentn[2] -= weight*invd2*v[2]; 01296 } 01297 01298 resultocc += weight*fac; 01299 } 01300 } 01301 else if(node->child[b].node) { 01302 /* push child on the stack */ 01303 stack[totstack++]= node->child[b].node; 01304 } 01305 } 01306 } 01307 } 01308 01309 if(occ) *occ= resultocc; 01310 if(rad) copy_v3_v3(rad, resultrad); 01311 /*if(rad && exclude) { 01312 int a; 01313 for(a=0; a<tree->totface; a++) 01314 if((tree->face[a].obi == exclude->obi && tree->face[a].facenr == exclude->facenr)) 01315 copy_v3_v3(rad, tree->rad[a]); 01316 }*/ 01317 if(bentn) normalize_v3(bentn); 01318 } 01319 01320 static void occ_compute_bounces(Render *re, OcclusionTree *tree, int totbounce) 01321 { 01322 float (*rad)[3], (*sum)[3], (*tmp)[3], co[3], n[3], occ; 01323 int bounce, i; 01324 01325 rad= MEM_callocN(sizeof(float)*3*tree->totface, "OcclusionBounceRad"); 01326 sum= MEM_dupallocN(tree->rad); 01327 01328 for(bounce=1; bounce<totbounce; bounce++) { 01329 for(i=0; i<tree->totface; i++) { 01330 occ_face(&tree->face[i], co, n, NULL); 01331 madd_v3_v3fl(co, n, 1e-8f); 01332 01333 occ_lookup(tree, 0, &tree->face[i], co, n, &occ, rad[i], NULL); 01334 rad[i][0]= MAX2(rad[i][0], 0.0f); 01335 rad[i][1]= MAX2(rad[i][1], 0.0f); 01336 rad[i][2]= MAX2(rad[i][2], 0.0f); 01337 add_v3_v3(sum[i], rad[i]); 01338 01339 if(re->test_break(re->tbh)) 01340 break; 01341 } 01342 01343 if(re->test_break(re->tbh)) 01344 break; 01345 01346 tmp= tree->rad; 01347 tree->rad= rad; 01348 rad= tmp; 01349 01350 occ_sum_occlusion(tree, tree->root); 01351 } 01352 01353 MEM_freeN(rad); 01354 MEM_freeN(tree->rad); 01355 tree->rad= sum; 01356 01357 if(!re->test_break(re->tbh)) 01358 occ_sum_occlusion(tree, tree->root); 01359 } 01360 01361 static void occ_compute_passes(Render *re, OcclusionTree *tree, int totpass) 01362 { 01363 float *occ, co[3], n[3]; 01364 int pass, i; 01365 01366 occ= MEM_callocN(sizeof(float)*tree->totface, "OcclusionPassOcc"); 01367 01368 for(pass=0; pass<totpass; pass++) { 01369 for(i=0; i<tree->totface; i++) { 01370 occ_face(&tree->face[i], co, n, NULL); 01371 negate_v3(n); 01372 VECADDFAC(co, co, n, 1e-8f); 01373 01374 occ_lookup(tree, 0, &tree->face[i], co, n, &occ[i], NULL, NULL); 01375 if(re->test_break(re->tbh)) 01376 break; 01377 } 01378 01379 if(re->test_break(re->tbh)) 01380 break; 01381 01382 for(i=0; i<tree->totface; i++) { 01383 tree->occlusion[i] -= occ[i]; //MAX2(1.0f-occ[i], 0.0f); 01384 if(tree->occlusion[i] < 0.0f) 01385 tree->occlusion[i]= 0.0f; 01386 } 01387 01388 occ_sum_occlusion(tree, tree->root); 01389 } 01390 01391 MEM_freeN(occ); 01392 } 01393 01394 static void sample_occ_tree(Render *re, OcclusionTree *tree, OccFace *exclude, float *co, float *n, int thread, int onlyshadow, float *ao, float *env, float *indirect) 01395 { 01396 float nn[3], bn[3], fac, occ, occlusion, correction, rad[3]; 01397 int envcolor; 01398 01399 envcolor= re->wrld.aocolor; 01400 if(onlyshadow) 01401 envcolor= WO_AOPLAIN; 01402 01403 negate_v3_v3(nn, n); 01404 01405 occ_lookup(tree, thread, exclude, co, nn, &occ, (tree->doindirect)? rad: NULL, (env && envcolor)? bn: NULL); 01406 01407 correction= re->wrld.ao_approx_correction; 01408 01409 occlusion= (1.0f-correction)*(1.0f-occ); 01410 CLAMP(occlusion, 0.0f, 1.0f); 01411 if(correction != 0.0f) 01412 occlusion += correction*exp(-occ); 01413 01414 if(env) { 01415 /* sky shading using bent normal */ 01416 if(ELEM(envcolor, WO_AOSKYCOL, WO_AOSKYTEX)) { 01417 fac= 0.5*(1.0f+bn[0]*re->grvec[0]+ bn[1]*re->grvec[1]+ bn[2]*re->grvec[2]); 01418 env[0]= (1.0f-fac)*re->wrld.horr + fac*re->wrld.zenr; 01419 env[1]= (1.0f-fac)*re->wrld.horg + fac*re->wrld.zeng; 01420 env[2]= (1.0f-fac)*re->wrld.horb + fac*re->wrld.zenb; 01421 01422 mul_v3_fl(env, occlusion); 01423 } 01424 else { 01425 env[0]= occlusion; 01426 env[1]= occlusion; 01427 env[2]= occlusion; 01428 } 01429 #if 0 01430 else { /* WO_AOSKYTEX */ 01431 float dxyview[3]; 01432 bn[0]= -bn[0]; 01433 bn[1]= -bn[1]; 01434 bn[2]= -bn[2]; 01435 dxyview[0]= 1.0f; 01436 dxyview[1]= 1.0f; 01437 dxyview[2]= 0.0f; 01438 shadeSkyView(ao, co, bn, dxyview); 01439 } 01440 #endif 01441 } 01442 01443 if(ao) { 01444 ao[0]= occlusion; 01445 ao[1]= occlusion; 01446 ao[2]= occlusion; 01447 } 01448 01449 if(tree->doindirect) copy_v3_v3(indirect, rad); 01450 else zero_v3(indirect); 01451 } 01452 01453 /* ---------------------------- Caching ------------------------------- */ 01454 01455 static OcclusionCacheSample *find_occ_sample(OcclusionCache *cache, int x, int y) 01456 { 01457 x -= cache->x; 01458 y -= cache->y; 01459 01460 x /= cache->step; 01461 y /= cache->step; 01462 x *= cache->step; 01463 y *= cache->step; 01464 01465 if(x < 0 || x >= cache->w || y < 0 || y >= cache->h) 01466 return NULL; 01467 else 01468 return &cache->sample[y*cache->w + x]; 01469 } 01470 01471 static int sample_occ_cache(OcclusionTree *tree, float *co, float *n, int x, int y, int thread, float *ao, float *env, float *indirect) 01472 { 01473 OcclusionCache *cache; 01474 OcclusionCacheSample *samples[4], *sample; 01475 float wn[4], wz[4], wb[4], tx, ty, w, totw, mino, maxo; 01476 float d[3], dist2; 01477 int i, x1, y1, x2, y2; 01478 01479 if(!tree->cache) 01480 return 0; 01481 01482 /* first try to find a sample in the same pixel */ 01483 cache= &tree->cache[thread]; 01484 01485 if(cache->sample && cache->step) { 01486 sample= &cache->sample[(y-cache->y)*cache->w + (x-cache->x)]; 01487 if(sample->filled) { 01488 VECSUB(d, sample->co, co); 01489 dist2= INPR(d, d); 01490 if(dist2 < 0.5f*sample->dist2 && INPR(sample->n, n) > 0.98f) { 01491 VECCOPY(ao, sample->ao); 01492 VECCOPY(env, sample->env); 01493 VECCOPY(indirect, sample->indirect); 01494 return 1; 01495 } 01496 } 01497 } 01498 else 01499 return 0; 01500 01501 /* try to interpolate between 4 neighbouring pixels */ 01502 samples[0]= find_occ_sample(cache, x, y); 01503 samples[1]= find_occ_sample(cache, x+cache->step, y); 01504 samples[2]= find_occ_sample(cache, x, y+cache->step); 01505 samples[3]= find_occ_sample(cache, x+cache->step, y+cache->step); 01506 01507 for(i=0; i<4; i++) 01508 if(!samples[i] || !samples[i]->filled) 01509 return 0; 01510 01511 /* require intensities not being too different */ 01512 mino= MIN4(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity); 01513 maxo= MAX4(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity); 01514 01515 if(maxo - mino > 0.05f) 01516 return 0; 01517 01518 /* compute weighted interpolation between samples */ 01519 zero_v3(ao); 01520 zero_v3(env); 01521 zero_v3(indirect); 01522 totw= 0.0f; 01523 01524 x1= samples[0]->x; 01525 y1= samples[0]->y; 01526 x2= samples[3]->x; 01527 y2= samples[3]->y; 01528 01529 tx= (float)(x2 - x)/(float)(x2 - x1); 01530 ty= (float)(y2 - y)/(float)(y2 - y1); 01531 01532 wb[3]= (1.0f-tx)*(1.0f-ty); 01533 wb[2]= (tx)*(1.0f-ty); 01534 wb[1]= (1.0f-tx)*(ty); 01535 wb[0]= tx*ty; 01536 01537 for(i=0; i<4; i++) { 01538 VECSUB(d, samples[i]->co, co); 01539 //dist2= INPR(d, d); 01540 01541 wz[i]= 1.0f; //(samples[i]->dist2/(1e-4f + dist2)); 01542 wn[i]= pow(INPR(samples[i]->n, n), 32.0f); 01543 01544 w= wb[i]*wn[i]*wz[i]; 01545 01546 totw += w; 01547 madd_v3_v3fl(ao, samples[i]->ao, w); 01548 madd_v3_v3fl(env, samples[i]->env, w); 01549 madd_v3_v3fl(indirect, samples[i]->indirect, w); 01550 } 01551 01552 if(totw >= 0.9f) { 01553 totw= 1.0f/totw; 01554 mul_v3_fl(ao, totw); 01555 mul_v3_fl(env, totw); 01556 mul_v3_fl(indirect, totw); 01557 return 1; 01558 } 01559 01560 return 0; 01561 } 01562 01563 static void sample_occ_surface(ShadeInput *shi) 01564 { 01565 StrandRen *strand= shi->strand; 01566 StrandSurface *mesh= strand->buffer->surface; 01567 int *face, *index = RE_strandren_get_face(shi->obr, strand, 0); 01568 float w[4], *co1, *co2, *co3, *co4; 01569 01570 if(mesh && mesh->face && mesh->co && mesh->ao && index) { 01571 face= mesh->face[*index]; 01572 01573 co1= mesh->co[face[0]]; 01574 co2= mesh->co[face[1]]; 01575 co3= mesh->co[face[2]]; 01576 co4= (face[3])? mesh->co[face[3]]: NULL; 01577 01578 interp_weights_face_v3(w, co1, co2, co3, co4, strand->vert->co); 01579 01580 zero_v3(shi->ao); 01581 zero_v3(shi->env); 01582 zero_v3(shi->indirect); 01583 01584 madd_v3_v3fl(shi->ao, mesh->ao[face[0]], w[0]); 01585 madd_v3_v3fl(shi->env, mesh->env[face[0]], w[0]); 01586 madd_v3_v3fl(shi->indirect, mesh->indirect[face[0]], w[0]); 01587 madd_v3_v3fl(shi->ao, mesh->ao[face[1]], w[1]); 01588 madd_v3_v3fl(shi->env, mesh->env[face[1]], w[1]); 01589 madd_v3_v3fl(shi->indirect, mesh->indirect[face[1]], w[1]); 01590 madd_v3_v3fl(shi->ao, mesh->ao[face[2]], w[2]); 01591 madd_v3_v3fl(shi->env, mesh->env[face[2]], w[2]); 01592 madd_v3_v3fl(shi->indirect, mesh->indirect[face[2]], w[2]); 01593 if(face[3]) { 01594 madd_v3_v3fl(shi->ao, mesh->ao[face[3]], w[3]); 01595 madd_v3_v3fl(shi->env, mesh->env[face[3]], w[3]); 01596 madd_v3_v3fl(shi->indirect, mesh->indirect[face[3]], w[3]); 01597 } 01598 } 01599 else { 01600 shi->ao[0]= 1.0f; 01601 shi->ao[1]= 1.0f; 01602 shi->ao[2]= 1.0f; 01603 zero_v3(shi->env); 01604 zero_v3(shi->indirect); 01605 } 01606 } 01607 01608 /* ------------------------- External Functions --------------------------- */ 01609 01610 static void *exec_strandsurface_sample(void *data) 01611 { 01612 OcclusionThread *othread= (OcclusionThread*)data; 01613 Render *re= othread->re; 01614 StrandSurface *mesh= othread->mesh; 01615 float ao[3], env[3], indirect[3], co[3], n[3], *co1, *co2, *co3, *co4; 01616 int a, *face; 01617 01618 for(a=othread->begin; a<othread->end; a++) { 01619 face= mesh->face[a]; 01620 co1= mesh->co[face[0]]; 01621 co2= mesh->co[face[1]]; 01622 co3= mesh->co[face[2]]; 01623 01624 if(face[3]) { 01625 co4= mesh->co[face[3]]; 01626 01627 interp_v3_v3v3(co, co1, co3, 0.5f); 01628 normal_quad_v3( n,co1, co2, co3, co4); 01629 } 01630 else { 01631 cent_tri_v3(co, co1, co2, co3); 01632 normal_tri_v3( n,co1, co2, co3); 01633 } 01634 negate_v3(n); 01635 01636 sample_occ_tree(re, re->occlusiontree, NULL, co, n, othread->thread, 0, ao, env, indirect); 01637 VECCOPY(othread->faceao[a], ao); 01638 VECCOPY(othread->faceenv[a], env); 01639 VECCOPY(othread->faceindirect[a], indirect); 01640 } 01641 01642 return 0; 01643 } 01644 01645 void make_occ_tree(Render *re) 01646 { 01647 OcclusionThread othreads[BLENDER_MAX_THREADS]; 01648 OcclusionTree *tree; 01649 StrandSurface *mesh; 01650 ListBase threads; 01651 float ao[3], env[3], indirect[3], (*faceao)[3], (*faceenv)[3], (*faceindirect)[3]; 01652 int a, totface, totthread, *face, *count; 01653 01654 /* ugly, needed for occ_face */ 01655 R= *re; 01656 01657 re->i.infostr= "Occlusion preprocessing"; 01658 re->stats_draw(re->sdh, &re->i); 01659 01660 re->occlusiontree= tree= occ_tree_build(re); 01661 01662 if(tree) { 01663 if(re->wrld.ao_approx_passes > 0) 01664 occ_compute_passes(re, tree, re->wrld.ao_approx_passes); 01665 if(tree->doindirect && (re->wrld.mode & WO_INDIRECT_LIGHT)) 01666 occ_compute_bounces(re, tree, re->wrld.ao_indirect_bounces); 01667 01668 for(mesh=re->strandsurface.first; mesh; mesh=mesh->next) { 01669 if(!mesh->face || !mesh->co || !mesh->ao) 01670 continue; 01671 01672 count= MEM_callocN(sizeof(int)*mesh->totvert, "OcclusionCount"); 01673 faceao= MEM_callocN(sizeof(float)*3*mesh->totface, "StrandSurfFaceAO"); 01674 faceenv= MEM_callocN(sizeof(float)*3*mesh->totface, "StrandSurfFaceEnv"); 01675 faceindirect= MEM_callocN(sizeof(float)*3*mesh->totface, "StrandSurfFaceIndirect"); 01676 01677 totthread= (mesh->totface > 10000)? re->r.threads: 1; 01678 totface= mesh->totface/totthread; 01679 for(a=0; a<totthread; a++) { 01680 othreads[a].re= re; 01681 othreads[a].faceao= faceao; 01682 othreads[a].faceenv= faceenv; 01683 othreads[a].faceindirect= faceindirect; 01684 othreads[a].thread= a; 01685 othreads[a].mesh= mesh; 01686 othreads[a].begin= a*totface; 01687 othreads[a].end= (a == totthread-1)? mesh->totface: (a+1)*totface; 01688 } 01689 01690 if(totthread == 1) { 01691 exec_strandsurface_sample(&othreads[0]); 01692 } 01693 else { 01694 BLI_init_threads(&threads, exec_strandsurface_sample, totthread); 01695 01696 for(a=0; a<totthread; a++) 01697 BLI_insert_thread(&threads, &othreads[a]); 01698 01699 BLI_end_threads(&threads); 01700 } 01701 01702 for(a=0; a<mesh->totface; a++) { 01703 face= mesh->face[a]; 01704 01705 VECCOPY(ao, faceao[a]); 01706 VECCOPY(env, faceenv[a]); 01707 VECCOPY(indirect, faceindirect[a]); 01708 01709 VECADD(mesh->ao[face[0]], mesh->ao[face[0]], ao); 01710 VECADD(mesh->env[face[0]], mesh->env[face[0]], env); 01711 VECADD(mesh->indirect[face[0]], mesh->indirect[face[0]], indirect); 01712 count[face[0]]++; 01713 VECADD(mesh->ao[face[1]], mesh->ao[face[1]], ao); 01714 VECADD(mesh->env[face[1]], mesh->env[face[1]], env); 01715 VECADD(mesh->indirect[face[1]], mesh->indirect[face[1]], indirect); 01716 count[face[1]]++; 01717 VECADD(mesh->ao[face[2]], mesh->ao[face[2]], ao); 01718 VECADD(mesh->env[face[2]], mesh->env[face[2]], env); 01719 VECADD(mesh->indirect[face[2]], mesh->indirect[face[2]], indirect); 01720 count[face[2]]++; 01721 01722 if(face[3]) { 01723 VECADD(mesh->ao[face[3]], mesh->ao[face[3]], ao); 01724 VECADD(mesh->env[face[3]], mesh->env[face[3]], env); 01725 VECADD(mesh->indirect[face[3]], mesh->indirect[face[3]], indirect); 01726 count[face[3]]++; 01727 } 01728 } 01729 01730 for(a=0; a<mesh->totvert; a++) { 01731 if(count[a]) { 01732 mul_v3_fl(mesh->ao[a], 1.0f/count[a]); 01733 mul_v3_fl(mesh->env[a], 1.0f/count[a]); 01734 mul_v3_fl(mesh->indirect[a], 1.0f/count[a]); 01735 } 01736 } 01737 01738 MEM_freeN(count); 01739 MEM_freeN(faceao); 01740 MEM_freeN(faceenv); 01741 MEM_freeN(faceindirect); 01742 } 01743 } 01744 } 01745 01746 void free_occ(Render *re) 01747 { 01748 if(re->occlusiontree) { 01749 occ_free_tree(re->occlusiontree); 01750 re->occlusiontree = NULL; 01751 } 01752 } 01753 01754 void sample_occ(Render *re, ShadeInput *shi) 01755 { 01756 OcclusionTree *tree= re->occlusiontree; 01757 OcclusionCache *cache; 01758 OcclusionCacheSample *sample; 01759 OccFace exclude; 01760 int onlyshadow; 01761 01762 if(tree) { 01763 if(shi->strand) { 01764 sample_occ_surface(shi); 01765 } 01766 /* try to get result from the cache if possible */ 01767 else if(shi->depth!=0 || !sample_occ_cache(tree, shi->co, shi->vno, shi->xs, shi->ys, shi->thread, shi->ao, shi->env, shi->indirect)) { 01768 /* no luck, let's sample the occlusion */ 01769 exclude.obi= shi->obi - re->objectinstance; 01770 exclude.facenr= shi->vlr->index; 01771 onlyshadow= (shi->mat->mode & MA_ONLYSHADOW); 01772 sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao, shi->env, shi->indirect); 01773 01774 /* fill result into sample, each time */ 01775 if(tree->cache) { 01776 cache= &tree->cache[shi->thread]; 01777 01778 if(cache->sample && cache->step) { 01779 sample= &cache->sample[(shi->ys-cache->y)*cache->w + (shi->xs-cache->x)]; 01780 VECCOPY(sample->co, shi->co); 01781 VECCOPY(sample->n, shi->vno); 01782 VECCOPY(sample->ao, shi->ao); 01783 VECCOPY(sample->env, shi->env); 01784 VECCOPY(sample->indirect, shi->indirect); 01785 sample->intensity= MAX3(sample->ao[0], sample->ao[1], sample->ao[2]); 01786 sample->intensity= MAX2(sample->intensity, MAX3(sample->env[0], sample->env[1], sample->env[2])); 01787 sample->intensity= MAX2(sample->intensity, MAX3(sample->indirect[0], sample->indirect[1], sample->indirect[2])); 01788 sample->dist2= INPR(shi->dxco, shi->dxco) + INPR(shi->dyco, shi->dyco); 01789 sample->filled= 1; 01790 } 01791 } 01792 } 01793 } 01794 else { 01795 shi->ao[0]= 1.0f; 01796 shi->ao[1]= 1.0f; 01797 shi->ao[2]= 1.0f; 01798 01799 shi->env[0]= 0.0f; 01800 shi->env[1]= 0.0f; 01801 shi->env[2]= 0.0f; 01802 01803 shi->indirect[0]= 0.0f; 01804 shi->indirect[1]= 0.0f; 01805 shi->indirect[2]= 0.0f; 01806 } 01807 } 01808 01809 void cache_occ_samples(Render *re, RenderPart *pa, ShadeSample *ssamp) 01810 { 01811 OcclusionTree *tree= re->occlusiontree; 01812 PixStr ps; 01813 OcclusionCache *cache; 01814 OcclusionCacheSample *sample; 01815 OccFace exclude; 01816 ShadeInput *shi; 01817 intptr_t *rd=NULL; 01818 int *ro=NULL, *rp=NULL, *rz=NULL, onlyshadow; 01819 int x, y, step = CACHE_STEP; 01820 01821 if(!tree->cache) 01822 return; 01823 01824 cache= &tree->cache[pa->thread]; 01825 cache->w= pa->rectx; 01826 cache->h= pa->recty; 01827 cache->x= pa->disprect.xmin; 01828 cache->y= pa->disprect.ymin; 01829 cache->step= step; 01830 cache->sample= MEM_callocN(sizeof(OcclusionCacheSample)*cache->w*cache->h, "OcclusionCacheSample"); 01831 sample= cache->sample; 01832 01833 if(re->osa) { 01834 rd= pa->rectdaps; 01835 } 01836 else { 01837 /* fake pixel struct for non-osa */ 01838 ps.next= NULL; 01839 ps.mask= 0xFFFF; 01840 01841 ro= pa->recto; 01842 rp= pa->rectp; 01843 rz= pa->rectz; 01844 } 01845 01846 /* compute a sample at every step pixels */ 01847 for(y=pa->disprect.ymin; y<pa->disprect.ymax; y++) { 01848 for(x=pa->disprect.xmin; x<pa->disprect.xmax; x++, sample++, rd++, ro++, rp++, rz++) { 01849 if(!(((x - pa->disprect.xmin + step) % step) == 0 || x == pa->disprect.xmax-1)) 01850 continue; 01851 if(!(((y - pa->disprect.ymin + step) % step) == 0 || y == pa->disprect.ymax-1)) 01852 continue; 01853 01854 if(re->osa) { 01855 if(!*rd) continue; 01856 01857 shade_samples_fill_with_ps(ssamp, (PixStr *)(*rd), x, y); 01858 } 01859 else { 01860 if(!*rp) continue; 01861 01862 ps.obi= *ro; 01863 ps.facenr= *rp; 01864 ps.z= *rz; 01865 shade_samples_fill_with_ps(ssamp, &ps, x, y); 01866 } 01867 01868 shi= ssamp->shi; 01869 if(shi->vlr) { 01870 onlyshadow= (shi->mat->mode & MA_ONLYSHADOW); 01871 exclude.obi= shi->obi - re->objectinstance; 01872 exclude.facenr= shi->vlr->index; 01873 sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao, shi->env, shi->indirect); 01874 01875 VECCOPY(sample->co, shi->co); 01876 VECCOPY(sample->n, shi->vno); 01877 VECCOPY(sample->ao, shi->ao); 01878 VECCOPY(sample->env, shi->env); 01879 VECCOPY(sample->indirect, shi->indirect); 01880 sample->intensity= MAX3(sample->ao[0], sample->ao[1], sample->ao[2]); 01881 sample->intensity= MAX2(sample->intensity, MAX3(sample->env[0], sample->env[1], sample->env[2])); 01882 sample->intensity= MAX2(sample->intensity, MAX3(sample->indirect[0], sample->indirect[1], sample->indirect[2])); 01883 sample->dist2= INPR(shi->dxco, shi->dxco) + INPR(shi->dyco, shi->dyco); 01884 sample->x= shi->xs; 01885 sample->y= shi->ys; 01886 sample->filled= 1; 01887 } 01888 01889 if(re->test_break(re->tbh)) 01890 break; 01891 } 01892 } 01893 } 01894 01895 void free_occ_samples(Render *re, RenderPart *pa) 01896 { 01897 OcclusionTree *tree= re->occlusiontree; 01898 OcclusionCache *cache; 01899 01900 if(tree->cache) { 01901 cache= &tree->cache[pa->thread]; 01902 01903 if(cache->sample) 01904 MEM_freeN(cache->sample); 01905 01906 cache->w= 0; 01907 cache->h= 0; 01908 cache->step= 0; 01909 } 01910 } 01911