Blender  V2.59
occlusion.c
Go to the documentation of this file.
00001 /* 
00002  * $Id: occlusion.c 35943 2011-04-01 16:01:29Z blendix $
00003  *
00004  * ***** BEGIN GPL LICENSE BLOCK *****
00005  *
00006  * This program is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU General Public License
00008  * as published by the Free Software Foundation; either version 2
00009  * of the License, or (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software Foundation,
00018  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00019  *
00020  * The Original Code is Copyright (C) 2008 Blender Foundation.
00021  * All rights reserved.
00022  *
00023  * The Original Code is: all of this file.
00024  *
00025  * Contributor(s): Brecht Van Lommel.
00026  *
00027  * ***** END GPL LICENSE BLOCK *****
00028  */
00029 
00035 #include <math.h>
00036 #include <stdio.h>
00037 #include <stdlib.h>
00038 #include <string.h>
00039 
00040 #include "MEM_guardedalloc.h"
00041 
00042 #include "DNA_material_types.h"
00043 
00044 #include "BLI_math.h"
00045 #include "BLI_blenlib.h"
00046 #include "BLI_memarena.h"
00047 #include "BLI_threads.h"
00048 #include "BLI_utildefines.h"
00049 
00050 #include "BKE_global.h"
00051 #include "BKE_scene.h"
00052 
00053 
00054 #include "RE_shader_ext.h"
00055 
00056 /* local includes */
00057 #include "occlusion.h"
00058 #include "render_types.h"
00059 #include "rendercore.h"
00060 #include "renderdatabase.h"
00061 #include "pixelshading.h"
00062 #include "shading.h"
00063 #include "zbuf.h"
00064 
00065 /* ------------------------- Declarations --------------------------- */
00066 
00067 #define INVALID_INDEX ((int)(~0))
00068 #define INVPI 0.31830988618379069f
00069 #define TOTCHILD 8
00070 #define CACHE_STEP 3
00071 
00072 typedef struct OcclusionCacheSample {
00073         float co[3], n[3], ao[3], env[3], indirect[3], intensity, dist2;
00074         int x, y, filled;
00075 } OcclusionCacheSample;
00076 
00077 typedef struct OcclusionCache {
00078         OcclusionCacheSample *sample;
00079         int x, y, w, h, step;
00080 } OcclusionCache;
00081 
00082 typedef struct OccFace {
00083         int obi;
00084         int facenr;
00085 } OccFace;
00086 
00087 typedef struct OccNode {
00088         float co[3], area;
00089         float sh[9], dco;
00090         float occlusion, rad[3];
00091         int childflag;
00092         union {
00093                 //OccFace face;
00094                 int face;
00095                 struct OccNode *node;
00096         } child[TOTCHILD];
00097 } OccNode;
00098 
00099 typedef struct OcclusionTree {
00100         MemArena *arena;
00101 
00102         float (*co)[3];         /* temporary during build */
00103 
00104         OccFace *face;          /* instance and face indices */
00105         float *occlusion;       /* occlusion for faces */
00106         float (*rad)[3];        /* radiance for faces */
00107         
00108         OccNode *root;
00109 
00110         OccNode **stack[BLENDER_MAX_THREADS];
00111         int maxdepth;
00112 
00113         int totface;
00114 
00115         float error;
00116         float distfac;
00117 
00118         int dothreadedbuild;
00119         int totbuildthread;
00120         int doindirect;
00121 
00122         OcclusionCache *cache;
00123 } OcclusionTree;
00124 
00125 typedef struct OcclusionThread {
00126         Render *re;
00127         StrandSurface *mesh;
00128         float (*faceao)[3];
00129         float (*faceenv)[3];
00130         float (*faceindirect)[3];
00131         int begin, end;
00132         int thread;
00133 } OcclusionThread;
00134 
00135 typedef struct OcclusionBuildThread {
00136         OcclusionTree *tree;
00137         int begin, end, depth;
00138         OccNode *node;
00139 } OcclusionBuildThread;
00140 
00141 /* ------------------------- Shading --------------------------- */
00142 
00143 extern Render R; // meh
00144 
00145 static void occ_shade(ShadeSample *ssamp, ObjectInstanceRen *obi, VlakRen *vlr, float *rad)
00146 {
00147         ShadeInput *shi= ssamp->shi;
00148         ShadeResult *shr= ssamp->shr;
00149         float l, u, v, *v1, *v2, *v3;
00150         
00151         /* init */
00152         if(vlr->v4) {
00153                 shi->u= u= 0.5f;
00154                 shi->v= v= 0.5f;
00155         }
00156         else {
00157                 shi->u= u= 1.0f/3.0f;
00158                 shi->v= v= 1.0f/3.0f;
00159         }
00160 
00161         /* setup render coordinates */
00162         v1= vlr->v1->co;
00163         v2= vlr->v2->co;
00164         v3= vlr->v3->co;
00165         
00166         /* renderco */
00167         l= 1.0f-u-v;
00168         
00169         shi->co[0]= l*v3[0]+u*v1[0]+v*v2[0];
00170         shi->co[1]= l*v3[1]+u*v1[1]+v*v2[1];
00171         shi->co[2]= l*v3[2]+u*v1[2]+v*v2[2];
00172 
00173         shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2);
00174 
00175         /* set up view vector */
00176         VECCOPY(shi->view, shi->co);
00177         normalize_v3(shi->view);
00178         
00179         /* cache for shadow */
00180         shi->samplenr++;
00181         
00182         shi->xs= 0; // TODO
00183         shi->ys= 0;
00184         
00185         shade_input_set_normals(shi);
00186 
00187         /* no normal flip */
00188         if(shi->flippednor)
00189                 shade_input_flip_normals(shi);
00190 
00191         madd_v3_v3fl(shi->co, shi->vn, 0.0001f); /* ugly.. */
00192 
00193         /* not a pretty solution, but fixes common cases */
00194         if(shi->obr->ob && shi->obr->ob->transflag & OB_NEG_SCALE) {
00195                 negate_v3(shi->vn);
00196                 negate_v3(shi->vno);
00197                 negate_v3(shi->nmapnorm);
00198         }
00199 
00200         /* init material vars */
00201         // note, keep this synced with render_types.h
00202         memcpy(&shi->r, &shi->mat->r, 23*sizeof(float));
00203         shi->har= shi->mat->har;
00204         
00205         /* render */
00206         shade_input_set_shade_texco(shi);
00207         shade_material_loop(shi, shr); /* todo: nodes */
00208         
00209         VECCOPY(rad, shr->combined);
00210 }
00211 
00212 static void occ_build_shade(Render *re, OcclusionTree *tree)
00213 {
00214         ShadeSample ssamp;
00215         ObjectInstanceRen *obi;
00216         VlakRen *vlr;
00217         int a;
00218 
00219         R= *re;
00220 
00221         /* setup shade sample with correct passes */
00222         memset(&ssamp, 0, sizeof(ShadeSample));
00223         ssamp.shi[0].lay= re->lay;
00224         ssamp.shi[0].passflag= SCE_PASS_DIFFUSE|SCE_PASS_RGBA;
00225         ssamp.shi[0].combinedflag= ~(SCE_PASS_SPEC);
00226         ssamp.tot= 1;
00227 
00228         for(a=0; a<tree->totface; a++) {
00229                 obi= &R.objectinstance[tree->face[a].obi];
00230                 vlr= RE_findOrAddVlak(obi->obr, tree->face[a].facenr);
00231 
00232                 occ_shade(&ssamp, obi, vlr, tree->rad[a]);
00233         }
00234 }
00235 
00236 /* ------------------------- Spherical Harmonics --------------------------- */
00237 
00238 /* Use 2nd order SH => 9 coefficients, stored in this order:
00239    0 = (0,0),
00240    1 = (1,-1), 2 = (1,0), 3 = (1,1),
00241    4 = (2,-2), 5 = (2,-1), 6 = (2,0), 7 = (2,1), 8 = (2,2) */
00242 
00243 static void sh_copy(float *shresult, float *sh)
00244 {
00245         memcpy(shresult, sh, sizeof(float)*9);
00246 }
00247 
00248 static void sh_mul(float *sh, float f)
00249 {
00250         int i;
00251 
00252         for(i=0; i<9; i++)
00253                 sh[i] *= f;
00254 }
00255 
00256 static void sh_add(float *shresult, float *sh1, float *sh2)
00257 {
00258         int i;
00259 
00260         for(i=0; i<9; i++)
00261                 shresult[i]= sh1[i] + sh2[i];
00262 }
00263 
00264 static void sh_from_disc(float *n, float area, float *shresult)
00265 {
00266         /* See formula (3) in:
00267            "An Efficient Representation for Irradiance Environment Maps" */
00268         float sh[9], x, y, z;
00269 
00270         x= n[0];
00271         y= n[1];
00272         z= n[2];
00273 
00274         sh[0]= 0.282095f;
00275 
00276         sh[1]= 0.488603f*y;
00277         sh[2]= 0.488603f*z;
00278         sh[3]= 0.488603f*x;
00279         
00280         sh[4]= 1.092548f*x*y;
00281         sh[5]= 1.092548f*y*z;
00282         sh[6]= 0.315392f*(3.0f*z*z - 1.0f);
00283         sh[7]= 1.092548f*x*z;
00284         sh[8]= 0.546274f*(x*x - y*y);
00285 
00286         sh_mul(sh, area);
00287         sh_copy(shresult, sh);
00288 }
00289 
00290 static float sh_eval(float *sh, float *v)
00291 {
00292         /* See formula (13) in:
00293            "An Efficient Representation for Irradiance Environment Maps" */
00294         static const float c1 = 0.429043f, c2 = 0.511664f, c3 = 0.743125f;
00295         static const float c4 = 0.886227f, c5 = 0.247708f;
00296         float x, y, z, sum;
00297 
00298         x= v[0];
00299         y= v[1];
00300         z= v[2];
00301 
00302         sum= c1*sh[8]*(x*x - y*y);
00303         sum += c3*sh[6]*z*z;
00304         sum += c4*sh[0];
00305         sum += -c5*sh[6];
00306         sum += 2.0f*c1*(sh[4]*x*y + sh[7]*x*z + sh[5]*y*z);
00307         sum += 2.0f*c2*(sh[3]*x + sh[1]*y + sh[2]*z);
00308 
00309         return sum;
00310 }
00311 
00312 /* ------------------------------ Building --------------------------------- */
00313 
00314 static void occ_face(const OccFace *face, float co[3], float normal[3], float *area)
00315 {
00316         ObjectInstanceRen *obi;
00317         VlakRen *vlr;
00318         float v1[3], v2[3], v3[3], v4[3];
00319 
00320         obi= &R.objectinstance[face->obi];
00321         vlr= RE_findOrAddVlak(obi->obr, face->facenr);
00322         
00323         if(co) {
00324                 if(vlr->v4)
00325                         interp_v3_v3v3(co, vlr->v1->co, vlr->v3->co, 0.5f);
00326                 else
00327                         cent_tri_v3(co, vlr->v1->co, vlr->v2->co, vlr->v3->co);
00328 
00329                 if(obi->flag & R_TRANSFORMED)
00330                         mul_m4_v3(obi->mat, co);
00331         }
00332         
00333         if(normal) {
00334                 normal[0]= -vlr->n[0];
00335                 normal[1]= -vlr->n[1];
00336                 normal[2]= -vlr->n[2];
00337 
00338                 if(obi->flag & R_TRANSFORMED)
00339                         mul_m3_v3(obi->nmat, normal);
00340         }
00341 
00342         if(area) {
00343                 VECCOPY(v1, vlr->v1->co);
00344                 VECCOPY(v2, vlr->v2->co);
00345                 VECCOPY(v3, vlr->v3->co);
00346                 if(vlr->v4) VECCOPY(v4, vlr->v4->co);
00347 
00348                 if(obi->flag & R_TRANSFORMED) {
00349                         mul_m4_v3(obi->mat, v1);
00350                         mul_m4_v3(obi->mat, v2);
00351                         mul_m4_v3(obi->mat, v3);
00352                         if(vlr->v4) mul_m4_v3(obi->mat, v4);
00353                 }
00354 
00355                 /* todo: correct area for instances */
00356                 if(vlr->v4)
00357                         *area= area_quad_v3(v1, v2, v3, v4);
00358                 else
00359                         *area= area_tri_v3(v1, v2, v3);
00360         }
00361 }
00362 
00363 static void occ_sum_occlusion(OcclusionTree *tree, OccNode *node)
00364 {
00365         OccNode *child;
00366         float occ, area, totarea, rad[3];
00367         int a, b, indirect= tree->doindirect;
00368 
00369         occ= 0.0f;
00370         totarea= 0.0f;
00371         if(indirect) zero_v3(rad);
00372 
00373         for(b=0; b<TOTCHILD; b++) {
00374                 if(node->childflag & (1<<b)) {
00375                         a= node->child[b].face;
00376                         occ_face(&tree->face[a], 0, 0, &area);
00377                         occ += area*tree->occlusion[a];
00378                         if(indirect) madd_v3_v3fl(rad, tree->rad[a], area);
00379                         totarea += area;
00380                 }
00381                 else if(node->child[b].node) {
00382                         child= node->child[b].node;
00383                         occ_sum_occlusion(tree, child);
00384 
00385                         occ += child->area*child->occlusion;
00386                         if(indirect) madd_v3_v3fl(rad, child->rad, child->area);
00387                         totarea += child->area;
00388                 }
00389         }
00390 
00391         if(totarea != 0.0f) {
00392                 occ /= totarea;
00393                 if(indirect) mul_v3_fl(rad, 1.0f/totarea);
00394         }
00395         
00396         node->occlusion= occ;
00397         if(indirect) copy_v3_v3(node->rad, rad);
00398 }
00399 
00400 static int occ_find_bbox_axis(OcclusionTree *tree, int begin, int end, float *min, float *max)
00401 {
00402         float len, maxlen= -1.0f;
00403         int a, axis = 0;
00404 
00405         INIT_MINMAX(min, max);
00406 
00407         for(a=begin; a<end; a++)
00408                 DO_MINMAX(tree->co[a], min, max)
00409 
00410         for(a=0; a<3; a++) {
00411                 len= max[a] - min[a];
00412 
00413                 if(len > maxlen) {
00414                         maxlen= len;
00415                         axis= a;
00416                 }
00417         }
00418 
00419         return axis;
00420 }
00421 
00422 static void occ_node_from_face(OccFace *face, OccNode *node)
00423 {
00424         float n[3];
00425 
00426         occ_face(face, node->co, n, &node->area);
00427         node->dco= 0.0f;
00428         sh_from_disc(n, node->area, node->sh);
00429 }
00430 
00431 static void occ_build_dco(OcclusionTree *tree, OccNode *node, const float co[3], float *dco)
00432 {
00433         int b;
00434         for(b=0; b<TOTCHILD; b++) {
00435                 float dist, d[3], nco[3];
00436 
00437                 if(node->childflag & (1<<b)) {
00438                         occ_face(tree->face+node->child[b].face, nco, NULL, NULL);
00439                 }
00440                 else if(node->child[b].node) {
00441                         OccNode *child= node->child[b].node;
00442                         occ_build_dco(tree, child, co, dco);
00443                         VECCOPY(nco, child->co);
00444                 }
00445                 else {
00446                         continue;
00447                 }
00448 
00449                 VECSUB(d, nco, co);
00450                 dist= INPR(d, d);
00451                 if(dist > *dco)
00452                         *dco= dist;
00453         }
00454 }
00455 
00456 static void occ_build_split(OcclusionTree *tree, int begin, int end, int *split)
00457 {
00458         float min[3], max[3], mid;
00459         int axis, a, enda;
00460 
00461         /* split in middle of boundbox. this seems faster than median split
00462          * on complex scenes, possibly since it avoids two distant faces to
00463          * be in the same node better? */
00464         axis= occ_find_bbox_axis(tree, begin, end, min, max);
00465         mid= 0.5f*(min[axis]+max[axis]);
00466 
00467         a= begin;
00468         enda= end;
00469         while(a<enda) {
00470                 if(tree->co[a][axis] > mid) {
00471                         enda--;
00472                         SWAP(OccFace, tree->face[a], tree->face[enda]);
00473                         SWAP(float, tree->co[a][0], tree->co[enda][0]);
00474                         SWAP(float, tree->co[a][1], tree->co[enda][1]);
00475                         SWAP(float, tree->co[a][2], tree->co[enda][2]);
00476                 }
00477                 else
00478                         a++;
00479         }
00480 
00481         *split= enda;
00482 }
00483 
00484 static void occ_build_8_split(OcclusionTree *tree, int begin, int end, int *offset, int *count)
00485 {
00486         /* split faces into eight groups */
00487         int b, splitx, splity[2], splitz[4];
00488 
00489         occ_build_split(tree, begin, end, &splitx);
00490 
00491         /* force split if none found, to deal with degenerate geometry */
00492         if(splitx == begin || splitx == end)
00493                 splitx= (begin+end)/2;
00494 
00495         occ_build_split(tree, begin, splitx, &splity[0]);
00496         occ_build_split(tree, splitx, end, &splity[1]);
00497 
00498         occ_build_split(tree, begin, splity[0], &splitz[0]);
00499         occ_build_split(tree, splity[0], splitx, &splitz[1]);
00500         occ_build_split(tree, splitx, splity[1], &splitz[2]);
00501         occ_build_split(tree, splity[1], end, &splitz[3]);
00502 
00503         offset[0]= begin;
00504         offset[1]= splitz[0];
00505         offset[2]= splity[0];
00506         offset[3]= splitz[1];
00507         offset[4]= splitx;
00508         offset[5]= splitz[2];
00509         offset[6]= splity[1];
00510         offset[7]= splitz[3];
00511 
00512         for(b=0; b<7; b++)
00513                 count[b]= offset[b+1] - offset[b];
00514         count[7]= end - offset[7];
00515 }
00516 
00517 static void occ_build_recursive(OcclusionTree *tree, OccNode *node, int begin, int end, int depth);
00518 
00519 static void *exec_occ_build(void *data)
00520 {
00521         OcclusionBuildThread *othread= (OcclusionBuildThread*)data;
00522 
00523         occ_build_recursive(othread->tree, othread->node, othread->begin, othread->end, othread->depth);
00524 
00525         return 0;
00526 }
00527 
00528 static void occ_build_recursive(OcclusionTree *tree, OccNode *node, int begin, int end, int depth)
00529 {
00530         ListBase threads;
00531         OcclusionBuildThread othreads[BLENDER_MAX_THREADS];
00532         OccNode *child, tmpnode;
00533         /* OccFace *face; */
00534         int a, b, totthread=0, offset[TOTCHILD], count[TOTCHILD];
00535 
00536         /* add a new node */
00537         node->occlusion= 1.0f;
00538 
00539         /* leaf node with only children */
00540         if(end - begin <= TOTCHILD) {
00541                 for(a=begin, b=0; a<end; a++, b++) {
00542                         /* face= &tree->face[a]; */
00543                         node->child[b].face= a;
00544                         node->childflag |= (1<<b);
00545                 }
00546         }
00547         else {
00548                 /* order faces */
00549                 occ_build_8_split(tree, begin, end, offset, count);
00550 
00551                 if(depth == 1 && tree->dothreadedbuild)
00552                         BLI_init_threads(&threads, exec_occ_build, tree->totbuildthread);
00553 
00554                 for(b=0; b<TOTCHILD; b++) {
00555                         if(count[b] == 0) {
00556                                 node->child[b].node= NULL;
00557                         }
00558                         else if(count[b] == 1) {
00559                                 /* face= &tree->face[offset[b]]; */
00560                                 node->child[b].face= offset[b];
00561                                 node->childflag |= (1<<b);
00562                         }
00563                         else {
00564                                 if(tree->dothreadedbuild)
00565                                         BLI_lock_thread(LOCK_CUSTOM1);
00566 
00567                                 child= BLI_memarena_alloc(tree->arena, sizeof(OccNode));
00568                                 node->child[b].node= child;
00569 
00570                                 /* keep track of maximum depth for stack */
00571                                 if(depth+1 > tree->maxdepth)
00572                                         tree->maxdepth= depth+1;
00573 
00574                                 if(tree->dothreadedbuild)
00575                                         BLI_unlock_thread(LOCK_CUSTOM1);
00576 
00577                                 if(depth == 1 && tree->dothreadedbuild) {
00578                                         othreads[totthread].tree= tree;
00579                                         othreads[totthread].node= child;
00580                                         othreads[totthread].begin= offset[b];
00581                                         othreads[totthread].end= offset[b]+count[b];
00582                                         othreads[totthread].depth= depth+1;
00583                                         BLI_insert_thread(&threads, &othreads[totthread]);
00584                                         totthread++;
00585                                 }
00586                                 else
00587                                         occ_build_recursive(tree, child, offset[b], offset[b]+count[b], depth+1);
00588                         }
00589                 }
00590 
00591                 if(depth == 1 && tree->dothreadedbuild)
00592                         BLI_end_threads(&threads);
00593         }
00594 
00595         /* combine area, position and sh */
00596         for(b=0; b<TOTCHILD; b++) {
00597                 if(node->childflag & (1<<b)) {
00598                         child= &tmpnode;
00599                         occ_node_from_face(tree->face+node->child[b].face, &tmpnode);
00600                 }
00601                 else {
00602                         child= node->child[b].node;
00603                 }
00604 
00605                 if(child) {
00606                         node->area += child->area;
00607                         sh_add(node->sh, node->sh, child->sh);
00608                         VECADDFAC(node->co, node->co, child->co, child->area);
00609                 }
00610         }
00611 
00612         if(node->area != 0.0f)
00613                 mul_v3_fl(node->co, 1.0f/node->area);
00614 
00615         /* compute maximum distance from center */
00616         node->dco= 0.0f;
00617         if(node->area > 0.0f)
00618                 occ_build_dco(tree, node, node->co, &node->dco);
00619 }
00620 
00621 static void occ_build_sh_normalize(OccNode *node)
00622 {
00623         /* normalize spherical harmonics to not include area, so
00624          * we can clamp the dot product and then mutliply by area */
00625         int b;
00626 
00627         if(node->area != 0.0f)
00628                 sh_mul(node->sh, 1.0f/node->area);
00629 
00630         for(b=0; b<TOTCHILD; b++) {
00631                 if(node->childflag & (1<<b));
00632                 else if(node->child[b].node)
00633                         occ_build_sh_normalize(node->child[b].node);
00634         }
00635 }
00636 
00637 static OcclusionTree *occ_tree_build(Render *re)
00638 {
00639         OcclusionTree *tree;
00640         ObjectInstanceRen *obi;
00641         ObjectRen *obr;
00642         Material *ma;
00643         VlakRen *vlr= NULL;
00644         int a, b, c, totface;
00645 
00646         /* count */
00647         totface= 0;
00648         for(obi=re->instancetable.first; obi; obi=obi->next) {
00649                 obr= obi->obr;
00650                 for(a=0; a<obr->totvlak; a++) {
00651                         if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
00652                         else vlr++;
00653 
00654                         ma= vlr->mat;
00655 
00656                         if((ma->shade_flag & MA_APPROX_OCCLUSION) && (ma->material_type == MA_TYPE_SURFACE))
00657                                 totface++;
00658                 }
00659         }
00660 
00661         if(totface == 0)
00662                 return NULL;
00663         
00664         tree= MEM_callocN(sizeof(OcclusionTree), "OcclusionTree");
00665         tree->totface= totface;
00666 
00667         /* parameters */
00668         tree->error= get_render_aosss_error(&re->r, re->wrld.ao_approx_error);
00669         tree->distfac= (re->wrld.aomode & WO_AODIST)? re->wrld.aodistfac: 0.0f;
00670         tree->doindirect= (re->wrld.ao_indirect_energy > 0.0f && re->wrld.ao_indirect_bounces > 0);
00671 
00672         /* allocation */
00673         tree->arena= BLI_memarena_new(0x8000 * sizeof(OccNode), "occ tree arena");
00674         BLI_memarena_use_calloc(tree->arena);
00675 
00676         if(re->wrld.aomode & WO_AOCACHE)
00677                 tree->cache= MEM_callocN(sizeof(OcclusionCache)*BLENDER_MAX_THREADS, "OcclusionCache");
00678 
00679         tree->face= MEM_callocN(sizeof(OccFace)*totface, "OcclusionFace");
00680         tree->co= MEM_callocN(sizeof(float)*3*totface, "OcclusionCo");
00681         tree->occlusion= MEM_callocN(sizeof(float)*totface, "OcclusionOcclusion");
00682 
00683         if(tree->doindirect)
00684                 tree->rad= MEM_callocN(sizeof(float)*3*totface, "OcclusionRad");
00685 
00686         /* make array of face pointers */
00687         for(b=0, c=0, obi=re->instancetable.first; obi; obi=obi->next, c++) {
00688                 obr= obi->obr;
00689                 for(a=0; a<obr->totvlak; a++) {
00690                         if((a & 255)==0) vlr= obr->vlaknodes[a>>8].vlak;
00691                         else vlr++;
00692 
00693                         ma= vlr->mat;
00694 
00695                         if((ma->shade_flag & MA_APPROX_OCCLUSION) && (ma->material_type == MA_TYPE_SURFACE)) {
00696                                 tree->face[b].obi= c;
00697                                 tree->face[b].facenr= a;
00698                                 tree->occlusion[b]= 1.0f;
00699                                 occ_face(&tree->face[b], tree->co[b], NULL, NULL); 
00700                                 b++;
00701                         }
00702                 }
00703         }
00704 
00705         /* threads */
00706         tree->totbuildthread= (re->r.threads > 1 && totface > 10000)? 8: 1;
00707         tree->dothreadedbuild= (tree->totbuildthread > 1);
00708 
00709         /* recurse */
00710         tree->root= BLI_memarena_alloc(tree->arena, sizeof(OccNode));
00711         tree->maxdepth= 1;
00712         occ_build_recursive(tree, tree->root, 0, totface, 1);
00713 
00714         if(tree->doindirect) {
00715                 occ_build_shade(re, tree);
00716                 occ_sum_occlusion(tree, tree->root);
00717         }
00718         
00719         MEM_freeN(tree->co);
00720         tree->co= NULL;
00721 
00722         occ_build_sh_normalize(tree->root);
00723 
00724         for(a=0; a<BLENDER_MAX_THREADS; a++)
00725                 tree->stack[a]= MEM_callocN(sizeof(OccNode)*TOTCHILD*(tree->maxdepth+1), "OccStack");
00726 
00727         return tree;
00728 }
00729 
00730 static void occ_free_tree(OcclusionTree *tree)
00731 {
00732         int a;
00733 
00734         if(tree) {
00735                 if(tree->arena) BLI_memarena_free(tree->arena);
00736                 for(a=0; a<BLENDER_MAX_THREADS; a++)
00737                         if(tree->stack[a])
00738                                 MEM_freeN(tree->stack[a]);
00739                 if(tree->occlusion) MEM_freeN(tree->occlusion);
00740                 if(tree->cache) MEM_freeN(tree->cache);
00741                 if(tree->face) MEM_freeN(tree->face);
00742                 if(tree->rad) MEM_freeN(tree->rad);
00743                 MEM_freeN(tree);
00744         }
00745 }
00746 
00747 /* ------------------------- Traversal --------------------------- */
00748 
00749 static float occ_solid_angle(OccNode *node, float *v, float d2, float invd2, float *receivenormal)
00750 {
00751         float dotreceive, dotemit;
00752         float ev[3];
00753 
00754         ev[0]= -v[0]*invd2;
00755         ev[1]= -v[1]*invd2;
00756         ev[2]= -v[2]*invd2;
00757         dotemit= sh_eval(node->sh, ev);
00758         dotreceive= INPR(receivenormal, v)*invd2;
00759 
00760         CLAMP(dotemit, 0.0f, 1.0f);
00761         CLAMP(dotreceive, 0.0f, 1.0f);
00762         
00763         return ((node->area*dotemit*dotreceive)/(d2 + node->area*INVPI))*INVPI;
00764 }
00765 
00766 static void VecAddDir(float *result, float *v1, float *v2, float fac)
00767 {
00768         result[0]= v1[0] + fac*(v2[0] - v1[0]);
00769         result[1]= v1[1] + fac*(v2[1] - v1[1]);
00770         result[2]= v1[2] + fac*(v2[2] - v1[2]);
00771 }
00772 
00773 static int occ_visible_quad(float *p, float *n, float *v0, float *v1, float *v2, float *q0, float *q1, float *q2, float *q3)
00774 {
00775         static const float epsilon = 1e-6f;
00776         float c, sd[3];
00777         
00778         c= INPR(n, p);
00779 
00780         /* signed distances from the vertices to the plane. */
00781         sd[0]= INPR(n, v0) - c;
00782         sd[1]= INPR(n, v1) - c;
00783         sd[2]= INPR(n, v2) - c;
00784 
00785         if(fabsf(sd[0]) < epsilon) sd[0] = 0.0f;
00786         if(fabsf(sd[1]) < epsilon) sd[1] = 0.0f;
00787         if(fabsf(sd[2]) < epsilon) sd[2] = 0.0f;
00788 
00789         if(sd[0] > 0) {
00790                 if(sd[1] > 0) {
00791                         if(sd[2] > 0) {
00792                                 // +++
00793                                 VECCOPY(q0, v0);
00794                                 VECCOPY(q1, v1);
00795                                 VECCOPY(q2, v2);
00796                                 VECCOPY(q3, q2);
00797                         }
00798                         else if(sd[2] < 0) {
00799                                 // ++-
00800                                 VECCOPY(q0, v0);
00801                                 VECCOPY(q1, v1);
00802                                 VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2])));
00803                                 VecAddDir(q3, v0, v2, (sd[0]/(sd[0]-sd[2])));
00804                         }
00805                         else {
00806                                 // ++0
00807                                 VECCOPY(q0, v0);
00808                                 VECCOPY(q1, v1);
00809                                 VECCOPY(q2, v2);
00810                                 VECCOPY(q3, q2);
00811                         }
00812                 }
00813                 else if(sd[1] < 0) {
00814                         if(sd[2] > 0) {
00815                                 // +-+
00816                                 VECCOPY(q0, v0);
00817                                 VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1])));
00818                                 VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2])));
00819                                 VECCOPY(q3, v2);
00820                         }
00821                         else if(sd[2] < 0) {
00822                                 // +--
00823                                 VECCOPY(q0, v0);
00824                                 VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1])));
00825                                 VecAddDir(q2, v0, v2, (sd[0]/(sd[0]-sd[2])));
00826                                 VECCOPY(q3, q2);
00827                         }
00828                         else {
00829                                 // +-0
00830                                 VECCOPY(q0, v0);
00831                                 VecAddDir(q1, v0, v1, (sd[0]/(sd[0]-sd[1])));
00832                                 VECCOPY(q2, v2);
00833                                 VECCOPY(q3, q2);
00834                         }
00835                 }
00836                 else {
00837                         if(sd[2] > 0) {
00838                                 // +0+
00839                                 VECCOPY(q0, v0);
00840                                 VECCOPY(q1, v1);
00841                                 VECCOPY(q2, v2);
00842                                 VECCOPY(q3, q2);
00843                         }
00844                         else if(sd[2] < 0) {
00845                                 // +0-
00846                                 VECCOPY(q0, v0);
00847                                 VECCOPY(q1, v1);
00848                                 VecAddDir(q2, v0, v2, (sd[0]/(sd[0]-sd[2])));
00849                                 VECCOPY(q3, q2);
00850                         }
00851                         else {
00852                                 // +00
00853                                 VECCOPY(q0, v0);
00854                                 VECCOPY(q1, v1);
00855                                 VECCOPY(q2, v2);
00856                                 VECCOPY(q3, q2);
00857                         }
00858                 }
00859         }
00860         else if(sd[0] < 0) {
00861                 if(sd[1] > 0) {
00862                         if(sd[2] > 0) {
00863                                 // -++
00864                                 VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1])));
00865                                 VECCOPY(q1, v1);
00866                                 VECCOPY(q2, v2);
00867                                 VecAddDir(q3, v0, v2, (sd[0]/(sd[0]-sd[2])));
00868                         }
00869                         else if(sd[2] < 0) {
00870                                 // -+-
00871                                 VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1])));
00872                                 VECCOPY(q1, v1);
00873                                 VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2])));
00874                                 VECCOPY(q3, q2);
00875                         }
00876                         else {
00877                                 // -+0
00878                                 VecAddDir(q0, v0, v1, (sd[0]/(sd[0]-sd[1])));
00879                                 VECCOPY(q1, v1);
00880                                 VECCOPY(q2, v2);
00881                                 VECCOPY(q3, q2);
00882                         }
00883                 }
00884                 else if(sd[1] < 0) {
00885                         if(sd[2] > 0) {
00886                                 // --+
00887                                 VecAddDir(q0, v0, v2, (sd[0]/(sd[0]-sd[2])));
00888                                 VecAddDir(q1, v1, v2, (sd[1]/(sd[1]-sd[2])));
00889                                 VECCOPY(q2, v2);
00890                                 VECCOPY(q3, q2);
00891                         }
00892                         else if(sd[2] < 0) {
00893                                 // ---
00894                                 return 0;
00895                         }
00896                         else {
00897                                 // --0
00898                                 return 0;
00899                         }
00900                 }
00901                 else {
00902                         if(sd[2] > 0) {
00903                                 // -0+
00904                                 VecAddDir(q0, v0, v2, (sd[0]/(sd[0]-sd[2])));
00905                                 VECCOPY(q1, v1);
00906                                 VECCOPY(q2, v2);
00907                                 VECCOPY(q3, q2);
00908                         }
00909                         else if(sd[2] < 0) {
00910                                 // -0-
00911                                 return 0;
00912                         }
00913                         else {
00914                                 // -00
00915                                 return 0;
00916                         }
00917                 }
00918         }
00919         else {
00920                 if(sd[1] > 0) {
00921                         if(sd[2] > 0) {
00922                                 // 0++
00923                                 VECCOPY(q0, v0);
00924                                 VECCOPY(q1, v1);
00925                                 VECCOPY(q2, v2);
00926                                 VECCOPY(q3, q2);
00927                         }
00928                         else if(sd[2] < 0) {
00929                                 // 0+-
00930                                 VECCOPY(q0, v0);
00931                                 VECCOPY(q1, v1);
00932                                 VecAddDir(q2, v1, v2, (sd[1]/(sd[1]-sd[2])));
00933                                 VECCOPY(q3, q2);
00934                         }
00935                         else {
00936                                 // 0+0
00937                                 VECCOPY(q0, v0);
00938                                 VECCOPY(q1, v1);
00939                                 VECCOPY(q2, v2);
00940                                 VECCOPY(q3, q2);
00941                         }
00942                 }
00943                 else if(sd[1] < 0) {
00944                         if(sd[2] > 0) {
00945                                 // 0-+
00946                                 VECCOPY(q0, v0);
00947                                 VecAddDir(q1, v1, v2, (sd[1]/(sd[1]-sd[2])));
00948                                 VECCOPY(q2, v2);
00949                                 VECCOPY(q3, q2);
00950                         }
00951                         else if(sd[2] < 0) {
00952                                 // 0--
00953                                 return 0;
00954                         }
00955                         else {
00956                                 // 0-0
00957                                 return 0;
00958                         }
00959                 }
00960                 else {
00961                         if(sd[2] > 0) {
00962                                 // 00+
00963                                 VECCOPY(q0, v0);
00964                                 VECCOPY(q1, v1);
00965                                 VECCOPY(q2, v2);
00966                                 VECCOPY(q3, q2);
00967                         }
00968                         else if(sd[2] < 0) {
00969                                 // 00-
00970                                 return 0;
00971                         }
00972                         else {
00973                                 // 000
00974                                 return 0;
00975                         }
00976                 }
00977         }
00978 
00979         return 1;
00980 }
00981 
00982 /* altivec optimization, this works, but is unused */
00983 
00984 #if 0
00985 #include <Accelerate/Accelerate.h>
00986 
00987 typedef union {
00988         vFloat v;
00989         float f[4];
00990 } vFloatResult;
00991 
00992 static vFloat vec_splat_float(float val)
00993 {
00994         return (vFloat){val, val, val, val};
00995 }
00996 
00997 static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3)
00998 {
00999         vFloat vcos, rlen, vrx, vry, vrz, vsrx, vsry, vsrz, gx, gy, gz, vangle;
01000         vUInt8 rotate = (vUInt8){4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3};
01001         vFloatResult vresult;
01002         float result;
01003 
01004         /* compute r* */
01005         vrx = (vFloat){q0[0], q1[0], q2[0], q3[0]} - vec_splat_float(p[0]);
01006         vry = (vFloat){q0[1], q1[1], q2[1], q3[1]} - vec_splat_float(p[1]);
01007         vrz = (vFloat){q0[2], q1[2], q2[2], q3[2]} - vec_splat_float(p[2]);
01008 
01009         /* normalize r* */
01010         rlen = vec_rsqrte(vrx*vrx + vry*vry + vrz*vrz + vec_splat_float(1e-16f));
01011         vrx = vrx*rlen;
01012         vry = vry*rlen;
01013         vrz = vrz*rlen;
01014 
01015         /* rotate r* for cross and dot */
01016         vsrx= vec_perm(vrx, vrx, rotate);
01017         vsry= vec_perm(vry, vry, rotate);
01018         vsrz= vec_perm(vrz, vrz, rotate);
01019 
01020         /* cross product */
01021         gx = vsry*vrz - vsrz*vry;
01022         gy = vsrz*vrx - vsrx*vrz;
01023         gz = vsrx*vry - vsry*vrx;
01024 
01025         /* normalize */
01026         rlen = vec_rsqrte(gx*gx + gy*gy + gz*gz + vec_splat_float(1e-16f));
01027         gx = gx*rlen;
01028         gy = gy*rlen;
01029         gz = gz*rlen;
01030 
01031         /* angle */
01032         vcos = vrx*vsrx + vry*vsry + vrz*vsrz;
01033         vcos= vec_max(vec_min(vcos, vec_splat_float(1.0f)), vec_splat_float(-1.0f));
01034         vangle= vacosf(vcos);
01035 
01036         /* dot */
01037         vresult.v = (vec_splat_float(n[0])*gx +
01038                                  vec_splat_float(n[1])*gy +
01039                                  vec_splat_float(n[2])*gz)*vangle;
01040 
01041         result= (vresult.f[0] + vresult.f[1] + vresult.f[2] + vresult.f[3])*(0.5f/(float)M_PI);
01042         result= MAX2(result, 0.0f);
01043 
01044         return result;
01045 }
01046 
01047 #endif
01048 
01049 /* SSE optimization, acos code doesn't work */
01050 
01051 #if 0
01052 
01053 #include <xmmintrin.h>
01054 
01055 static __m128 sse_approx_acos(__m128 x)
01056 {
01057         /* needs a better approximation than taylor expansion of acos, since that
01058          * gives big erros for near 1.0 values, sqrt(2*x)*acos(1-x) should work
01059          * better, see http://www.tom.womack.net/projects/sse-fast-arctrig.html */
01060 
01061         return _mm_set_ps1(1.0f);
01062 }
01063 
01064 static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3)
01065 {
01066         float r0[3], r1[3], r2[3], r3[3], g0[3], g1[3], g2[3], g3[3];
01067         float a1, a2, a3, a4, dot1, dot2, dot3, dot4, result;
01068         float fresult[4] __attribute__((aligned(16)));
01069         __m128 qx, qy, qz, rx, ry, rz, rlen, srx, sry, srz, gx, gy, gz, glen, rcos, angle, aresult;
01070 
01071         /* compute r */
01072         qx = _mm_set_ps(q3[0], q2[0], q1[0], q0[0]);
01073         qy = _mm_set_ps(q3[1], q2[1], q1[1], q0[1]);
01074         qz = _mm_set_ps(q3[2], q2[2], q1[2], q0[2]);
01075 
01076         rx = qx - _mm_set_ps1(p[0]);
01077         ry = qy - _mm_set_ps1(p[1]);
01078         rz = qz - _mm_set_ps1(p[2]);
01079 
01080         /* normalize r */
01081         rlen = _mm_rsqrt_ps(rx*rx + ry*ry + rz*rz + _mm_set_ps1(1e-16f));
01082         rx = rx*rlen;
01083         ry = ry*rlen;
01084         rz = rz*rlen;
01085 
01086         /* cross product */
01087         srx = _mm_shuffle_ps(rx, rx, _MM_SHUFFLE(0,3,2,1));
01088         sry = _mm_shuffle_ps(ry, ry, _MM_SHUFFLE(0,3,2,1));
01089         srz = _mm_shuffle_ps(rz, rz, _MM_SHUFFLE(0,3,2,1));
01090 
01091         gx = sry*rz - srz*ry;
01092         gy = srz*rx - srx*rz;
01093         gz = srx*ry - sry*rx;
01094 
01095         /* normalize g */
01096         glen = _mm_rsqrt_ps(gx*gx + gy*gy + gz*gz + _mm_set_ps1(1e-16f));
01097         gx = gx*glen;
01098         gy = gy*glen;
01099         gz = gz*glen;
01100 
01101         /* compute angle */
01102         rcos = rx*srx + ry*sry + rz*srz;
01103         rcos= _mm_max_ps(_mm_min_ps(rcos, _mm_set_ps1(1.0f)), _mm_set_ps1(-1.0f));
01104 
01105         angle = sse_approx_cos(rcos);
01106         aresult = (_mm_set_ps1(n[0])*gx + _mm_set_ps1(n[1])*gy + _mm_set_ps1(n[2])*gz)*angle;
01107 
01108         /* sum together */
01109         result= (fresult[0] + fresult[1] + fresult[2] + fresult[3])*(0.5f/(float)M_PI);
01110         result= MAX2(result, 0.0f);
01111 
01112         return result;
01113 }
01114 
01115 #endif
01116 
01117 static void normalizef(float *n)
01118 {
01119         float d;
01120         
01121         d= INPR(n, n);
01122 
01123         if(d > 1.0e-35F) {
01124                 d= 1.0f/sqrtf(d);
01125 
01126                 n[0] *= d; 
01127                 n[1] *= d; 
01128                 n[2] *= d;
01129         } 
01130 }
01131 
01132 static float occ_quad_form_factor(float *p, float *n, float *q0, float *q1, float *q2, float *q3)
01133 {
01134         float r0[3], r1[3], r2[3], r3[3], g0[3], g1[3], g2[3], g3[3];
01135         float a1, a2, a3, a4, dot1, dot2, dot3, dot4, result;
01136 
01137         VECSUB(r0, q0, p);
01138         VECSUB(r1, q1, p);
01139         VECSUB(r2, q2, p);
01140         VECSUB(r3, q3, p);
01141 
01142         normalizef(r0);
01143         normalizef(r1);
01144         normalizef(r2);
01145         normalizef(r3);
01146 
01147         cross_v3_v3v3(g0, r1, r0); normalizef(g0);
01148         cross_v3_v3v3(g1, r2, r1); normalizef(g1);
01149         cross_v3_v3v3(g2, r3, r2); normalizef(g2);
01150         cross_v3_v3v3(g3, r0, r3); normalizef(g3);
01151 
01152         a1= saacosf(INPR(r0, r1));
01153         a2= saacosf(INPR(r1, r2));
01154         a3= saacosf(INPR(r2, r3));
01155         a4= saacosf(INPR(r3, r0));
01156 
01157         dot1= INPR(n, g0);
01158         dot2= INPR(n, g1);
01159         dot3= INPR(n, g2);
01160         dot4= INPR(n, g3);
01161 
01162         result= (a1*dot1 + a2*dot2 + a3*dot3 + a4*dot4)*0.5f/(float)M_PI;
01163         result= MAX2(result, 0.0f);
01164 
01165         return result;
01166 }
01167 
01168 static float occ_form_factor(OccFace *face, float *p, float *n)
01169 {
01170         ObjectInstanceRen *obi;
01171         VlakRen *vlr;
01172         float v1[3], v2[3], v3[3], v4[3], q0[3], q1[3], q2[3], q3[3], contrib= 0.0f;
01173 
01174         obi= &R.objectinstance[face->obi];
01175         vlr= RE_findOrAddVlak(obi->obr, face->facenr);
01176 
01177         VECCOPY(v1, vlr->v1->co);
01178         VECCOPY(v2, vlr->v2->co);
01179         VECCOPY(v3, vlr->v3->co);
01180 
01181         if(obi->flag & R_TRANSFORMED) {
01182                 mul_m4_v3(obi->mat, v1);
01183                 mul_m4_v3(obi->mat, v2);
01184                 mul_m4_v3(obi->mat, v3);
01185         }
01186 
01187         if(occ_visible_quad(p, n, v1, v2, v3, q0, q1, q2, q3))
01188                 contrib += occ_quad_form_factor(p, n, q0, q1, q2, q3);
01189 
01190         if(vlr->v4) {
01191                 VECCOPY(v4, vlr->v4->co);
01192                 if(obi->flag & R_TRANSFORMED)
01193                         mul_m4_v3(obi->mat, v4);
01194 
01195                 if(occ_visible_quad(p, n, v1, v3, v4, q0, q1, q2, q3))
01196                         contrib += occ_quad_form_factor(p, n, q0, q1, q2, q3);
01197         }
01198 
01199         return contrib;
01200 }
01201 
01202 static void occ_lookup(OcclusionTree *tree, int thread, OccFace *exclude, float *pp, float *pn, float *occ, float rad[3], float bentn[3])
01203 {
01204         OccNode *node, **stack;
01205         OccFace *face;
01206         float resultocc, resultrad[3], v[3], p[3], n[3], co[3], invd2;
01207         float distfac, fac, error, d2, weight, emitarea;
01208         int b, f, totstack;
01209 
01210         /* init variables */
01211         VECCOPY(p, pp);
01212         VECCOPY(n, pn);
01213         VECADDFAC(p, p, n, 1e-4f);
01214 
01215         if(bentn)
01216                 copy_v3_v3(bentn, n);
01217         
01218         error= tree->error;
01219         distfac= tree->distfac;
01220 
01221         resultocc= 0.0f;
01222         zero_v3(resultrad);
01223 
01224         /* init stack */
01225         stack= tree->stack[thread];
01226         stack[0]= tree->root;
01227         totstack= 1;
01228 
01229         while(totstack) {
01230                 /* pop point off the stack */
01231                 node= stack[--totstack];
01232 
01233                 VECSUB(v, node->co, p);
01234                 d2= INPR(v, v) + 1e-16f;
01235                 emitarea= MAX2(node->area, node->dco);
01236 
01237                 if(d2*error > emitarea) {
01238                         if(distfac != 0.0f) {
01239                                 fac= 1.0f/(1.0f + distfac*d2);
01240                                 if(fac < 0.01f)
01241                                         continue;
01242                         }
01243                         else
01244                                 fac= 1.0f;
01245 
01246                         /* accumulate occlusion from spherical harmonics */
01247                         invd2 = 1.0f/sqrtf(d2);
01248                         weight= occ_solid_angle(node, v, d2, invd2, n);
01249 
01250                         if(rad)
01251                                 madd_v3_v3fl(resultrad, node->rad, weight*fac);
01252 
01253                         weight *= node->occlusion;
01254 
01255                         if(bentn) {
01256                                 bentn[0] -= weight*invd2*v[0];
01257                                 bentn[1] -= weight*invd2*v[1];
01258                                 bentn[2] -= weight*invd2*v[2];
01259                         }
01260 
01261                         resultocc += weight*fac;
01262                 }
01263                 else {
01264                         /* traverse into children */
01265                         for(b=0; b<TOTCHILD; b++) {
01266                                 if(node->childflag & (1<<b)) {
01267                                         f= node->child[b].face;
01268                                         face= &tree->face[f];
01269 
01270                                         /* accumulate occlusion with face form factor */
01271                                         if(!exclude || !(face->obi == exclude->obi && face->facenr == exclude->facenr)) {
01272                                                 if(bentn || distfac != 0.0f) {
01273                                                         occ_face(face, co, NULL, NULL); 
01274                                                         VECSUB(v, co, p);
01275                                                         d2= INPR(v, v) + 1e-16f;
01276 
01277                                                         fac= (distfac == 0.0f)? 1.0f: 1.0f/(1.0f + distfac*d2);
01278                                                         if(fac < 0.01f)
01279                                                                 continue;
01280                                                 }
01281                                                 else
01282                                                         fac= 1.0f;
01283 
01284                                                 weight= occ_form_factor(face, p, n);
01285 
01286                                                 if(rad)
01287                                                         madd_v3_v3fl(resultrad, tree->rad[f], weight*fac);
01288 
01289                                                 weight *= tree->occlusion[f];
01290 
01291                                                 if(bentn) {
01292                                                         invd2= 1.0f/sqrtf(d2);
01293                                                         bentn[0] -= weight*invd2*v[0];
01294                                                         bentn[1] -= weight*invd2*v[1];
01295                                                         bentn[2] -= weight*invd2*v[2];
01296                                                 }
01297 
01298                                                 resultocc += weight*fac;
01299                                         }
01300                                 }
01301                                 else if(node->child[b].node) {
01302                                         /* push child on the stack */
01303                                         stack[totstack++]= node->child[b].node;
01304                                 }
01305                         }
01306                 }
01307         }
01308 
01309         if(occ) *occ= resultocc;
01310         if(rad) copy_v3_v3(rad, resultrad);
01311         /*if(rad && exclude) {
01312                 int a;
01313                 for(a=0; a<tree->totface; a++)
01314                         if((tree->face[a].obi == exclude->obi && tree->face[a].facenr == exclude->facenr))
01315                                 copy_v3_v3(rad, tree->rad[a]);
01316         }*/
01317         if(bentn) normalize_v3(bentn);
01318 }
01319 
01320 static void occ_compute_bounces(Render *re, OcclusionTree *tree, int totbounce)
01321 {
01322         float (*rad)[3], (*sum)[3], (*tmp)[3], co[3], n[3], occ;
01323         int bounce, i;
01324 
01325         rad= MEM_callocN(sizeof(float)*3*tree->totface, "OcclusionBounceRad");
01326         sum= MEM_dupallocN(tree->rad);
01327 
01328         for(bounce=1; bounce<totbounce; bounce++) {
01329                 for(i=0; i<tree->totface; i++) {
01330                         occ_face(&tree->face[i], co, n, NULL);
01331                         madd_v3_v3fl(co, n, 1e-8f);
01332 
01333                         occ_lookup(tree, 0, &tree->face[i], co, n, &occ, rad[i], NULL);
01334                         rad[i][0]= MAX2(rad[i][0], 0.0f);
01335                         rad[i][1]= MAX2(rad[i][1], 0.0f);
01336                         rad[i][2]= MAX2(rad[i][2], 0.0f);
01337                         add_v3_v3(sum[i], rad[i]);
01338 
01339                         if(re->test_break(re->tbh))
01340                                 break;
01341                 }
01342 
01343                 if(re->test_break(re->tbh))
01344                         break;
01345 
01346                 tmp= tree->rad;
01347                 tree->rad= rad;
01348                 rad= tmp;
01349 
01350                 occ_sum_occlusion(tree, tree->root);
01351         }
01352 
01353         MEM_freeN(rad);
01354         MEM_freeN(tree->rad);
01355         tree->rad= sum;
01356 
01357         if(!re->test_break(re->tbh))
01358                 occ_sum_occlusion(tree, tree->root);
01359 }
01360 
01361 static void occ_compute_passes(Render *re, OcclusionTree *tree, int totpass)
01362 {
01363         float *occ, co[3], n[3];
01364         int pass, i;
01365         
01366         occ= MEM_callocN(sizeof(float)*tree->totface, "OcclusionPassOcc");
01367 
01368         for(pass=0; pass<totpass; pass++) {
01369                 for(i=0; i<tree->totface; i++) {
01370                         occ_face(&tree->face[i], co, n, NULL);
01371                         negate_v3(n);
01372                         VECADDFAC(co, co, n, 1e-8f);
01373 
01374                         occ_lookup(tree, 0, &tree->face[i], co, n, &occ[i], NULL, NULL);
01375                         if(re->test_break(re->tbh))
01376                                 break;
01377                 }
01378 
01379                 if(re->test_break(re->tbh))
01380                         break;
01381 
01382                 for(i=0; i<tree->totface; i++) {
01383                         tree->occlusion[i] -= occ[i]; //MAX2(1.0f-occ[i], 0.0f);
01384                         if(tree->occlusion[i] < 0.0f)
01385                                 tree->occlusion[i]= 0.0f;
01386                 }
01387 
01388                 occ_sum_occlusion(tree, tree->root);
01389         }
01390 
01391         MEM_freeN(occ);
01392 }
01393 
01394 static void sample_occ_tree(Render *re, OcclusionTree *tree, OccFace *exclude, float *co, float *n, int thread, int onlyshadow, float *ao, float *env, float *indirect)
01395 {
01396         float nn[3], bn[3], fac, occ, occlusion, correction, rad[3];
01397         int envcolor;
01398 
01399         envcolor= re->wrld.aocolor;
01400         if(onlyshadow)
01401                 envcolor= WO_AOPLAIN;
01402 
01403         negate_v3_v3(nn, n);
01404 
01405         occ_lookup(tree, thread, exclude, co, nn, &occ, (tree->doindirect)? rad: NULL, (env && envcolor)? bn: NULL);
01406 
01407         correction= re->wrld.ao_approx_correction;
01408 
01409         occlusion= (1.0f-correction)*(1.0f-occ);
01410         CLAMP(occlusion, 0.0f, 1.0f);
01411         if(correction != 0.0f)
01412                 occlusion += correction*exp(-occ);
01413 
01414         if(env) {
01415                 /* sky shading using bent normal */
01416                 if(ELEM(envcolor, WO_AOSKYCOL, WO_AOSKYTEX)) {
01417                         fac= 0.5*(1.0f+bn[0]*re->grvec[0]+ bn[1]*re->grvec[1]+ bn[2]*re->grvec[2]);
01418                         env[0]= (1.0f-fac)*re->wrld.horr + fac*re->wrld.zenr;
01419                         env[1]= (1.0f-fac)*re->wrld.horg + fac*re->wrld.zeng;
01420                         env[2]= (1.0f-fac)*re->wrld.horb + fac*re->wrld.zenb;
01421 
01422                         mul_v3_fl(env, occlusion);
01423                 }
01424                 else {
01425                         env[0]= occlusion;
01426                         env[1]= occlusion;
01427                         env[2]= occlusion;
01428                 }
01429 #if 0
01430                 else {  /* WO_AOSKYTEX */
01431                         float dxyview[3];
01432                         bn[0]= -bn[0];
01433                         bn[1]= -bn[1];
01434                         bn[2]= -bn[2];
01435                         dxyview[0]= 1.0f;
01436                         dxyview[1]= 1.0f;
01437                         dxyview[2]= 0.0f;
01438                         shadeSkyView(ao, co, bn, dxyview);
01439                 }
01440 #endif
01441         }
01442 
01443         if(ao) {
01444                 ao[0]= occlusion;
01445                 ao[1]= occlusion;
01446                 ao[2]= occlusion;
01447         }
01448 
01449         if(tree->doindirect) copy_v3_v3(indirect, rad);
01450         else zero_v3(indirect);
01451 }
01452 
01453 /* ---------------------------- Caching ------------------------------- */
01454 
01455 static OcclusionCacheSample *find_occ_sample(OcclusionCache *cache, int x, int y)
01456 {
01457         x -= cache->x;
01458         y -= cache->y;
01459 
01460         x /= cache->step;
01461         y /= cache->step;
01462         x *= cache->step;
01463         y *= cache->step;
01464 
01465         if(x < 0 || x >= cache->w || y < 0 || y >= cache->h)
01466                 return NULL;
01467         else
01468                 return &cache->sample[y*cache->w + x];
01469 }
01470 
01471 static int sample_occ_cache(OcclusionTree *tree, float *co, float *n, int x, int y, int thread, float *ao, float *env, float *indirect)
01472 {
01473         OcclusionCache *cache;
01474         OcclusionCacheSample *samples[4], *sample;
01475         float wn[4], wz[4], wb[4], tx, ty, w, totw, mino, maxo;
01476         float d[3], dist2;
01477         int i, x1, y1, x2, y2;
01478 
01479         if(!tree->cache)
01480                 return 0;
01481         
01482         /* first try to find a sample in the same pixel */
01483         cache= &tree->cache[thread];
01484 
01485         if(cache->sample && cache->step) {
01486                 sample= &cache->sample[(y-cache->y)*cache->w + (x-cache->x)];
01487                 if(sample->filled) {
01488                         VECSUB(d, sample->co, co);
01489                         dist2= INPR(d, d);
01490                         if(dist2 < 0.5f*sample->dist2 && INPR(sample->n, n) > 0.98f) {
01491                                 VECCOPY(ao, sample->ao);
01492                                 VECCOPY(env, sample->env);
01493                                 VECCOPY(indirect, sample->indirect);
01494                                 return 1;
01495                         }
01496                 }
01497         }
01498         else
01499                 return 0;
01500 
01501         /* try to interpolate between 4 neighbouring pixels */
01502         samples[0]= find_occ_sample(cache, x, y);
01503         samples[1]= find_occ_sample(cache, x+cache->step, y);
01504         samples[2]= find_occ_sample(cache, x, y+cache->step);
01505         samples[3]= find_occ_sample(cache, x+cache->step, y+cache->step);
01506 
01507         for(i=0; i<4; i++)
01508                 if(!samples[i] || !samples[i]->filled)
01509                         return 0;
01510 
01511         /* require intensities not being too different */
01512         mino= MIN4(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity);
01513         maxo= MAX4(samples[0]->intensity, samples[1]->intensity, samples[2]->intensity, samples[3]->intensity);
01514 
01515         if(maxo - mino > 0.05f)
01516                 return 0;
01517 
01518         /* compute weighted interpolation between samples */
01519         zero_v3(ao);
01520         zero_v3(env);
01521         zero_v3(indirect);
01522         totw= 0.0f;
01523 
01524         x1= samples[0]->x;
01525         y1= samples[0]->y;
01526         x2= samples[3]->x;
01527         y2= samples[3]->y;
01528 
01529         tx= (float)(x2 - x)/(float)(x2 - x1);
01530         ty= (float)(y2 - y)/(float)(y2 - y1);
01531 
01532         wb[3]= (1.0f-tx)*(1.0f-ty);
01533         wb[2]= (tx)*(1.0f-ty);
01534         wb[1]= (1.0f-tx)*(ty);
01535         wb[0]= tx*ty;
01536 
01537         for(i=0; i<4; i++) {
01538                 VECSUB(d, samples[i]->co, co);
01539                 //dist2= INPR(d, d);
01540 
01541                 wz[i]= 1.0f; //(samples[i]->dist2/(1e-4f + dist2));
01542                 wn[i]= pow(INPR(samples[i]->n, n), 32.0f);
01543 
01544                 w= wb[i]*wn[i]*wz[i];
01545 
01546                 totw += w;
01547                 madd_v3_v3fl(ao, samples[i]->ao, w);
01548                 madd_v3_v3fl(env, samples[i]->env, w);
01549                 madd_v3_v3fl(indirect, samples[i]->indirect, w);
01550         }
01551 
01552         if(totw >= 0.9f) {
01553                 totw= 1.0f/totw;
01554                 mul_v3_fl(ao, totw);
01555                 mul_v3_fl(env, totw);
01556                 mul_v3_fl(indirect, totw);
01557                 return 1;
01558         }
01559 
01560         return 0;
01561 }
01562 
01563 static void sample_occ_surface(ShadeInput *shi)
01564 {
01565         StrandRen *strand= shi->strand;
01566         StrandSurface *mesh= strand->buffer->surface;
01567         int *face, *index = RE_strandren_get_face(shi->obr, strand, 0);
01568         float w[4], *co1, *co2, *co3, *co4;
01569 
01570         if(mesh && mesh->face && mesh->co && mesh->ao && index) {
01571                 face= mesh->face[*index];
01572 
01573                 co1= mesh->co[face[0]];
01574                 co2= mesh->co[face[1]];
01575                 co3= mesh->co[face[2]];
01576                 co4= (face[3])? mesh->co[face[3]]: NULL;
01577 
01578                 interp_weights_face_v3(w, co1, co2, co3, co4, strand->vert->co);
01579 
01580                 zero_v3(shi->ao);
01581                 zero_v3(shi->env);
01582                 zero_v3(shi->indirect);
01583 
01584                 madd_v3_v3fl(shi->ao, mesh->ao[face[0]], w[0]);
01585                 madd_v3_v3fl(shi->env, mesh->env[face[0]], w[0]);
01586                 madd_v3_v3fl(shi->indirect, mesh->indirect[face[0]], w[0]);
01587                 madd_v3_v3fl(shi->ao, mesh->ao[face[1]], w[1]);
01588                 madd_v3_v3fl(shi->env, mesh->env[face[1]], w[1]);
01589                 madd_v3_v3fl(shi->indirect, mesh->indirect[face[1]], w[1]);
01590                 madd_v3_v3fl(shi->ao, mesh->ao[face[2]], w[2]);
01591                 madd_v3_v3fl(shi->env, mesh->env[face[2]], w[2]);
01592                 madd_v3_v3fl(shi->indirect, mesh->indirect[face[2]], w[2]);
01593                 if(face[3]) {
01594                         madd_v3_v3fl(shi->ao, mesh->ao[face[3]], w[3]);
01595                         madd_v3_v3fl(shi->env, mesh->env[face[3]], w[3]);
01596                         madd_v3_v3fl(shi->indirect, mesh->indirect[face[3]], w[3]);
01597                 }
01598         }
01599         else {
01600                 shi->ao[0]= 1.0f;
01601                 shi->ao[1]= 1.0f;
01602                 shi->ao[2]= 1.0f;
01603                 zero_v3(shi->env);
01604                 zero_v3(shi->indirect);
01605         }
01606 }
01607 
01608 /* ------------------------- External Functions --------------------------- */
01609 
01610 static void *exec_strandsurface_sample(void *data)
01611 {
01612         OcclusionThread *othread= (OcclusionThread*)data;
01613         Render *re= othread->re;
01614         StrandSurface *mesh= othread->mesh;
01615         float ao[3], env[3], indirect[3], co[3], n[3], *co1, *co2, *co3, *co4;
01616         int a, *face;
01617 
01618         for(a=othread->begin; a<othread->end; a++) {
01619                 face= mesh->face[a];
01620                 co1= mesh->co[face[0]];
01621                 co2= mesh->co[face[1]];
01622                 co3= mesh->co[face[2]];
01623 
01624                 if(face[3]) {
01625                         co4= mesh->co[face[3]];
01626 
01627                         interp_v3_v3v3(co, co1, co3, 0.5f);
01628                         normal_quad_v3( n,co1, co2, co3, co4);
01629                 }
01630                 else {
01631                         cent_tri_v3(co, co1, co2, co3);
01632                         normal_tri_v3( n,co1, co2, co3);
01633                 }
01634                 negate_v3(n);
01635 
01636                 sample_occ_tree(re, re->occlusiontree, NULL, co, n, othread->thread, 0, ao, env, indirect);
01637                 VECCOPY(othread->faceao[a], ao);
01638                 VECCOPY(othread->faceenv[a], env);
01639                 VECCOPY(othread->faceindirect[a], indirect);
01640         }
01641 
01642         return 0;
01643 }
01644 
01645 void make_occ_tree(Render *re)
01646 {
01647         OcclusionThread othreads[BLENDER_MAX_THREADS];
01648         OcclusionTree *tree;
01649         StrandSurface *mesh;
01650         ListBase threads;
01651         float ao[3], env[3], indirect[3], (*faceao)[3], (*faceenv)[3], (*faceindirect)[3];
01652         int a, totface, totthread, *face, *count;
01653 
01654         /* ugly, needed for occ_face */
01655         R= *re;
01656 
01657         re->i.infostr= "Occlusion preprocessing";
01658         re->stats_draw(re->sdh, &re->i);
01659         
01660         re->occlusiontree= tree= occ_tree_build(re);
01661         
01662         if(tree) {
01663                 if(re->wrld.ao_approx_passes > 0)
01664                         occ_compute_passes(re, tree, re->wrld.ao_approx_passes);
01665                 if(tree->doindirect && (re->wrld.mode & WO_INDIRECT_LIGHT))
01666                         occ_compute_bounces(re, tree, re->wrld.ao_indirect_bounces);
01667 
01668                 for(mesh=re->strandsurface.first; mesh; mesh=mesh->next) {
01669                         if(!mesh->face || !mesh->co || !mesh->ao)
01670                                 continue;
01671 
01672                         count= MEM_callocN(sizeof(int)*mesh->totvert, "OcclusionCount");
01673                         faceao= MEM_callocN(sizeof(float)*3*mesh->totface, "StrandSurfFaceAO");
01674                         faceenv= MEM_callocN(sizeof(float)*3*mesh->totface, "StrandSurfFaceEnv");
01675                         faceindirect= MEM_callocN(sizeof(float)*3*mesh->totface, "StrandSurfFaceIndirect");
01676 
01677                         totthread= (mesh->totface > 10000)? re->r.threads: 1;
01678                         totface= mesh->totface/totthread;
01679                         for(a=0; a<totthread; a++) {
01680                                 othreads[a].re= re;
01681                                 othreads[a].faceao= faceao;
01682                                 othreads[a].faceenv= faceenv;
01683                                 othreads[a].faceindirect= faceindirect;
01684                                 othreads[a].thread= a;
01685                                 othreads[a].mesh= mesh;
01686                                 othreads[a].begin= a*totface;
01687                                 othreads[a].end= (a == totthread-1)? mesh->totface: (a+1)*totface;
01688                         }
01689 
01690                         if(totthread == 1) {
01691                                 exec_strandsurface_sample(&othreads[0]);
01692                         }
01693                         else {
01694                                 BLI_init_threads(&threads, exec_strandsurface_sample, totthread);
01695 
01696                                 for(a=0; a<totthread; a++)
01697                                         BLI_insert_thread(&threads, &othreads[a]);
01698 
01699                                 BLI_end_threads(&threads);
01700                         }
01701 
01702                         for(a=0; a<mesh->totface; a++) {
01703                                 face= mesh->face[a];
01704 
01705                                 VECCOPY(ao, faceao[a]);
01706                                 VECCOPY(env, faceenv[a]);
01707                                 VECCOPY(indirect, faceindirect[a]);
01708 
01709                                 VECADD(mesh->ao[face[0]], mesh->ao[face[0]], ao);
01710                                 VECADD(mesh->env[face[0]], mesh->env[face[0]], env);
01711                                 VECADD(mesh->indirect[face[0]], mesh->indirect[face[0]], indirect);
01712                                 count[face[0]]++;
01713                                 VECADD(mesh->ao[face[1]], mesh->ao[face[1]], ao);
01714                                 VECADD(mesh->env[face[1]], mesh->env[face[1]], env);
01715                                 VECADD(mesh->indirect[face[1]], mesh->indirect[face[1]], indirect);
01716                                 count[face[1]]++;
01717                                 VECADD(mesh->ao[face[2]], mesh->ao[face[2]], ao);
01718                                 VECADD(mesh->env[face[2]], mesh->env[face[2]], env);
01719                                 VECADD(mesh->indirect[face[2]], mesh->indirect[face[2]], indirect);
01720                                 count[face[2]]++;
01721 
01722                                 if(face[3]) {
01723                                         VECADD(mesh->ao[face[3]], mesh->ao[face[3]], ao);
01724                                         VECADD(mesh->env[face[3]], mesh->env[face[3]], env);
01725                                         VECADD(mesh->indirect[face[3]], mesh->indirect[face[3]], indirect);
01726                                         count[face[3]]++;
01727                                 }
01728                         }
01729 
01730                         for(a=0; a<mesh->totvert; a++) {
01731                                 if(count[a]) {
01732                                         mul_v3_fl(mesh->ao[a], 1.0f/count[a]);
01733                                         mul_v3_fl(mesh->env[a], 1.0f/count[a]);
01734                                         mul_v3_fl(mesh->indirect[a], 1.0f/count[a]);
01735                                 }
01736                         }
01737 
01738                         MEM_freeN(count);
01739                         MEM_freeN(faceao);
01740                         MEM_freeN(faceenv);
01741                         MEM_freeN(faceindirect);
01742                 }
01743         }
01744 }
01745 
01746 void free_occ(Render *re)
01747 {
01748         if(re->occlusiontree) {
01749                 occ_free_tree(re->occlusiontree);
01750                 re->occlusiontree = NULL;
01751         }
01752 }
01753 
01754 void sample_occ(Render *re, ShadeInput *shi)
01755 {
01756         OcclusionTree *tree= re->occlusiontree;
01757         OcclusionCache *cache;
01758         OcclusionCacheSample *sample;
01759         OccFace exclude;
01760         int onlyshadow;
01761 
01762         if(tree) {
01763                 if(shi->strand) {
01764                         sample_occ_surface(shi);
01765                 }
01766                 /* try to get result from the cache if possible */
01767                 else if(shi->depth!=0 || !sample_occ_cache(tree, shi->co, shi->vno, shi->xs, shi->ys, shi->thread, shi->ao, shi->env, shi->indirect)) {
01768                         /* no luck, let's sample the occlusion */
01769                         exclude.obi= shi->obi - re->objectinstance;
01770                         exclude.facenr= shi->vlr->index;
01771                         onlyshadow= (shi->mat->mode & MA_ONLYSHADOW);
01772                         sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao, shi->env, shi->indirect);
01773 
01774                         /* fill result into sample, each time */
01775                         if(tree->cache) {
01776                                 cache= &tree->cache[shi->thread];
01777 
01778                                 if(cache->sample && cache->step) {
01779                                         sample= &cache->sample[(shi->ys-cache->y)*cache->w + (shi->xs-cache->x)];
01780                                         VECCOPY(sample->co, shi->co);
01781                                         VECCOPY(sample->n, shi->vno);
01782                                         VECCOPY(sample->ao, shi->ao);
01783                                         VECCOPY(sample->env, shi->env);
01784                                         VECCOPY(sample->indirect, shi->indirect);
01785                                         sample->intensity= MAX3(sample->ao[0], sample->ao[1], sample->ao[2]);
01786                                         sample->intensity= MAX2(sample->intensity, MAX3(sample->env[0], sample->env[1], sample->env[2]));
01787                                         sample->intensity= MAX2(sample->intensity, MAX3(sample->indirect[0], sample->indirect[1], sample->indirect[2]));
01788                                         sample->dist2= INPR(shi->dxco, shi->dxco) + INPR(shi->dyco, shi->dyco);
01789                                         sample->filled= 1;
01790                                 }
01791                         }
01792                 }
01793         }
01794         else {
01795                 shi->ao[0]= 1.0f;
01796                 shi->ao[1]= 1.0f;
01797                 shi->ao[2]= 1.0f;
01798 
01799                 shi->env[0]= 0.0f;
01800                 shi->env[1]= 0.0f;
01801                 shi->env[2]= 0.0f;
01802 
01803                 shi->indirect[0]= 0.0f;
01804                 shi->indirect[1]= 0.0f;
01805                 shi->indirect[2]= 0.0f;
01806         }
01807 }
01808 
01809 void cache_occ_samples(Render *re, RenderPart *pa, ShadeSample *ssamp)
01810 {
01811         OcclusionTree *tree= re->occlusiontree;
01812         PixStr ps;
01813         OcclusionCache *cache;
01814         OcclusionCacheSample *sample;
01815         OccFace exclude;
01816         ShadeInput *shi;
01817         intptr_t *rd=NULL;
01818         int *ro=NULL, *rp=NULL, *rz=NULL, onlyshadow;
01819         int x, y, step = CACHE_STEP;
01820 
01821         if(!tree->cache)
01822                 return;
01823 
01824         cache= &tree->cache[pa->thread];
01825         cache->w= pa->rectx;
01826         cache->h= pa->recty;
01827         cache->x= pa->disprect.xmin;
01828         cache->y= pa->disprect.ymin;
01829         cache->step= step;
01830         cache->sample= MEM_callocN(sizeof(OcclusionCacheSample)*cache->w*cache->h, "OcclusionCacheSample");
01831         sample= cache->sample;
01832 
01833         if(re->osa) {
01834                 rd= pa->rectdaps;
01835         }
01836         else {
01837                 /* fake pixel struct for non-osa */
01838                 ps.next= NULL;
01839                 ps.mask= 0xFFFF;
01840 
01841                 ro= pa->recto;
01842                 rp= pa->rectp;
01843                 rz= pa->rectz;
01844         }
01845 
01846         /* compute a sample at every step pixels */
01847         for(y=pa->disprect.ymin; y<pa->disprect.ymax; y++) {
01848                 for(x=pa->disprect.xmin; x<pa->disprect.xmax; x++, sample++, rd++, ro++, rp++, rz++) {
01849                         if(!(((x - pa->disprect.xmin + step) % step) == 0 || x == pa->disprect.xmax-1))
01850                                 continue;
01851                         if(!(((y - pa->disprect.ymin + step) % step) == 0 || y == pa->disprect.ymax-1))
01852                                 continue;
01853 
01854                         if(re->osa) {
01855                                 if(!*rd) continue;
01856 
01857                                 shade_samples_fill_with_ps(ssamp, (PixStr *)(*rd), x, y);
01858                         }
01859                         else {
01860                                 if(!*rp) continue;
01861 
01862                                 ps.obi= *ro;
01863                                 ps.facenr= *rp;
01864                                 ps.z= *rz;
01865                                 shade_samples_fill_with_ps(ssamp, &ps, x, y);
01866                         }
01867 
01868                         shi= ssamp->shi;
01869                         if(shi->vlr) {
01870                                 onlyshadow= (shi->mat->mode & MA_ONLYSHADOW);
01871                                 exclude.obi= shi->obi - re->objectinstance;
01872                                 exclude.facenr= shi->vlr->index;
01873                                 sample_occ_tree(re, tree, &exclude, shi->co, shi->vno, shi->thread, onlyshadow, shi->ao, shi->env, shi->indirect);
01874 
01875                                 VECCOPY(sample->co, shi->co);
01876                                 VECCOPY(sample->n, shi->vno);
01877                                 VECCOPY(sample->ao, shi->ao);
01878                                 VECCOPY(sample->env, shi->env);
01879                                 VECCOPY(sample->indirect, shi->indirect);
01880                                 sample->intensity= MAX3(sample->ao[0], sample->ao[1], sample->ao[2]);
01881                                 sample->intensity= MAX2(sample->intensity, MAX3(sample->env[0], sample->env[1], sample->env[2]));
01882                                 sample->intensity= MAX2(sample->intensity, MAX3(sample->indirect[0], sample->indirect[1], sample->indirect[2]));
01883                                 sample->dist2= INPR(shi->dxco, shi->dxco) + INPR(shi->dyco, shi->dyco);
01884                                 sample->x= shi->xs;
01885                                 sample->y= shi->ys;
01886                                 sample->filled= 1;
01887                         }
01888 
01889                         if(re->test_break(re->tbh))
01890                                 break;
01891                 }
01892         }
01893 }
01894 
01895 void free_occ_samples(Render *re, RenderPart *pa)
01896 {
01897         OcclusionTree *tree= re->occlusiontree;
01898         OcclusionCache *cache;
01899 
01900         if(tree->cache) {
01901                 cache= &tree->cache[pa->thread];
01902 
01903                 if(cache->sample)
01904                         MEM_freeN(cache->sample);
01905 
01906                 cache->w= 0;
01907                 cache->h= 0;
01908                 cache->step= 0;
01909         }
01910 }
01911