Blender  V2.59
svbvh.h
Go to the documentation of this file.
00001 /*
00002  * $Id: svbvh.h 35233 2011-02-27 19:31:27Z jesterking $
00003  *
00004  * ***** BEGIN GPL LICENSE BLOCK *****
00005  *
00006  * This program is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU General Public License
00008  * as published by the Free Software Foundation; either version 2
00009  * of the License, or (at your option) any later version. 
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software Foundation,
00018  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00019  *
00020  * The Original Code is Copyright (C) 2009 Blender Foundation.
00021  * All rights reserved.
00022  *
00023  * The Original Code is: all of this file.
00024  *
00025  * Contributor(s): André Pinto.
00026  *
00027  * ***** END GPL LICENSE BLOCK *****
00028  */
00029 
00035 #ifdef __SSE__
00036  
00037 #ifndef RE_RAYTRACE_SVBVH_H
00038 #define RE_RAYTRACE_SVBVH_H
00039 
00040 #include "bvh.h"
00041 #include "BLI_memarena.h"
00042 #include "BKE_global.h"
00043 #include <stdio.h>
00044 #include <algorithm>
00045 
00046 struct SVBVHNode
00047 {
00048         float child_bb[24];
00049         SVBVHNode *child[4];
00050         int nchilds;
00051 };
00052 
00053 static int svbvh_bb_intersect_test_simd4(const Isect *isec, const __m128 *bb_group)
00054 {
00055         const __m128 tmin0 = _mm_setzero_ps();
00056         const __m128 tmax0 = _mm_set_ps1(isec->dist);
00057 
00058         const __m128 start0 = _mm_set_ps1(isec->start[0]);
00059         const __m128 start1 = _mm_set_ps1(isec->start[1]);
00060         const __m128 start2 = _mm_set_ps1(isec->start[2]);
00061         const __m128 sub0 = _mm_sub_ps(bb_group[isec->bv_index[0]], start0);
00062         const __m128 sub1 = _mm_sub_ps(bb_group[isec->bv_index[1]], start0);
00063         const __m128 sub2 = _mm_sub_ps(bb_group[isec->bv_index[2]], start1);
00064         const __m128 sub3 = _mm_sub_ps(bb_group[isec->bv_index[3]], start1);
00065         const __m128 sub4 = _mm_sub_ps(bb_group[isec->bv_index[4]], start2);
00066         const __m128 sub5 = _mm_sub_ps(bb_group[isec->bv_index[5]], start2);
00067         const __m128 idot_axis0 = _mm_set_ps1(isec->idot_axis[0]);
00068         const __m128 idot_axis1 = _mm_set_ps1(isec->idot_axis[1]);
00069         const __m128 idot_axis2 = _mm_set_ps1(isec->idot_axis[2]);
00070         const __m128 mul0 = _mm_mul_ps(sub0, idot_axis0);
00071         const __m128 mul1 = _mm_mul_ps(sub1, idot_axis0);
00072         const __m128 mul2 = _mm_mul_ps(sub2, idot_axis1);
00073         const __m128 mul3 = _mm_mul_ps(sub3, idot_axis1);
00074         const __m128 mul4 = _mm_mul_ps(sub4, idot_axis2);
00075         const __m128 mul5 = _mm_mul_ps(sub5, idot_axis2);
00076         const __m128 tmin1 = _mm_max_ps(tmin0, mul0);
00077         const __m128 tmax1 = _mm_min_ps(tmax0, mul1);
00078         const __m128 tmin2 = _mm_max_ps(tmin1, mul2);
00079         const __m128 tmax2 = _mm_min_ps(tmax1, mul3);
00080         const __m128 tmin3 = _mm_max_ps(tmin2, mul4);
00081         const __m128 tmax3 = _mm_min_ps(tmax2, mul5);
00082         
00083         return _mm_movemask_ps(_mm_cmpge_ps(tmax3, tmin3));
00084 }
00085 
00086 static int svbvh_bb_intersect_test(const Isect *isec, const float *_bb)
00087 {
00088         const float *bb = _bb;
00089         
00090         float t1x = (bb[isec->bv_index[0]] - isec->start[0]) * isec->idot_axis[0];
00091         float t2x = (bb[isec->bv_index[1]] - isec->start[0]) * isec->idot_axis[0];
00092         float t1y = (bb[isec->bv_index[2]] - isec->start[1]) * isec->idot_axis[1];
00093         float t2y = (bb[isec->bv_index[3]] - isec->start[1]) * isec->idot_axis[1];
00094         float t1z = (bb[isec->bv_index[4]] - isec->start[2]) * isec->idot_axis[2];
00095         float t2z = (bb[isec->bv_index[5]] - isec->start[2]) * isec->idot_axis[2];
00096         
00097         RE_RC_COUNT(isec->raycounter->bb.test);
00098 
00099         if(t1x > t2y || t2x < t1y || t1x > t2z || t2x < t1z || t1y > t2z || t2y < t1z) return 0;
00100         if(t2x < 0.0 || t2y < 0.0 || t2z < 0.0) return 0;
00101         if(t1x > isec->dist || t1y > isec->dist || t1z > isec->dist) return 0;
00102 
00103         RE_RC_COUNT(isec->raycounter->bb.hit);  
00104 
00105         return 1;
00106 }
00107 
00108 static bool svbvh_node_is_leaf(const SVBVHNode *node)
00109 {
00110         return !RE_rayobject_isAligned(node);
00111 }
00112 
00113 template<int MAX_STACK_SIZE, bool SHADOW>
00114 static int svbvh_node_stack_raycast(SVBVHNode *root, Isect *isec)
00115 {
00116         SVBVHNode *stack[MAX_STACK_SIZE], *node;
00117         int hit = 0, stack_pos = 0;
00118 
00119         stack[stack_pos++] = root;
00120 
00121         while(stack_pos)
00122         {
00123                 node = stack[--stack_pos];
00124 
00125                 if(!svbvh_node_is_leaf(node))
00126                 {
00127                         int nchilds= node->nchilds;
00128 
00129                         if(nchilds == 4) {
00130                                 float *child_bb= node->child_bb;
00131                                 int res = svbvh_bb_intersect_test_simd4(isec, ((__m128*) (child_bb)));
00132                                 SVBVHNode **child= node->child;
00133 
00134                                 RE_RC_COUNT(isec->raycounter->simd_bb.test);
00135 
00136                                 if(res & 1) { stack[stack_pos++] = child[0]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
00137                                 if(res & 2) { stack[stack_pos++] = child[1]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
00138                                 if(res & 4) { stack[stack_pos++] = child[2]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
00139                                 if(res & 8) { stack[stack_pos++] = child[3]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
00140                         }
00141                         else {
00142                                 float *child_bb= node->child_bb;
00143                                 SVBVHNode **child= node->child;
00144                                 int i;
00145 
00146                                 for(i=0; i<nchilds; i++)
00147                                         if(svbvh_bb_intersect_test(isec, (float*)child_bb+6*i))
00148                                                 stack[stack_pos++] = child[i];
00149                         }
00150                 }
00151                 else
00152                 {
00153                         hit |= RE_rayobject_intersect((RayObject*)node, isec);
00154                         if(SHADOW && hit) break;
00155                 }
00156         }
00157 
00158         return hit;
00159 }
00160 
00161 
00162 template<>
00163 inline void bvh_node_merge_bb<SVBVHNode>(SVBVHNode *node, float *min, float *max)
00164 {
00165         if(is_leaf(node))
00166         {
00167                 RE_rayobject_merge_bb((RayObject*)node, min, max);
00168         }
00169         else
00170         {
00171                 int i=0;
00172                 while(i+4 <= node->nchilds)
00173                 {
00174                         float *res = node->child_bb + 6*i;
00175                         for(int j=0; j<3; j++)
00176                         {
00177                                 min[j] = MIN2(min[j], res[4*j+0]);
00178                                 min[j] = MIN2(min[j], res[4*j+1]);
00179                                 min[j] = MIN2(min[j], res[4*j+2]);
00180                                 min[j] = MIN2(min[j], res[4*j+3]);
00181                         }
00182                         for(int j=0; j<3; j++)
00183                         {
00184                                 max[j] = MAX2(max[j], res[4*(j+3)+0]);
00185                                 max[j] = MAX2(max[j], res[4*(j+3)+1]);
00186                                 max[j] = MAX2(max[j], res[4*(j+3)+2]);
00187                                 max[j] = MAX2(max[j], res[4*(j+3)+3]);
00188                         }
00189                         
00190                         i += 4;
00191                 }
00192 
00193                 for(; i<node->nchilds; i++)
00194                 {
00195                         DO_MIN(node->child_bb+6*i  , min);
00196                         DO_MAX(node->child_bb+3+6*i, max);
00197                 }
00198         }
00199 }
00200 
00201 
00202 
00203 /*
00204  * Builds a SVBVH tree form a VBVHTree
00205  */
00206 template<class OldNode>
00207 struct Reorganize_SVBVH
00208 {
00209         MemArena *arena;
00210 
00211         float childs_per_node;
00212         int nodes_with_childs[16];
00213         int useless_bb;
00214         int nodes;
00215 
00216         Reorganize_SVBVH(MemArena *a)
00217         {
00218                 arena = a;
00219                 nodes = 0;
00220                 childs_per_node = 0;
00221                 useless_bb = 0;
00222                 
00223                 for(int i=0; i<16; i++)
00224                         nodes_with_childs[i] = 0;
00225         }
00226         
00227         ~Reorganize_SVBVH()
00228         {
00229                 if(G.f & G_DEBUG) {
00230                         printf("%f childs per node\n", childs_per_node / nodes);
00231                         printf("%d childs BB are useless\n", useless_bb);
00232                         for(int i=0; i<16; i++)
00233                                 printf("%i childs per node: %d/%d = %f\n", i, nodes_with_childs[i], nodes,  nodes_with_childs[i]/float(nodes));
00234                 }
00235         }
00236         
00237         SVBVHNode *create_node(int nchilds)
00238         {
00239                 SVBVHNode *node = (SVBVHNode*)BLI_memarena_alloc(arena, sizeof(SVBVHNode));
00240                 node->nchilds = nchilds;
00241 
00242                 return node;
00243         }
00244         
00245         void copy_bb(float *bb, const float *old_bb)
00246         {
00247                 std::copy(old_bb, old_bb+6, bb);
00248         }
00249         
00250         void prepare_for_simd(SVBVHNode *node)
00251         {
00252                 int i=0;
00253                 while(i+4 <= node->nchilds)
00254                 {
00255                         float vec_tmp[4*6];
00256                         float *res = node->child_bb+6*i;
00257                         std::copy(res, res+6*4, vec_tmp);
00258                         
00259                         for(int j=0; j<6; j++)
00260                         {
00261                                 res[4*j+0] = vec_tmp[6*0+j];
00262                                 res[4*j+1] = vec_tmp[6*1+j];
00263                                 res[4*j+2] = vec_tmp[6*2+j];
00264                                 res[4*j+3] = vec_tmp[6*3+j];
00265                         }
00266 
00267                         i += 4;
00268                 }
00269         }
00270 
00271         /* amt must be power of two */
00272         inline int padup(int num, int amt)
00273         {
00274                 return ((num+(amt-1))&~(amt-1));
00275         }
00276         
00277         SVBVHNode *transform(OldNode *old)
00278         {
00279                 if(is_leaf(old))
00280                         return (SVBVHNode*)old;
00281                 if(is_leaf(old->child))
00282                         return (SVBVHNode*)old->child;
00283 
00284                 int nchilds = count_childs(old);
00285                 int alloc_childs = nchilds;
00286                 if(nchilds % 4 > 2)
00287                         alloc_childs = padup(nchilds, 4);
00288                 
00289                 SVBVHNode *node = create_node(alloc_childs);
00290 
00291                 childs_per_node += nchilds;
00292                 nodes++;
00293                 if(nchilds < 16)
00294                         nodes_with_childs[nchilds]++;
00295                 
00296                 useless_bb += alloc_childs-nchilds;
00297                 while(alloc_childs > nchilds)
00298                 {
00299                         const static float def_bb[6] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MIN, FLT_MIN, FLT_MIN };
00300                         alloc_childs--;
00301                         node->child[alloc_childs] = 0;
00302                         copy_bb(node->child_bb+alloc_childs*6, def_bb);
00303                 }
00304                 
00305                 int i=nchilds;
00306                 for(OldNode *o_child = old->child; o_child; o_child = o_child->sibling)
00307                 {
00308                         i--;
00309                         node->child[i] = transform(o_child);
00310                         if(is_leaf(o_child))
00311                         {
00312                                 float bb[6];
00313                                 INIT_MINMAX(bb, bb+3);
00314                                 RE_rayobject_merge_bb((RayObject*)o_child, bb, bb+3);
00315                                 copy_bb(node->child_bb+i*6, bb);
00316                                 break;
00317                         }
00318                         else
00319                         {
00320                                 copy_bb(node->child_bb+i*6, o_child->bb);
00321                         }
00322                 }
00323                 assert(i == 0);
00324 
00325                 prepare_for_simd(node);
00326                 
00327                 return node;
00328         }       
00329 };
00330 
00331 #endif
00332 
00333 #endif //__SSE__
00334