|
Blender
V2.59
|
00001 /* 00002 * $Id: svbvh.h 35233 2011-02-27 19:31:27Z jesterking $ 00003 * 00004 * ***** BEGIN GPL LICENSE BLOCK ***** 00005 * 00006 * This program is free software; you can redistribute it and/or 00007 * modify it under the terms of the GNU General Public License 00008 * as published by the Free Software Foundation; either version 2 00009 * of the License, or (at your option) any later version. 00010 * 00011 * This program is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 * GNU General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU General Public License 00017 * along with this program; if not, write to the Free Software Foundation, 00018 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 00019 * 00020 * The Original Code is Copyright (C) 2009 Blender Foundation. 00021 * All rights reserved. 00022 * 00023 * The Original Code is: all of this file. 00024 * 00025 * Contributor(s): André Pinto. 00026 * 00027 * ***** END GPL LICENSE BLOCK ***** 00028 */ 00029 00035 #ifdef __SSE__ 00036 00037 #ifndef RE_RAYTRACE_SVBVH_H 00038 #define RE_RAYTRACE_SVBVH_H 00039 00040 #include "bvh.h" 00041 #include "BLI_memarena.h" 00042 #include "BKE_global.h" 00043 #include <stdio.h> 00044 #include <algorithm> 00045 00046 struct SVBVHNode 00047 { 00048 float child_bb[24]; 00049 SVBVHNode *child[4]; 00050 int nchilds; 00051 }; 00052 00053 static int svbvh_bb_intersect_test_simd4(const Isect *isec, const __m128 *bb_group) 00054 { 00055 const __m128 tmin0 = _mm_setzero_ps(); 00056 const __m128 tmax0 = _mm_set_ps1(isec->dist); 00057 00058 const __m128 start0 = _mm_set_ps1(isec->start[0]); 00059 const __m128 start1 = _mm_set_ps1(isec->start[1]); 00060 const __m128 start2 = _mm_set_ps1(isec->start[2]); 00061 const __m128 sub0 = _mm_sub_ps(bb_group[isec->bv_index[0]], start0); 00062 const __m128 sub1 = _mm_sub_ps(bb_group[isec->bv_index[1]], start0); 00063 const __m128 sub2 = _mm_sub_ps(bb_group[isec->bv_index[2]], start1); 00064 const __m128 sub3 = _mm_sub_ps(bb_group[isec->bv_index[3]], start1); 00065 const __m128 sub4 = _mm_sub_ps(bb_group[isec->bv_index[4]], start2); 00066 const __m128 sub5 = _mm_sub_ps(bb_group[isec->bv_index[5]], start2); 00067 const __m128 idot_axis0 = _mm_set_ps1(isec->idot_axis[0]); 00068 const __m128 idot_axis1 = _mm_set_ps1(isec->idot_axis[1]); 00069 const __m128 idot_axis2 = _mm_set_ps1(isec->idot_axis[2]); 00070 const __m128 mul0 = _mm_mul_ps(sub0, idot_axis0); 00071 const __m128 mul1 = _mm_mul_ps(sub1, idot_axis0); 00072 const __m128 mul2 = _mm_mul_ps(sub2, idot_axis1); 00073 const __m128 mul3 = _mm_mul_ps(sub3, idot_axis1); 00074 const __m128 mul4 = _mm_mul_ps(sub4, idot_axis2); 00075 const __m128 mul5 = _mm_mul_ps(sub5, idot_axis2); 00076 const __m128 tmin1 = _mm_max_ps(tmin0, mul0); 00077 const __m128 tmax1 = _mm_min_ps(tmax0, mul1); 00078 const __m128 tmin2 = _mm_max_ps(tmin1, mul2); 00079 const __m128 tmax2 = _mm_min_ps(tmax1, mul3); 00080 const __m128 tmin3 = _mm_max_ps(tmin2, mul4); 00081 const __m128 tmax3 = _mm_min_ps(tmax2, mul5); 00082 00083 return _mm_movemask_ps(_mm_cmpge_ps(tmax3, tmin3)); 00084 } 00085 00086 static int svbvh_bb_intersect_test(const Isect *isec, const float *_bb) 00087 { 00088 const float *bb = _bb; 00089 00090 float t1x = (bb[isec->bv_index[0]] - isec->start[0]) * isec->idot_axis[0]; 00091 float t2x = (bb[isec->bv_index[1]] - isec->start[0]) * isec->idot_axis[0]; 00092 float t1y = (bb[isec->bv_index[2]] - isec->start[1]) * isec->idot_axis[1]; 00093 float t2y = (bb[isec->bv_index[3]] - isec->start[1]) * isec->idot_axis[1]; 00094 float t1z = (bb[isec->bv_index[4]] - isec->start[2]) * isec->idot_axis[2]; 00095 float t2z = (bb[isec->bv_index[5]] - isec->start[2]) * isec->idot_axis[2]; 00096 00097 RE_RC_COUNT(isec->raycounter->bb.test); 00098 00099 if(t1x > t2y || t2x < t1y || t1x > t2z || t2x < t1z || t1y > t2z || t2y < t1z) return 0; 00100 if(t2x < 0.0 || t2y < 0.0 || t2z < 0.0) return 0; 00101 if(t1x > isec->dist || t1y > isec->dist || t1z > isec->dist) return 0; 00102 00103 RE_RC_COUNT(isec->raycounter->bb.hit); 00104 00105 return 1; 00106 } 00107 00108 static bool svbvh_node_is_leaf(const SVBVHNode *node) 00109 { 00110 return !RE_rayobject_isAligned(node); 00111 } 00112 00113 template<int MAX_STACK_SIZE, bool SHADOW> 00114 static int svbvh_node_stack_raycast(SVBVHNode *root, Isect *isec) 00115 { 00116 SVBVHNode *stack[MAX_STACK_SIZE], *node; 00117 int hit = 0, stack_pos = 0; 00118 00119 stack[stack_pos++] = root; 00120 00121 while(stack_pos) 00122 { 00123 node = stack[--stack_pos]; 00124 00125 if(!svbvh_node_is_leaf(node)) 00126 { 00127 int nchilds= node->nchilds; 00128 00129 if(nchilds == 4) { 00130 float *child_bb= node->child_bb; 00131 int res = svbvh_bb_intersect_test_simd4(isec, ((__m128*) (child_bb))); 00132 SVBVHNode **child= node->child; 00133 00134 RE_RC_COUNT(isec->raycounter->simd_bb.test); 00135 00136 if(res & 1) { stack[stack_pos++] = child[0]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); } 00137 if(res & 2) { stack[stack_pos++] = child[1]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); } 00138 if(res & 4) { stack[stack_pos++] = child[2]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); } 00139 if(res & 8) { stack[stack_pos++] = child[3]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); } 00140 } 00141 else { 00142 float *child_bb= node->child_bb; 00143 SVBVHNode **child= node->child; 00144 int i; 00145 00146 for(i=0; i<nchilds; i++) 00147 if(svbvh_bb_intersect_test(isec, (float*)child_bb+6*i)) 00148 stack[stack_pos++] = child[i]; 00149 } 00150 } 00151 else 00152 { 00153 hit |= RE_rayobject_intersect((RayObject*)node, isec); 00154 if(SHADOW && hit) break; 00155 } 00156 } 00157 00158 return hit; 00159 } 00160 00161 00162 template<> 00163 inline void bvh_node_merge_bb<SVBVHNode>(SVBVHNode *node, float *min, float *max) 00164 { 00165 if(is_leaf(node)) 00166 { 00167 RE_rayobject_merge_bb((RayObject*)node, min, max); 00168 } 00169 else 00170 { 00171 int i=0; 00172 while(i+4 <= node->nchilds) 00173 { 00174 float *res = node->child_bb + 6*i; 00175 for(int j=0; j<3; j++) 00176 { 00177 min[j] = MIN2(min[j], res[4*j+0]); 00178 min[j] = MIN2(min[j], res[4*j+1]); 00179 min[j] = MIN2(min[j], res[4*j+2]); 00180 min[j] = MIN2(min[j], res[4*j+3]); 00181 } 00182 for(int j=0; j<3; j++) 00183 { 00184 max[j] = MAX2(max[j], res[4*(j+3)+0]); 00185 max[j] = MAX2(max[j], res[4*(j+3)+1]); 00186 max[j] = MAX2(max[j], res[4*(j+3)+2]); 00187 max[j] = MAX2(max[j], res[4*(j+3)+3]); 00188 } 00189 00190 i += 4; 00191 } 00192 00193 for(; i<node->nchilds; i++) 00194 { 00195 DO_MIN(node->child_bb+6*i , min); 00196 DO_MAX(node->child_bb+3+6*i, max); 00197 } 00198 } 00199 } 00200 00201 00202 00203 /* 00204 * Builds a SVBVH tree form a VBVHTree 00205 */ 00206 template<class OldNode> 00207 struct Reorganize_SVBVH 00208 { 00209 MemArena *arena; 00210 00211 float childs_per_node; 00212 int nodes_with_childs[16]; 00213 int useless_bb; 00214 int nodes; 00215 00216 Reorganize_SVBVH(MemArena *a) 00217 { 00218 arena = a; 00219 nodes = 0; 00220 childs_per_node = 0; 00221 useless_bb = 0; 00222 00223 for(int i=0; i<16; i++) 00224 nodes_with_childs[i] = 0; 00225 } 00226 00227 ~Reorganize_SVBVH() 00228 { 00229 if(G.f & G_DEBUG) { 00230 printf("%f childs per node\n", childs_per_node / nodes); 00231 printf("%d childs BB are useless\n", useless_bb); 00232 for(int i=0; i<16; i++) 00233 printf("%i childs per node: %d/%d = %f\n", i, nodes_with_childs[i], nodes, nodes_with_childs[i]/float(nodes)); 00234 } 00235 } 00236 00237 SVBVHNode *create_node(int nchilds) 00238 { 00239 SVBVHNode *node = (SVBVHNode*)BLI_memarena_alloc(arena, sizeof(SVBVHNode)); 00240 node->nchilds = nchilds; 00241 00242 return node; 00243 } 00244 00245 void copy_bb(float *bb, const float *old_bb) 00246 { 00247 std::copy(old_bb, old_bb+6, bb); 00248 } 00249 00250 void prepare_for_simd(SVBVHNode *node) 00251 { 00252 int i=0; 00253 while(i+4 <= node->nchilds) 00254 { 00255 float vec_tmp[4*6]; 00256 float *res = node->child_bb+6*i; 00257 std::copy(res, res+6*4, vec_tmp); 00258 00259 for(int j=0; j<6; j++) 00260 { 00261 res[4*j+0] = vec_tmp[6*0+j]; 00262 res[4*j+1] = vec_tmp[6*1+j]; 00263 res[4*j+2] = vec_tmp[6*2+j]; 00264 res[4*j+3] = vec_tmp[6*3+j]; 00265 } 00266 00267 i += 4; 00268 } 00269 } 00270 00271 /* amt must be power of two */ 00272 inline int padup(int num, int amt) 00273 { 00274 return ((num+(amt-1))&~(amt-1)); 00275 } 00276 00277 SVBVHNode *transform(OldNode *old) 00278 { 00279 if(is_leaf(old)) 00280 return (SVBVHNode*)old; 00281 if(is_leaf(old->child)) 00282 return (SVBVHNode*)old->child; 00283 00284 int nchilds = count_childs(old); 00285 int alloc_childs = nchilds; 00286 if(nchilds % 4 > 2) 00287 alloc_childs = padup(nchilds, 4); 00288 00289 SVBVHNode *node = create_node(alloc_childs); 00290 00291 childs_per_node += nchilds; 00292 nodes++; 00293 if(nchilds < 16) 00294 nodes_with_childs[nchilds]++; 00295 00296 useless_bb += alloc_childs-nchilds; 00297 while(alloc_childs > nchilds) 00298 { 00299 const static float def_bb[6] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MIN, FLT_MIN, FLT_MIN }; 00300 alloc_childs--; 00301 node->child[alloc_childs] = 0; 00302 copy_bb(node->child_bb+alloc_childs*6, def_bb); 00303 } 00304 00305 int i=nchilds; 00306 for(OldNode *o_child = old->child; o_child; o_child = o_child->sibling) 00307 { 00308 i--; 00309 node->child[i] = transform(o_child); 00310 if(is_leaf(o_child)) 00311 { 00312 float bb[6]; 00313 INIT_MINMAX(bb, bb+3); 00314 RE_rayobject_merge_bb((RayObject*)o_child, bb, bb+3); 00315 copy_bb(node->child_bb+i*6, bb); 00316 break; 00317 } 00318 else 00319 { 00320 copy_bb(node->child_bb+i*6, o_child->bb); 00321 } 00322 } 00323 assert(i == 0); 00324 00325 prepare_for_simd(node); 00326 00327 return node; 00328 } 00329 }; 00330 00331 #endif 00332 00333 #endif //__SSE__ 00334