Drizzled Public API Documentation

sel_tree.cc

00001 /* -*- mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; -*-
00002  *  vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
00003  *
00004  *  Copyright (C) 2008-2009 Sun Microsystems, Inc.
00005  *
00006  *  This program is free software; you can redistribute it and/or modify
00007  *  it under the terms of the GNU General Public License as published by
00008  *  the Free Software Foundation; version 2 of the License.
00009  *
00010  *  This program is distributed in the hope that it will be useful,
00011  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  *  GNU General Public License for more details.
00014  *
00015  *  You should have received a copy of the GNU General Public License
00016  *  along with this program; if not, write to the Free Software
00017  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00018  */
00019 
00020 #include <config.h>
00021 
00022 #include <drizzled/sql_base.h>
00023 #include <drizzled/sql_select.h>
00024 #include <drizzled/memory/sql_alloc.h>
00025 #include <drizzled/optimizer/range.h>
00026 #include <drizzled/optimizer/range_param.h>
00027 #include <drizzled/optimizer/sel_arg.h>
00028 #include <drizzled/optimizer/sel_tree.h>
00029 #include <drizzled/optimizer/sel_imerge.h>
00030 
00031 using namespace std;
00032 using namespace drizzled;
00033 
00034 static optimizer::SEL_ARG *key_or(optimizer::RangeParameter *param, optimizer::SEL_ARG *key1, optimizer::SEL_ARG *key2);
00035 static bool eq_tree(optimizer::SEL_ARG* a, optimizer::SEL_ARG *b);
00036 
00037 bool optimizer::sel_trees_can_be_ored(optimizer::SEL_TREE *tree1, 
00038                                       optimizer::SEL_TREE *tree2,
00039                                       optimizer::RangeParameter* param)
00040 {
00041   key_map common_keys= tree1->keys_map;
00042   common_keys&= tree2->keys_map;
00043 
00044   if (common_keys.none())
00045   {
00046     return false;
00047   }
00048 
00049   /* trees have a common key, check if they refer to same key part */
00050   optimizer::SEL_ARG **key1,**key2;
00051   for (uint32_t key_no=0; key_no < param->keys; key_no++)
00052   {
00053     if (common_keys.test(key_no))
00054     {
00055       key1= tree1->keys + key_no;
00056       key2= tree2->keys + key_no;
00057       if ((*key1)->part == (*key2)->part)
00058       {
00059         return true;
00060       }
00061     }
00062   }
00063   return false;
00064 }
00065 
00066 
00067 /*
00068   Perform OR operation on 2 index_merge lists, storing result in first list.
00069 
00070   NOTES
00071     The following conversion is implemented:
00072      (a_1 &&...&& a_N)||(b_1 &&...&& b_K) = AND_i,j(a_i || b_j) =>
00073       => (a_1||b_1).
00074 
00075     i.e. all conjuncts except the first one are currently dropped.
00076     This is done to avoid producing N*K ways to do index_merge.
00077 
00078     If (a_1||b_1) produce a condition that is always true, NULL is returned
00079     and index_merge is discarded (while it is actually possible to try
00080     harder).
00081 
00082     As a consequence of this, choice of keys to do index_merge read may depend
00083     on the order of conditions in WHERE part of the query.
00084 
00085   RETURN
00086     0     OK, result is stored in *im1
00087     other Error, both passed lists are unusable
00088 */
00089 
00090 static int imerge_list_or_list(optimizer::RangeParameter *param,
00091                                List<optimizer::SEL_IMERGE> *im1,
00092                                List<optimizer::SEL_IMERGE> *im2)
00093 {
00094   optimizer::SEL_IMERGE *imerge= &im1->front();
00095   im1->clear();
00096   im1->push_back(imerge);
00097 
00098   return imerge->or_sel_imerge_with_checks(param, &im2->front());
00099 }
00100 
00101 
00102 /*
00103   Perform OR operation on index_merge list and key tree.
00104 
00105   RETURN
00106      0     OK, result is stored in *im1.
00107      other Error
00108  */
00109 
00110 static int imerge_list_or_tree(optimizer::RangeParameter *param,
00111                                List<optimizer::SEL_IMERGE> *im1,
00112                                optimizer::SEL_TREE *tree)
00113 {
00114   optimizer::SEL_IMERGE *imerge= NULL;
00115   List_iterator<optimizer::SEL_IMERGE> it(im1->begin());
00116   while ((imerge= it++))
00117   {
00118     if (imerge->or_sel_tree_with_checks(param, tree))
00119       it.remove();
00120   }
00121   return im1->is_empty();
00122 }
00123 
00124 
00125 optimizer::SEL_TREE *
00126 optimizer::tree_or(optimizer::RangeParameter *param,
00127                    optimizer::SEL_TREE *tree1,
00128                    optimizer::SEL_TREE *tree2)
00129 {
00130   if (! tree1 || ! tree2)
00131   {
00132     return 0;
00133   }
00134 
00135   if (tree1->type == optimizer::SEL_TREE::IMPOSSIBLE || tree2->type == optimizer::SEL_TREE::ALWAYS)
00136   {
00137     return tree2;
00138   }
00139 
00140   if (tree2->type == optimizer::SEL_TREE::IMPOSSIBLE || tree1->type == optimizer::SEL_TREE::ALWAYS)
00141   {
00142     return tree1;
00143   }
00144 
00145   if (tree1->type == optimizer::SEL_TREE::MAYBE)
00146   {
00147     return tree1; // Can't use this
00148   }
00149 
00150   if (tree2->type == optimizer::SEL_TREE::MAYBE)
00151   {
00152     return tree2;
00153   }
00154 
00155   optimizer::SEL_TREE *result= NULL;
00156   key_map  result_keys;
00157   result_keys.reset();
00158   if (sel_trees_can_be_ored(tree1, tree2, param))
00159   {
00160     /* Join the trees key per key */
00161     optimizer::SEL_ARG **key1= NULL;
00162     optimizer::SEL_ARG **key2= NULL;
00163     optimizer::SEL_ARG **end= NULL;
00164     for (key1= tree1->keys,key2= tree2->keys,end= key1+param->keys;
00165          key1 != end; 
00166          key1++, key2++)
00167     {
00168       *key1= key_or(param, *key1, *key2);
00169       if (*key1)
00170       {
00171         result= tree1; // Added to tree1
00172         result_keys.set(key1 - tree1->keys);
00173       }
00174     }
00175     if (result)
00176       result->keys_map= result_keys;
00177   }
00178   else
00179   {
00180     /* ok, two trees have KEY type but cannot be used without index merge */
00181     if (tree1->merges.is_empty() && tree2->merges.is_empty())
00182     {
00183       if (param->remove_jump_scans)
00184       {
00185         bool no_trees= optimizer::remove_nonrange_trees(param, tree1);
00186         no_trees= no_trees || optimizer::remove_nonrange_trees(param, tree2);
00187         if (no_trees)
00188         {
00189           return (new optimizer::SEL_TREE(optimizer::SEL_TREE::ALWAYS));
00190         }
00191       }
00192       optimizer::SEL_IMERGE *merge= NULL;
00193       /* both trees are "range" trees, produce new index merge structure */
00194       if (! (result= new optimizer::SEL_TREE()) || 
00195           ! (merge= new optimizer::SEL_IMERGE()) ||
00196           (result->merges.push_back(merge)) ||
00197           (merge->or_sel_tree(param, tree1)) ||
00198           (merge->or_sel_tree(param, tree2)))
00199       {
00200         result= NULL;
00201       }
00202       else
00203       {
00204         result->type= tree1->type;
00205       }
00206     }
00207     else if (!tree1->merges.is_empty() && !tree2->merges.is_empty())
00208     {
00209       if (imerge_list_or_list(param, &tree1->merges, &tree2->merges))
00210         result= new optimizer::SEL_TREE(optimizer::SEL_TREE::ALWAYS);
00211       else
00212         result= tree1;
00213     }
00214     else
00215     {
00216       /* one tree is index merge tree and another is range tree */
00217       if (tree1->merges.is_empty())
00218         std::swap(tree1, tree2);
00219 
00220       if (param->remove_jump_scans && optimizer::remove_nonrange_trees(param, tree2))
00221          return(new optimizer::SEL_TREE(optimizer::SEL_TREE::ALWAYS));
00222       /* add tree2 to tree1->merges, checking if it collapses to ALWAYS */
00223       if (imerge_list_or_tree(param, &tree1->merges, tree2))
00224         result= new optimizer::SEL_TREE(optimizer::SEL_TREE::ALWAYS);
00225       else
00226         result= tree1;
00227     }
00228   }
00229   return result;
00230 }
00231 
00232 
00233 static optimizer::SEL_ARG *
00234 key_or(optimizer::RangeParameter *param, optimizer::SEL_ARG *key1, optimizer::SEL_ARG *key2)
00235 {
00236   if (! key1)
00237   {
00238     if (key2)
00239     {
00240       key2->use_count--;
00241       key2->free_tree();
00242     }
00243     return 0;
00244   }
00245   if (! key2)
00246   {
00247     key1->use_count--;
00248     key1->free_tree();
00249     return 0;
00250   }
00251   key1->use_count--;
00252   key2->use_count--;
00253 
00254   if (key1->part != key2->part)
00255   {
00256     key1->free_tree();
00257     key2->free_tree();
00258     return 0;         // Can't optimize this
00259   }
00260 
00261   // If one of the key is MAYBE_KEY then the found region may be bigger
00262   if (key1->type == optimizer::SEL_ARG::MAYBE_KEY)
00263   {
00264     key2->free_tree();
00265     key1->use_count++;
00266     return key1;
00267   }
00268   if (key2->type == optimizer::SEL_ARG::MAYBE_KEY)
00269   {
00270     key1->free_tree();
00271     key2->use_count++;
00272     return key2;
00273   }
00274 
00275   if (key1->use_count > 0)
00276   {
00277     if (key2->use_count == 0 || key1->elements > key2->elements)
00278     {
00279       std::swap(key1,key2);
00280     }
00281     if (key1->use_count > 0 || !(key1=key1->clone_tree(param)))
00282       return 0;         // OOM
00283   }
00284 
00285   // Add tree at key2 to tree at key1
00286   bool key2_shared= key2->use_count != 0;
00287   key1->maybe_flag|= key2->maybe_flag;
00288 
00289   for (key2=key2->first(); key2; )
00290   {
00291     optimizer::SEL_ARG *tmp= key1->find_range(key2); // Find key1.min <= key2.min
00292     int cmp;
00293 
00294     if (! tmp)
00295     {
00296       tmp=key1->first(); // tmp.min > key2.min
00297       cmp= -1;
00298     }
00299     else if ((cmp=tmp->cmp_max_to_min(key2)) < 0)
00300     {           // Found tmp.max < key2.min
00301       optimizer::SEL_ARG *next= tmp->next;
00302       if (cmp == -2 && eq_tree(tmp->next_key_part,key2->next_key_part))
00303       {
00304         // Join near ranges like tmp.max < 0 and key2.min >= 0
00305         optimizer::SEL_ARG *key2_next=key2->next;
00306         if (key2_shared)
00307         {
00308           if (! (key2=new optimizer::SEL_ARG(*key2)))
00309             return 0;   // out of memory
00310           key2->increment_use_count(key1->use_count+1);
00311           key2->next= key2_next; // New copy of key2
00312         }
00313         key2->copy_min(tmp);
00314         if (! (key1=key1->tree_delete(tmp)))
00315         {         // Only one key in tree
00316           key1= key2;
00317           key1->make_root();
00318           key2= key2_next;
00319           break;
00320         }
00321       }
00322       if (! (tmp= next)) // tmp.min > key2.min
00323         break; // Copy rest of key2
00324     }
00325     if (cmp < 0)
00326     {           // tmp.min > key2.min
00327       int tmp_cmp;
00328       if ((tmp_cmp= tmp->cmp_min_to_max(key2)) > 0) // if tmp.min > key2.max
00329       {
00330         if (tmp_cmp == 2 && eq_tree(tmp->next_key_part,key2->next_key_part))
00331         {         // ranges are connected
00332           tmp->copy_min_to_min(key2);
00333           key1->merge_flags(key2);
00334           if (tmp->min_flag & NO_MIN_RANGE &&
00335               tmp->max_flag & NO_MAX_RANGE)
00336           {
00337             if (key1->maybe_flag)
00338               return new optimizer::SEL_ARG(optimizer::SEL_ARG::MAYBE_KEY);
00339             return 0;
00340           }
00341           key2->increment_use_count(-1);  // Free not used tree
00342           key2= key2->next;
00343           continue;
00344         }
00345         else
00346         {
00347           optimizer::SEL_ARG *next= key2->next; // Keys are not overlapping
00348           if (key2_shared)
00349           {
00350             optimizer::SEL_ARG *cpy= new optimizer::SEL_ARG(*key2); // Must make copy
00351             if (! cpy)
00352               return 0; // OOM
00353             key1= key1->insert(cpy);
00354             key2->increment_use_count(key1->use_count+1);
00355           }
00356           else
00357             key1= key1->insert(key2);   // Will destroy key2_root
00358           key2= next;
00359           continue;
00360         }
00361       }
00362     }
00363 
00364     // tmp.max >= key2.min && tmp.min <= key.cmax(overlapping ranges)
00365     if (eq_tree(tmp->next_key_part,key2->next_key_part))
00366     {
00367       if (tmp->is_same(key2))
00368       {
00369         tmp->merge_flags(key2);     // Copy maybe flags
00370         key2->increment_use_count(-1);    // Free not used tree
00371       }
00372       else
00373       {
00374         optimizer::SEL_ARG *last= tmp;
00375         while (last->next && last->next->cmp_min_to_max(key2) <= 0 &&
00376                eq_tree(last->next->next_key_part,key2->next_key_part))
00377         {
00378           optimizer::SEL_ARG *save= last;
00379           last= last->next;
00380           key1= key1->tree_delete(save);
00381         }
00382         last->copy_min(tmp);
00383         if (last->copy_min(key2) || last->copy_max(key2))
00384         {         // Full range
00385           key1->free_tree();
00386           for (; key2; key2= key2->next)
00387             key2->increment_use_count(-1);  // Free not used tree
00388           if (key1->maybe_flag)
00389             return new optimizer::SEL_ARG(optimizer::SEL_ARG::MAYBE_KEY);
00390           return 0;
00391         }
00392       }
00393       key2= key2->next;
00394       continue;
00395     }
00396 
00397     if (cmp >= 0 && tmp->cmp_min_to_min(key2) < 0)
00398     {           // tmp.min <= x < key2.min
00399       optimizer::SEL_ARG *new_arg= tmp->clone_first(key2);
00400       if (! new_arg)
00401         return 0;       // OOM
00402       if ((new_arg->next_key_part= key1->next_key_part))
00403         new_arg->increment_use_count(key1->use_count+1);
00404       tmp->copy_min_to_min(key2);
00405       key1= key1->insert(new_arg);
00406     }
00407 
00408     // tmp.min >= key2.min && tmp.min <= key2.max
00409     optimizer::SEL_ARG key(*key2); // Get copy we can modify
00410     for (;;)
00411     {
00412       if (tmp->cmp_min_to_min(&key) > 0)
00413       {           // key.min <= x < tmp.min
00414         optimizer::SEL_ARG *new_arg= key.clone_first(tmp);
00415         if (! new_arg)
00416           return 0;       // OOM
00417         if ((new_arg->next_key_part=key.next_key_part))
00418           new_arg->increment_use_count(key1->use_count+1);
00419         key1= key1->insert(new_arg);
00420       }
00421       if ((cmp=tmp->cmp_max_to_max(&key)) <= 0)
00422       {           // tmp.min. <= x <= tmp.max
00423         tmp->maybe_flag|= key.maybe_flag;
00424         key.increment_use_count(key1->use_count+1);
00425         tmp->next_key_part= key_or(param, tmp->next_key_part, key.next_key_part);
00426         if (! cmp)        // Key2 is ready
00427           break;
00428         key.copy_max_to_min(tmp);
00429         if (! (tmp= tmp->next))
00430         {
00431           optimizer::SEL_ARG *tmp2= new optimizer::SEL_ARG(key);
00432           if (! tmp2)
00433             return 0;       // OOM
00434           key1= key1->insert(tmp2);
00435           key2= key2->next;
00436           goto end;
00437         }
00438         if (tmp->cmp_min_to_max(&key) > 0)
00439         {
00440           optimizer::SEL_ARG *tmp2= new optimizer::SEL_ARG(key);
00441           if (! tmp2)
00442             return 0;       // OOM
00443           key1= key1->insert(tmp2);
00444           break;
00445         }
00446       }
00447       else
00448       {
00449         optimizer::SEL_ARG *new_arg= tmp->clone_last(&key); // tmp.min <= x <= key.max
00450         if (! new_arg)
00451           return 0;       // OOM
00452         tmp->copy_max_to_min(&key);
00453         tmp->increment_use_count(key1->use_count+1);
00454         /* Increment key count as it may be used for next loop */
00455         key.increment_use_count(1);
00456         new_arg->next_key_part= key_or(param, tmp->next_key_part, key.next_key_part);
00457         key1= key1->insert(new_arg);
00458         break;
00459       }
00460     }
00461     key2= key2->next;
00462   }
00463 
00464 end:
00465   while (key2)
00466   {
00467     optimizer::SEL_ARG *next= key2->next;
00468     if (key2_shared)
00469     {
00470       optimizer::SEL_ARG *tmp= new optimizer::SEL_ARG(*key2);   // Must make copy
00471       if (! tmp)
00472         return 0;
00473       key2->increment_use_count(key1->use_count+1);
00474       key1= key1->insert(tmp);
00475     }
00476     else
00477       key1= key1->insert(key2);     // Will destroy key2_root
00478     key2= next;
00479   }
00480   key1->use_count++;
00481   return key1;
00482 }
00483 
00484 
00485 /*
00486   Remove the trees that are not suitable for record retrieval.
00487   SYNOPSIS
00488     param  Range analysis parameter
00489     tree   Tree to be processed, tree->type is KEY or KEY_SMALLER
00490 
00491   DESCRIPTION
00492     This function walks through tree->keys[] and removes the SEL_ARG* trees
00493     that are not "maybe" trees (*) and cannot be used to construct quick range
00494     selects.
00495     (*) - have type MAYBE or MAYBE_KEY. Perhaps we should remove trees of
00496           these types here as well.
00497 
00498     A SEL_ARG* tree cannot be used to construct quick select if it has
00499     tree->part != 0. (e.g. it could represent "keypart2 < const").
00500 
00501     WHY THIS FUNCTION IS NEEDED
00502 
00503     Normally we allow construction of optimizer::SEL_TREE objects that have SEL_ARG
00504     trees that do not allow quick range select construction. For example for
00505     " keypart1=1 AND keypart2=2 " the execution will proceed as follows:
00506     tree1= optimizer::SEL_TREE { SEL_ARG{keypart1=1} }
00507     tree2= optimizer::SEL_TREE { SEL_ARG{keypart2=2} } -- can't make quick range select
00508                                                from this
00509     call tree_and(tree1, tree2) -- this joins SEL_ARGs into a usable SEL_ARG
00510                                    tree.
00511 
00512     There is an exception though: when we construct index_merge optimizer::SEL_TREE,
00513     any SEL_ARG* tree that cannot be used to construct quick range select can
00514     be removed, because current range analysis code doesn't provide any way
00515     that tree could be later combined with another tree.
00516     Consider an example: we should not construct
00517     st1 = optimizer::SEL_TREE {
00518       merges = SEL_IMERGE {
00519                             optimizer::SEL_TREE(t.key1part1 = 1),
00520                             optimizer::SEL_TREE(t.key2part2 = 2)   -- (*)
00521                           }
00522                    };
00523     because
00524      - (*) cannot be used to construct quick range select,
00525      - There is no execution path that would cause (*) to be converted to
00526        a tree that could be used.
00527 
00528     The latter is easy to verify: first, notice that the only way to convert
00529     (*) into a usable tree is to call tree_and(something, (*)).
00530 
00531     Second look at what tree_and/tree_or function would do when passed a
00532     optimizer::SEL_TREE that has the structure like st1 tree has, and conlcude that
00533     tree_and(something, (*)) will not be called.
00534 
00535   RETURN
00536     0  Ok, some suitable trees left
00537     1  No tree->keys[] left.
00538 */
00539 bool optimizer::remove_nonrange_trees(optimizer::RangeParameter *param, optimizer::SEL_TREE *tree)
00540 {
00541   bool res= false;
00542   for (uint32_t i= 0; i < param->keys; i++)
00543   {
00544     if (tree->keys[i])
00545     {
00546       if (tree->keys[i]->part)
00547       {
00548         tree->keys[i]= NULL;
00549         tree->keys_map.reset(i);
00550       }
00551       else
00552         res= true;
00553     }
00554   }
00555   return ! res;
00556 }
00557 
00558 
00559 /* Compare if two trees are equal */
00560 static bool eq_tree(optimizer::SEL_ARG *a, optimizer::SEL_ARG *b)
00561 {
00562   if (a == b)
00563   {
00564     return true;
00565   }
00566 
00567   if (! a || ! b || ! a->is_same(b))
00568   {
00569     return false;
00570   }
00571 
00572   if (a->left != &optimizer::null_element && b->left != &optimizer::null_element)
00573   {
00574     if (! eq_tree(a->left,b->left))
00575       return false;
00576   }
00577   else if (a->left != &optimizer::null_element || b->left != &optimizer::null_element)
00578   {
00579     return false;
00580   }
00581 
00582   if (a->right != &optimizer::null_element && b->right != &optimizer::null_element)
00583   {
00584     if (! eq_tree(a->right,b->right))
00585       return false;
00586   }
00587   else if (a->right != &optimizer::null_element || b->right != &optimizer::null_element)
00588   {
00589     return false;
00590   }
00591 
00592   if (a->next_key_part != b->next_key_part)
00593   {           // Sub range
00594     if (! a->next_key_part != ! b->next_key_part ||
00595         ! eq_tree(a->next_key_part, b->next_key_part))
00596       return false;
00597   }
00598 
00599   return true;
00600 }