Drizzled Public API Documentation

btr0btr.cc

00001 /*****************************************************************************
00002 
00003 Copyright (C) 1994, 2010, Innobase Oy. All Rights Reserved.
00004 
00005 This program is free software; you can redistribute it and/or modify it under
00006 the terms of the GNU General Public License as published by the Free Software
00007 Foundation; version 2 of the License.
00008 
00009 This program is distributed in the hope that it will be useful, but WITHOUT
00010 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
00012 
00013 You should have received a copy of the GNU General Public License along with
00014 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
00015 St, Fifth Floor, Boston, MA 02110-1301 USA
00016 
00017 *****************************************************************************/
00018 
00019 /**************************************************//**
00020 @file btr/btr0btr.c
00021 The B-tree
00022 
00023 Created 6/2/1994 Heikki Tuuri
00024 *******************************************************/
00025 
00026 #include "btr0btr.h"
00027 
00028 #ifdef UNIV_NONINL
00029 #include "btr0btr.ic"
00030 #endif
00031 
00032 #include "fsp0fsp.h"
00033 #include "page0page.h"
00034 
00035 #ifndef UNIV_HOTBACKUP
00036 #include "btr0cur.h"
00037 #include "btr0sea.h"
00038 #include "btr0pcur.h"
00039 #include "rem0cmp.h"
00040 #include "lock0lock.h"
00041 #include "ibuf0ibuf.h"
00042 #include "trx0trx.h"
00043 #include "page0zip.h"
00044 
00045 /*
00046 Latching strategy of the InnoDB B-tree
00047 --------------------------------------
00048 A tree latch protects all non-leaf nodes of the tree. Each node of a tree
00049 also has a latch of its own.
00050 
00051 A B-tree operation normally first acquires an S-latch on the tree. It
00052 searches down the tree and releases the tree latch when it has the
00053 leaf node latch. To save CPU time we do not acquire any latch on
00054 non-leaf nodes of the tree during a search; those pages are only buffer-fixed.
00055 
00056 If an operation needs to restructure the tree, it acquires an X-latch on
00057 the tree before searching to a leaf node. If it needs, for example, to
00058 split a leaf,
00059 (1) InnoDB decides the split point in the leaf,
00060 (2) allocates a new page,
00061 (3) inserts the appropriate node pointer to the first non-leaf level,
00062 (4) releases the tree X-latch,
00063 (5) and then moves records from the leaf to the newly allocated page.
00064 
00065 Node pointers
00066 -------------
00067 Leaf pages of a B-tree contain the index records stored in the
00068 tree. On levels n > 0 we store 'node pointers' to pages on level
00069 n - 1. For each page there is exactly one node pointer stored:
00070 thus our tree is an ordinary B-tree, not a B-link tree.
00071 
00072 A node pointer contains a prefix P of an index record. The prefix
00073 is long enough so that it determines an index record uniquely.
00074 The file page number of the child page is added as the last
00075 field. In the child page we may store node pointers or index records
00076 which are >= P in alphabetical order, but < P1 if there is
00077 a next node pointer on the level, and P1 is its prefix.
00078 
00079 If a node pointer with a prefix P points to a non-leaf child,
00080 then the leftmost record in the child must have the same
00081 prefix P. If it points to a leaf node, the child is not required
00082 to contain any record with a prefix equal to P. The leaf case
00083 is decided this way to allow arbitrary deletions in a leaf node
00084 without touching upper levels of the tree.
00085 
00086 We have predefined a special minimum record, which we treat
00087 as the smallest record in any alphabetical order.
00088 A minimum record is denoted by setting a bit in the record
00089 header. A minimum record acts as the prefix of a node pointer
00090 which points to a leftmost node on any level of the tree.
00091 
00092 File page allocation
00093 --------------------
00094 In the root node of a B-tree there are two file segment headers.
00095 The leaf pages of a tree are allocated from one file segment, to
00096 make them consecutive on disk if possible. From the other file segment
00097 we allocate pages for the non-leaf levels of the tree.
00098 */
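
The following is an illustrative sketch, not part of the original source: it shows the
read-side latching pattern described above using only calls that appear later in this
file (compare btr_get_size() below). The function name btr_latch_sketch is hypothetical.

static void
btr_latch_sketch(dict_index_t* index)
{
  mtr_t   mtr;
  page_t* root;

  mtr_start(&mtr);

  /* S-latch the whole tree for the duration of the mini-transaction */
  mtr_s_lock(dict_index_get_lock(index), &mtr);

  /* btr_root_get() fetches the root page, X-latching that page */
  root = btr_root_get(index, &mtr);
  (void) root;  /* ... read the root / descend towards a leaf ... */

  /* Committing the mtr releases the latches and buffer fixes */
  mtr_commit(&mtr);
}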
00099 
00100 #ifdef UNIV_BTR_DEBUG
00101 /**************************************************************/
00104 static
00105 ibool
00106 btr_root_fseg_validate(
00107 /*===================*/
00108   const fseg_header_t*  seg_header, 
00109   ulint     space)    
00110 {
00111   ulint offset = mach_read_from_2(seg_header + FSEG_HDR_OFFSET);
00112 
00113   ut_a(mach_read_from_4(seg_header + FSEG_HDR_SPACE) == space);
00114   ut_a(offset >= FIL_PAGE_DATA);
00115   ut_a(offset <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END);
00116   return(TRUE);
00117 }
00118 #endif /* UNIV_BTR_DEBUG */
00119 
00120 /**************************************************************/
00123 static
00124 buf_block_t*
00125 btr_root_block_get(
00126 /*===============*/
00127   dict_index_t* index,  
00128   mtr_t*    mtr)  
00129 {
00130   ulint   space;
00131   ulint   zip_size;
00132   ulint   root_page_no;
00133   buf_block_t*  block;
00134 
00135   space = dict_index_get_space(index);
00136   zip_size = dict_table_zip_size(index->table);
00137   root_page_no = dict_index_get_page(index);
00138 
00139   block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, mtr);
00140   ut_a((ibool)!!page_is_comp(buf_block_get_frame(block))
00141        == dict_table_is_comp(index->table));
00142 #ifdef UNIV_BTR_DEBUG
00143   if (!dict_index_is_ibuf(index)) {
00144     const page_t* root = buf_block_get_frame(block);
00145 
00146     ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
00147               + root, space));
00148     ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
00149               + root, space));
00150   }
00151 #endif /* UNIV_BTR_DEBUG */
00152 
00153   return(block);
00154 }
00155 
00156 /**************************************************************/
00159 UNIV_INTERN
00160 page_t*
00161 btr_root_get(
00162 /*=========*/
00163   dict_index_t* index,  
00164   mtr_t*    mtr)  
00165 {
00166   return(buf_block_get_frame(btr_root_block_get(index, mtr)));
00167 }
00168 
00169 /*************************************************************/
00173 UNIV_INTERN
00174 rec_t*
00175 btr_get_prev_user_rec(
00176 /*==================*/
00177   rec_t*  rec,  
00178   mtr_t*  mtr)  
00180 {
00181   page_t* page;
00182   page_t* prev_page;
00183   ulint prev_page_no;
00184 
00185   if (!page_rec_is_infimum(rec)) {
00186 
00187     rec_t*  prev_rec = page_rec_get_prev(rec);
00188 
00189     if (!page_rec_is_infimum(prev_rec)) {
00190 
00191       return(prev_rec);
00192     }
00193   }
00194 
00195   page = page_align(rec);
00196   prev_page_no = btr_page_get_prev(page, mtr);
00197 
00198   if (prev_page_no != FIL_NULL) {
00199 
00200     ulint   space;
00201     ulint   zip_size;
00202     buf_block_t*  prev_block;
00203 
00204     space = page_get_space_id(page);
00205     zip_size = fil_space_get_zip_size(space);
00206 
00207     prev_block = buf_page_get_with_no_latch(space, zip_size,
00208               prev_page_no, mtr);
00209     prev_page = buf_block_get_frame(prev_block);
00210     /* The caller must already have a latch on the sibling page */
00211     ut_ad(mtr_memo_contains(mtr, prev_block,
00212           MTR_MEMO_PAGE_S_FIX)
00213           || mtr_memo_contains(mtr, prev_block,
00214              MTR_MEMO_PAGE_X_FIX));
00215 #ifdef UNIV_BTR_DEBUG
00216     ut_a(page_is_comp(prev_page) == page_is_comp(page));
00217     ut_a(btr_page_get_next(prev_page, mtr)
00218          == page_get_page_no(page));
00219 #endif /* UNIV_BTR_DEBUG */
00220 
00221     return(page_rec_get_prev(page_get_supremum_rec(prev_page)));
00222   }
00223 
00224   return(NULL);
00225 }
00226 
00227 /*************************************************************/
00231 UNIV_INTERN
00232 rec_t*
00233 btr_get_next_user_rec(
00234 /*==================*/
00235   rec_t*  rec,  
00236   mtr_t*  mtr)  
00238 {
00239   page_t* page;
00240   page_t* next_page;
00241   ulint next_page_no;
00242 
00243   if (!page_rec_is_supremum(rec)) {
00244 
00245     rec_t*  next_rec = page_rec_get_next(rec);
00246 
00247     if (!page_rec_is_supremum(next_rec)) {
00248 
00249       return(next_rec);
00250     }
00251   }
00252 
00253   page = page_align(rec);
00254   next_page_no = btr_page_get_next(page, mtr);
00255 
00256   if (next_page_no != FIL_NULL) {
00257     ulint   space;
00258     ulint   zip_size;
00259     buf_block_t*  next_block;
00260 
00261     space = page_get_space_id(page);
00262     zip_size = fil_space_get_zip_size(space);
00263 
00264     next_block = buf_page_get_with_no_latch(space, zip_size,
00265               next_page_no, mtr);
00266     next_page = buf_block_get_frame(next_block);
00267     /* The caller must already have a latch on the sibling page */
00268     ut_ad(mtr_memo_contains(mtr, next_block, MTR_MEMO_PAGE_S_FIX)
00269           || mtr_memo_contains(mtr, next_block,
00270              MTR_MEMO_PAGE_X_FIX));
00271 #ifdef UNIV_BTR_DEBUG
00272     ut_a(page_is_comp(next_page) == page_is_comp(page));
00273     ut_a(btr_page_get_prev(next_page, mtr)
00274          == page_get_page_no(page));
00275 #endif /* UNIV_BTR_DEBUG */
00276 
00277     return(page_rec_get_next(page_get_infimum_rec(next_page)));
00278   }
00279 
00280   return(NULL);
00281 }
00282 
00283 /**************************************************************/
00286 static
00287 void
00288 btr_page_create(
00289 /*============*/
00290   buf_block_t*  block,  
00291   page_zip_des_t* page_zip,
00292   dict_index_t* index,  
00293   ulint   level,  
00294   mtr_t*    mtr)  
00295 {
00296   page_t*   page = buf_block_get_frame(block);
00297 
00298   ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
00299 
00300   if (UNIV_LIKELY_NULL(page_zip)) {
00301     page_create_zip(block, index, level, mtr);
00302   } else {
00303     page_create(block, mtr, dict_table_is_comp(index->table));
00304     /* Set the level of the new index page */
00305     btr_page_set_level(page, NULL, level, mtr);
00306   }
00307 
00308   block->check_index_page_at_flush = TRUE;
00309 
00310   btr_page_set_index_id(page, page_zip, index->id, mtr);
00311 }
00312 
00313 /**************************************************************/
00317 static
00318 buf_block_t*
00319 btr_page_alloc_for_ibuf(
00320 /*====================*/
00321   dict_index_t* index,  
00322   mtr_t*    mtr)  
00323 {
00324   fil_addr_t  node_addr;
00325   page_t*   root;
00326   page_t*   new_page;
00327   buf_block_t*  new_block;
00328 
00329   root = btr_root_get(index, mtr);
00330 
00331   node_addr = flst_get_first(root + PAGE_HEADER
00332            + PAGE_BTR_IBUF_FREE_LIST, mtr);
00333   ut_a(node_addr.page != FIL_NULL);
00334 
00335   new_block = buf_page_get(dict_index_get_space(index),
00336          dict_table_zip_size(index->table),
00337          node_addr.page, RW_X_LATCH, mtr);
00338   new_page = buf_block_get_frame(new_block);
00339   buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW);
00340 
00341   flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
00342         new_page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE,
00343         mtr);
00344   ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
00345           mtr));
00346 
00347   return(new_block);
00348 }
00349 
00350 /**************************************************************/
00354 UNIV_INTERN
00355 buf_block_t*
00356 btr_page_alloc(
00357 /*===========*/
00358   dict_index_t* index,    
00359   ulint   hint_page_no, 
00360   byte    file_direction, 
00362   ulint   level,    
00364   mtr_t*    mtr)    
00365 {
00366   fseg_header_t*  seg_header;
00367   page_t*   root;
00368   buf_block_t*  new_block;
00369   ulint   new_page_no;
00370 
00371   if (dict_index_is_ibuf(index)) {
00372 
00373     return(btr_page_alloc_for_ibuf(index, mtr));
00374   }
00375 
00376   root = btr_root_get(index, mtr);
00377 
00378   if (level == 0) {
00379     seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
00380   } else {
00381     seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
00382   }
00383 
00384   /* Parameter TRUE below states that the caller has made the
00385   reservation for free extents, and thus we know that a page can
00386   be allocated: */
00387 
00388   new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no,
00389                file_direction, TRUE, mtr);
00390   if (new_page_no == FIL_NULL) {
00391 
00392     return(NULL);
00393   }
00394 
00395   new_block = buf_page_get(dict_index_get_space(index),
00396          dict_table_zip_size(index->table),
00397          new_page_no, RW_X_LATCH, mtr);
00398   buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW);
00399 
00400   return(new_block);
00401 }
00402 
00403 /**************************************************************/
00406 UNIV_INTERN
00407 ulint
00408 btr_get_size(
00409 /*=========*/
00410   dict_index_t* index,  
00411   ulint   flag) 
00412 {
00413   fseg_header_t*  seg_header;
00414   page_t*   root;
00415   ulint   n;
00416   ulint   dummy;
00417   mtr_t   mtr;
00418 
00419   mtr_start(&mtr);
00420 
00421   mtr_s_lock(dict_index_get_lock(index), &mtr);
00422 
00423   root = btr_root_get(index, &mtr);
00424 
00425   if (flag == BTR_N_LEAF_PAGES) {
00426     seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
00427 
00428     fseg_n_reserved_pages(seg_header, &n, &mtr);
00429 
00430   } else if (flag == BTR_TOTAL_SIZE) {
00431     seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
00432 
00433     n = fseg_n_reserved_pages(seg_header, &dummy, &mtr);
00434 
00435     seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
00436 
00437     n += fseg_n_reserved_pages(seg_header, &dummy, &mtr);
00438   } else {
00439     ut_error;
00440   }
00441 
00442   mtr_commit(&mtr);
00443 
00444   return(n);
00445 }
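
A minimal usage sketch (not in the original file), assuming a valid dict_index_t* handle
named index: btr_get_size() accepts one of the two flags handled above and opens its own
mini-transaction internally.

  ulint n_leaf  = btr_get_size(index, BTR_N_LEAF_PAGES); /* pages in the leaf segment */
  ulint n_total = btr_get_size(index, BTR_TOTAL_SIZE);   /* leaf + non-leaf pages */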
00446 
00447 /**************************************************************/
00450 static
00451 void
00452 btr_page_free_for_ibuf(
00453 /*===================*/
00454   dict_index_t* index,  
00455   buf_block_t*  block,  
00456   mtr_t*    mtr)  
00457 {
00458   page_t*   root;
00459 
00460   ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
00461   root = btr_root_get(index, mtr);
00462 
00463   flst_add_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
00464            buf_block_get_frame(block)
00465            + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
00466 
00467   ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
00468           mtr));
00469 }
00470 
00471 /**************************************************************/
00475 UNIV_INTERN
00476 void
00477 btr_page_free_low(
00478 /*==============*/
00479   dict_index_t* index,  
00480   buf_block_t*  block,  
00481   ulint   level,  
00482   mtr_t*    mtr)  
00483 {
00484   fseg_header_t*  seg_header;
00485   page_t*   root;
00486 
00487   ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
00488   /* The page becomes invalid for optimistic searches: increment the frame
00489   modify clock */
00490 
00491   buf_block_modify_clock_inc(block);
00492 
00493   if (dict_index_is_ibuf(index)) {
00494 
00495     btr_page_free_for_ibuf(index, block, mtr);
00496 
00497     return;
00498   }
00499 
00500   root = btr_root_get(index, mtr);
00501 
00502   if (level == 0) {
00503     seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
00504   } else {
00505     seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
00506   }
00507 
00508   fseg_free_page(seg_header,
00509            buf_block_get_space(block),
00510            buf_block_get_page_no(block), mtr);
00511 }
00512 
00513 /**************************************************************/
00516 UNIV_INTERN
00517 void
00518 btr_page_free(
00519 /*==========*/
00520   dict_index_t* index,  
00521   buf_block_t*  block,  
00522   mtr_t*    mtr)  
00523 {
00524   ulint   level;
00525 
00526   level = btr_page_get_level(buf_block_get_frame(block), mtr);
00527 
00528   btr_page_free_low(index, block, level, mtr);
00529 }
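
The sketch below is illustrative only (not from the original source) and shows how the
allocation and free routines above fit together, in the way the split code later in this
file uses them. It assumes index, level and mtr are already set up and, as noted in
btr_page_alloc(), that the caller has reserved free extents; FSP_UP is one of the
allocation direction hints used elsewhere in this file.

  buf_block_t*  new_block;

  new_block = btr_page_alloc(index, 0, FSP_UP, level, mtr);

  if (new_block != NULL) {
    /* Initialize the block as an empty B-tree page of the given level */
    btr_page_create(new_block, buf_block_get_page_zip(new_block),
                    index, level, mtr);

    /* ... move records onto the page and attach it to the tree; if the
    page later becomes empty it can be returned to the segment with: */
    btr_page_free(index, new_block, mtr);
  }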
00530 
00531 /**************************************************************/
00533 UNIV_INLINE
00534 void
00535 btr_node_ptr_set_child_page_no(
00536 /*===========================*/
00537   rec_t*    rec,  
00538   page_zip_des_t* page_zip,
00540   const ulint*  offsets,
00541   ulint   page_no,
00542   mtr_t*    mtr)  
00543 {
00544   byte* field;
00545   ulint len;
00546 
00547   ut_ad(rec_offs_validate(rec, NULL, offsets));
00548   ut_ad(!page_is_leaf(page_align(rec)));
00549   ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
00550 
00551   /* The child address is in the last field */
00552   field = rec_get_nth_field(rec, offsets,
00553           rec_offs_n_fields(offsets) - 1, &len);
00554 
00555   ut_ad(len == REC_NODE_PTR_SIZE);
00556 
00557   if (UNIV_LIKELY_NULL(page_zip)) {
00558     page_zip_write_node_ptr(page_zip, rec,
00559           rec_offs_data_size(offsets),
00560           page_no, mtr);
00561   } else {
00562     mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr);
00563   }
00564 }
00565 
00566 /************************************************************/
00569 static
00570 buf_block_t*
00571 btr_node_ptr_get_child(
00572 /*===================*/
00573   const rec_t*  node_ptr,
00574   dict_index_t* index,  
00575   const ulint*  offsets,
00576   mtr_t*    mtr)  
00577 {
00578   ulint page_no;
00579   ulint space;
00580 
00581   ut_ad(rec_offs_validate(node_ptr, index, offsets));
00582   space = page_get_space_id(page_align(node_ptr));
00583   page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
00584 
00585   return(btr_block_get(space, dict_table_zip_size(index->table),
00586            page_no, RW_X_LATCH, mtr));
00587 }
00588 
00589 /************************************************************/
00593 static
00594 ulint*
00595 btr_page_get_father_node_ptr_func(
00596 /*==============================*/
00597   ulint*    offsets,
00598   mem_heap_t* heap, 
00599   btr_cur_t*  cursor, 
00602   const char* file, 
00603   ulint   line, 
00604   mtr_t*    mtr)  
00605 {
00606   dtuple_t* tuple;
00607   rec_t*    user_rec;
00608   rec_t*    node_ptr;
00609   ulint   level;
00610   ulint   page_no;
00611   dict_index_t* index;
00612 
00613   page_no = buf_block_get_page_no(btr_cur_get_block(cursor));
00614   index = btr_cur_get_index(cursor);
00615 
00616   ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
00617         MTR_MEMO_X_LOCK));
00618 
00619   ut_ad(dict_index_get_page(index) != page_no);
00620 
00621   level = btr_page_get_level(btr_cur_get_page(cursor), mtr);
00622 
00623   user_rec = btr_cur_get_rec(cursor);
00624   ut_a(page_rec_is_user_rec(user_rec));
00625   tuple = dict_index_build_node_ptr(index, user_rec, 0, heap, level);
00626 
00627   btr_cur_search_to_nth_level(index, level + 1, tuple, PAGE_CUR_LE,
00628             BTR_CONT_MODIFY_TREE, cursor, 0,
00629             file, line, mtr);
00630 
00631   node_ptr = btr_cur_get_rec(cursor);
00632   ut_ad(!page_rec_is_comp(node_ptr)
00633         || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR);
00634   offsets = rec_get_offsets(node_ptr, index, offsets,
00635           ULINT_UNDEFINED, &heap);
00636 
00637   if (UNIV_UNLIKELY(btr_node_ptr_get_child_page_no(node_ptr, offsets)
00638         != page_no)) {
00639     rec_t*  print_rec;
00640     fputs("InnoDB: Dump of the child page:\n", stderr);
00641     buf_page_print(page_align(user_rec), 0);
00642     fputs("InnoDB: Dump of the parent page:\n", stderr);
00643     buf_page_print(page_align(node_ptr), 0);
00644 
00645     fputs("InnoDB: Corruption of an index tree: table ", stderr);
00646     ut_print_name(stderr, NULL, TRUE, index->table_name);
00647     fputs(", index ", stderr);
00648     ut_print_name(stderr, NULL, FALSE, index->name);
00649     fprintf(stderr, ",\n"
00650       "InnoDB: father ptr page no %lu, child page no %lu\n",
00651       (ulong)
00652       btr_node_ptr_get_child_page_no(node_ptr, offsets),
00653       (ulong) page_no);
00654     print_rec = page_rec_get_next(
00655       page_get_infimum_rec(page_align(user_rec)));
00656     offsets = rec_get_offsets(print_rec, index,
00657             offsets, ULINT_UNDEFINED, &heap);
00658     page_rec_print(print_rec, offsets);
00659     offsets = rec_get_offsets(node_ptr, index, offsets,
00660             ULINT_UNDEFINED, &heap);
00661     page_rec_print(node_ptr, offsets);
00662 
00663     fputs("InnoDB: You should dump + drop + reimport the table"
00664           " to fix the\n"
00665           "InnoDB: corruption. If the crash happens at "
00666           "the database startup, see\n"
00667           "InnoDB: " REFMAN "forcing-recovery.html about\n"
00668           "InnoDB: forcing recovery. "
00669           "Then dump + drop + reimport.\n", stderr);
00670 
00671     ut_error;
00672   }
00673 
00674   return(offsets);
00675 }
00676 
00677 #define btr_page_get_father_node_ptr(of,heap,cur,mtr)     \
00678   btr_page_get_father_node_ptr_func(of,heap,cur,__FILE__,__LINE__,mtr)
00679 
00680 /************************************************************/
00684 static
00685 ulint*
00686 btr_page_get_father_block(
00687 /*======================*/
00688   ulint*    offsets,
00689   mem_heap_t* heap, 
00690   dict_index_t* index,  
00691   buf_block_t*  block,  
00692   mtr_t*    mtr,  
00693   btr_cur_t*  cursor) 
00695 {
00696   rec_t*  rec
00697     = page_rec_get_next(page_get_infimum_rec(buf_block_get_frame(
00698                  block)));
00699   btr_cur_position(index, rec, block, cursor);
00700   return(btr_page_get_father_node_ptr(offsets, heap, cursor, mtr));
00701 }
00702 
00703 /************************************************************/
00706 static
00707 void
00708 btr_page_get_father(
00709 /*================*/
00710   dict_index_t* index,  
00711   buf_block_t*  block,  
00712   mtr_t*    mtr,  
00713   btr_cur_t*  cursor) 
00715 {
00716   mem_heap_t* heap;
00717   rec_t*    rec
00718     = page_rec_get_next(page_get_infimum_rec(buf_block_get_frame(
00719                  block)));
00720   btr_cur_position(index, rec, block, cursor);
00721 
00722   heap = mem_heap_create(100);
00723   btr_page_get_father_node_ptr(NULL, heap, cursor, mtr);
00724   mem_heap_free(heap);
00725 }
00726 
00727 /************************************************************/
00730 UNIV_INTERN
00731 ulint
00732 btr_create(
00733 /*=======*/
00734   ulint   type, 
00735   ulint   space,  
00736   ulint   zip_size,
00738   index_id_t  index_id,
00739   dict_index_t* index,  
00740   mtr_t*    mtr)  
00741 {
00742   ulint   page_no;
00743   buf_block_t*  block;
00744   buf_frame_t*  frame;
00745   page_t*   page;
00746   page_zip_des_t* page_zip;
00747 
00748   /* Create the two new segments (one, in the case of an ibuf tree) for
00749   the index tree; the segment headers are put on the allocated root page
00750   (for an ibuf tree, not in the root, but on a separate ibuf header
00751   page) */
00752 
00753   if (type & DICT_IBUF) {
00754     /* First allocate the ibuf header page */
00755     buf_block_t*  ibuf_hdr_block = fseg_create(
00756       space, 0,
00757       IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr);
00758 
00759     buf_block_dbg_add_level(ibuf_hdr_block, SYNC_TREE_NODE_NEW);
00760 
00761     ut_ad(buf_block_get_page_no(ibuf_hdr_block)
00762           == IBUF_HEADER_PAGE_NO);
00763     /* Then allocate the next page in the segment: it will be the
00764     tree root page */
00765 
00766     page_no = fseg_alloc_free_page(buf_block_get_frame(
00767                    ibuf_hdr_block)
00768                  + IBUF_HEADER
00769                  + IBUF_TREE_SEG_HEADER,
00770                  IBUF_TREE_ROOT_PAGE_NO,
00771                  FSP_UP, mtr);
00772     ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO);
00773 
00774     block = buf_page_get(space, zip_size, page_no,
00775              RW_X_LATCH, mtr);
00776   } else {
00777     block = fseg_create(space, 0,
00778             PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr);
00779   }
00780 
00781   if (block == NULL) {
00782 
00783     return(FIL_NULL);
00784   }
00785 
00786   page_no = buf_block_get_page_no(block);
00787   frame = buf_block_get_frame(block);
00788 
00789   buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
00790 
00791   if (type & DICT_IBUF) {
00792     /* It is an insert buffer tree: initialize the free list */
00793 
00794     ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO);
00795 
00796     flst_init(frame + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr);
00797   } else {
00798     /* It is a non-ibuf tree: create a file segment for leaf
00799     pages */
00800     if (!fseg_create(space, page_no,
00801          PAGE_HEADER + PAGE_BTR_SEG_LEAF, mtr)) {
00802       /* Not enough space for new segment, free root
00803       segment before return. */
00804       btr_free_root(space, zip_size, page_no, mtr);
00805 
00806       return(FIL_NULL);
00807     }
00808 
00809     /* The fseg create acquires a second latch on the page,
00810     therefore we must declare it: */
00811     buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
00812   }
00813 
00814   /* Create a new index page on the allocated segment page */
00815   page_zip = buf_block_get_page_zip(block);
00816 
00817   if (UNIV_LIKELY_NULL(page_zip)) {
00818     page = page_create_zip(block, index, 0, mtr);
00819   } else {
00820     page = page_create(block, mtr,
00821            dict_table_is_comp(index->table));
00822     /* Set the level of the new index page */
00823     btr_page_set_level(page, NULL, 0, mtr);
00824   }
00825 
00826   block->check_index_page_at_flush = TRUE;
00827 
00828   /* Set the index id of the page */
00829   btr_page_set_index_id(page, page_zip, index_id, mtr);
00830 
00831   /* Set the next node and previous node fields */
00832   btr_page_set_next(page, page_zip, FIL_NULL, mtr);
00833   btr_page_set_prev(page, page_zip, FIL_NULL, mtr);
00834 
00835   /* We reset the free bits for the page to allow creation of several
00836   trees in the same mtr; otherwise the latch on a bitmap page would
00837   prevent it because of the latching order */
00838 
00839   if (!(type & DICT_CLUSTERED)) {
00840     ibuf_reset_free_bits(block);
00841   }
00842 
00843   /* In the following assertion we test that two records of maximum
00844   allowed size fit on the root page: this fact is needed to ensure
00845   correctness of split algorithms */
00846 
00847   ut_ad(page_get_max_insert_size(page, 2) > 2 * BTR_PAGE_MAX_REC_SIZE);
00848 
00849   return(page_no);
00850 }
00851 
00852 /************************************************************/
00855 UNIV_INTERN
00856 void
00857 btr_free_but_not_root(
00858 /*==================*/
00859   ulint space,    
00860   ulint zip_size, 
00862   ulint root_page_no) 
00863 {
00864   ibool finished;
00865   page_t* root;
00866   mtr_t mtr;
00867 
00868 leaf_loop:
00869   mtr_start(&mtr);
00870 
00871   root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, &mtr);
00872 #ifdef UNIV_BTR_DEBUG
00873   ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
00874             + root, space));
00875   ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
00876             + root, space));
00877 #endif /* UNIV_BTR_DEBUG */
00878 
00879   /* NOTE: page hash indexes are dropped when a page is freed inside
00880   fsp0fsp. */
00881 
00882   finished = fseg_free_step(root + PAGE_HEADER + PAGE_BTR_SEG_LEAF,
00883           &mtr);
00884   mtr_commit(&mtr);
00885 
00886   if (!finished) {
00887 
00888     goto leaf_loop;
00889   }
00890 top_loop:
00891   mtr_start(&mtr);
00892 
00893   root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, &mtr);
00894 #ifdef UNIV_BTR_DEBUG
00895   ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
00896             + root, space));
00897 #endif /* UNIV_BTR_DEBUG */
00898 
00899   finished = fseg_free_step_not_header(
00900     root + PAGE_HEADER + PAGE_BTR_SEG_TOP, &mtr);
00901   mtr_commit(&mtr);
00902 
00903   if (!finished) {
00904 
00905     goto top_loop;
00906   }
00907 }
00908 
00909 /************************************************************/
00911 UNIV_INTERN
00912 void
00913 btr_free_root(
00914 /*==========*/
00915   ulint space,    
00916   ulint zip_size, 
00918   ulint root_page_no, 
00919   mtr_t*  mtr)    
00921 {
00922   buf_block_t*  block;
00923   fseg_header_t*  header;
00924 
00925   block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, mtr);
00926 
00927   btr_search_drop_page_hash_index(block);
00928 
00929   header = buf_block_get_frame(block) + PAGE_HEADER + PAGE_BTR_SEG_TOP;
00930 #ifdef UNIV_BTR_DEBUG
00931   ut_a(btr_root_fseg_validate(header, space));
00932 #endif /* UNIV_BTR_DEBUG */
00933 
00934   while (!fseg_free_step(header, mtr)) {};
00935 }
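
An illustrative lifecycle sketch, not part of the original file, for the create/free
functions above. Variable names (including drop_mtr) are hypothetical; btr_create()
returns the root page number or FIL_NULL on failure, and an index tree is freed in two
steps: first everything except the root, then the root segment itself.

  ulint root_page_no;

  root_page_no = btr_create(type, space, zip_size, index_id, index, mtr);

  if (root_page_no != FIL_NULL) {
    /* ... use the tree; later, when dropping the index: */
    btr_free_but_not_root(space, zip_size, root_page_no);
    btr_free_root(space, zip_size, root_page_no, drop_mtr);
  }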
00936 #endif /* !UNIV_HOTBACKUP */
00937 
00938 /*************************************************************/
00940 static
00941 ibool
00942 btr_page_reorganize_low(
00943 /*====================*/
00944   ibool   recovery,
00949   buf_block_t*  block,  
00950   dict_index_t* index,  
00951   mtr_t*    mtr)  
00952 {
00953   buf_pool_t* buf_pool  = buf_pool_from_bpage(&block->page);
00954   page_t*   page    = buf_block_get_frame(block);
00955   page_zip_des_t* page_zip  = buf_block_get_page_zip(block);
00956   buf_block_t*  temp_block;
00957   page_t*   temp_page;
00958   ulint   log_mode;
00959   ulint   data_size1;
00960   ulint   data_size2;
00961   ulint   max_ins_size1;
00962   ulint   max_ins_size2;
00963   ibool   success   = FALSE;
00964 
00965   ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
00966   ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
00967 #ifdef UNIV_ZIP_DEBUG
00968   ut_a(!page_zip || page_zip_validate(page_zip, page));
00969 #endif /* UNIV_ZIP_DEBUG */
00970   data_size1 = page_get_data_size(page);
00971   max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
00972 
00973 #ifndef UNIV_HOTBACKUP
00974   /* Write the log record */
00975   mlog_open_and_write_index(mtr, page, index, page_is_comp(page)
00976           ? MLOG_COMP_PAGE_REORGANIZE
00977           : MLOG_PAGE_REORGANIZE, 0);
00978 #endif /* !UNIV_HOTBACKUP */
00979 
00980   /* Turn logging off */
00981   log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
00982 
00983 #ifndef UNIV_HOTBACKUP
00984   temp_block = buf_block_alloc(buf_pool, 0);
00985 #else /* !UNIV_HOTBACKUP */
00986   ut_ad(block == back_block1);
00987   temp_block = back_block2;
00988 #endif /* !UNIV_HOTBACKUP */
00989   temp_page = temp_block->frame;
00990 
00991   /* Copy the old page to temporary space */
00992   buf_frame_copy(temp_page, page);
00993 
00994 #ifndef UNIV_HOTBACKUP
00995   if (UNIV_LIKELY(!recovery)) {
00996     btr_search_drop_page_hash_index(block);
00997   }
00998 
00999   block->check_index_page_at_flush = TRUE;
01000 #endif /* !UNIV_HOTBACKUP */
01001 
01002   /* Recreate the page: note that global data on page (possible
01003   segment headers, next page-field, etc.) is preserved intact */
01004 
01005   page_create(block, mtr, dict_table_is_comp(index->table));
01006 
01007   /* Copy the records from the temporary space to the recreated page;
01008   do not copy the lock bits yet */
01009 
01010   page_copy_rec_list_end_no_locks(block, temp_block,
01011           page_get_infimum_rec(temp_page),
01012           index, mtr);
01013 
01014   if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
01015     /* Copy max trx id to recreated page */
01016     trx_id_t  max_trx_id = page_get_max_trx_id(temp_page);
01017     page_set_max_trx_id(block, NULL, max_trx_id, mtr);
01018     /* In crash recovery, dict_index_is_sec_or_ibuf() always
01019     returns TRUE, even for clustered indexes.  max_trx_id is
01020     unused in clustered index pages. */
01021     ut_ad(max_trx_id != 0 || recovery);
01022   }
01023 
01024   if (UNIV_LIKELY_NULL(page_zip)
01025       && UNIV_UNLIKELY
01026       (!page_zip_compress(page_zip, page, index, NULL))) {
01027 
01028     /* Restore the old page and exit. */
01029 
01030 #if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
01031     /* Check that the bytes that we skip are identical. */
01032     ut_a(!memcmp(page, temp_page, PAGE_HEADER));
01033     ut_a(!memcmp(PAGE_HEADER + PAGE_N_RECS + page,
01034            PAGE_HEADER + PAGE_N_RECS + temp_page,
01035            PAGE_DATA - (PAGE_HEADER + PAGE_N_RECS)));
01036     ut_a(!memcmp(UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + page,
01037            UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + temp_page,
01038            FIL_PAGE_DATA_END));
01039 #endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
01040 
01041     memcpy(PAGE_HEADER + page, PAGE_HEADER + temp_page,
01042            PAGE_N_RECS - PAGE_N_DIR_SLOTS);
01043     memcpy(PAGE_DATA + page, PAGE_DATA + temp_page,
01044            UNIV_PAGE_SIZE - PAGE_DATA - FIL_PAGE_DATA_END);
01045 
01046 #if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
01047     ut_a(!memcmp(page, temp_page, UNIV_PAGE_SIZE));
01048 #endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
01049 
01050     goto func_exit;
01051   }
01052 
01053 #ifndef UNIV_HOTBACKUP
01054   if (UNIV_LIKELY(!recovery)) {
01055     /* Update the record lock bitmaps */
01056     lock_move_reorganize_page(block, temp_block);
01057   }
01058 #endif /* !UNIV_HOTBACKUP */
01059 
01060   data_size2 = page_get_data_size(page);
01061   max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1);
01062 
01063   if (UNIV_UNLIKELY(data_size1 != data_size2)
01064       || UNIV_UNLIKELY(max_ins_size1 != max_ins_size2)) {
01065     buf_page_print(page, 0);
01066     buf_page_print(temp_page, 0);
01067     fprintf(stderr,
01068       "InnoDB: Error: page old data size %lu"
01069       " new data size %lu\n"
01070       "InnoDB: Error: page old max ins size %lu"
01071       " new max ins size %lu\n"
01072       "InnoDB: Submit a detailed bug report"
01073       " to http://bugs.mysql.com\n",
01074       (unsigned long) data_size1, (unsigned long) data_size2,
01075       (unsigned long) max_ins_size1,
01076       (unsigned long) max_ins_size2);
01077   } else {
01078     success = TRUE;
01079   }
01080 
01081 func_exit:
01082 #ifdef UNIV_ZIP_DEBUG
01083   ut_a(!page_zip || page_zip_validate(page_zip, page));
01084 #endif /* UNIV_ZIP_DEBUG */
01085 #ifndef UNIV_HOTBACKUP
01086   buf_block_free(temp_block);
01087 #endif /* !UNIV_HOTBACKUP */
01088 
01089   /* Restore logging mode */
01090   mtr_set_log_mode(mtr, log_mode);
01091 
01092   return(success);
01093 }
01094 
01095 #ifndef UNIV_HOTBACKUP
01096 /*************************************************************/
01103 UNIV_INTERN
01104 ibool
01105 btr_page_reorganize(
01106 /*================*/
01107   buf_block_t*  block,  
01108   dict_index_t* index,  
01109   mtr_t*    mtr)  
01110 {
01111   return(btr_page_reorganize_low(FALSE, block, index, mtr));
01112 }
01113 #endif /* !UNIV_HOTBACKUP */
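
Illustrative only (not in the original source): how a caller might react to the return
value of btr_page_reorganize(). On a compressed page the reorganization can fail when
recompression does not succeed, in which case btr_page_reorganize_low() has already
restored the page to its old state.

  if (!btr_page_reorganize(block, index, mtr)) {
    /* Reorganization failed (e.g. recompression of a compressed page);
    the page is unchanged, so fall back to another plan, such as
    splitting the page. */
  }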
01114 
01115 /***********************************************************/
01118 UNIV_INTERN
01119 byte*
01120 btr_parse_page_reorganize(
01121 /*======================*/
01122   byte*   ptr,  
01123   byte*   end_ptr __attribute__((unused)),
01125   dict_index_t* index,  
01126   buf_block_t*  block,  
01127   mtr_t*    mtr)  
01128 {
01129   ut_ad(ptr && end_ptr);
01130 
01131   /* The redo log record is empty, except for its initial part */
01132 
01133   if (UNIV_LIKELY(block != NULL)) {
01134     btr_page_reorganize_low(TRUE, block, index, mtr);
01135   }
01136 
01137   return(ptr);
01138 }
01139 
01140 #ifndef UNIV_HOTBACKUP
01141 /*************************************************************/
01143 static
01144 void
01145 btr_page_empty(
01146 /*===========*/
01147   buf_block_t*  block,  
01148   page_zip_des_t* page_zip,
01149   dict_index_t* index,  
01150   ulint   level,  
01151   mtr_t*    mtr)  
01152 {
01153   page_t* page = buf_block_get_frame(block);
01154 
01155   ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
01156   ut_ad(page_zip == buf_block_get_page_zip(block));
01157 #ifdef UNIV_ZIP_DEBUG
01158   ut_a(!page_zip || page_zip_validate(page_zip, page));
01159 #endif /* UNIV_ZIP_DEBUG */
01160 
01161   btr_search_drop_page_hash_index(block);
01162 
01163   /* Recreate the page: note that global data on page (possible
01164   segment headers, next page-field, etc.) is preserved intact */
01165 
01166   if (UNIV_LIKELY_NULL(page_zip)) {
01167     page_create_zip(block, index, level, mtr);
01168   } else {
01169     page_create(block, mtr, dict_table_is_comp(index->table));
01170     btr_page_set_level(page, NULL, level, mtr);
01171   }
01172 
01173   block->check_index_page_at_flush = TRUE;
01174 }
01175 
01176 /*************************************************************/
01183 UNIV_INTERN
01184 rec_t*
01185 btr_root_raise_and_insert(
01186 /*======================*/
01187   btr_cur_t*  cursor, 
01191   const dtuple_t* tuple,  
01192   ulint   n_ext,  
01193   mtr_t*    mtr)  
01194 {
01195   dict_index_t* index;
01196   page_t*   root;
01197   page_t*   new_page;
01198   ulint   new_page_no;
01199   rec_t*    rec;
01200   mem_heap_t* heap;
01201   dtuple_t* node_ptr;
01202   ulint   level;
01203   rec_t*    node_ptr_rec;
01204   page_cur_t* page_cursor;
01205   page_zip_des_t* root_page_zip;
01206   page_zip_des_t* new_page_zip;
01207   buf_block_t*  root_block;
01208   buf_block_t*  new_block;
01209 
01210   root = btr_cur_get_page(cursor);
01211   root_block = btr_cur_get_block(cursor);
01212   root_page_zip = buf_block_get_page_zip(root_block);
01213 #ifdef UNIV_ZIP_DEBUG
01214   ut_a(!root_page_zip || page_zip_validate(root_page_zip, root));
01215 #endif /* UNIV_ZIP_DEBUG */
01216   index = btr_cur_get_index(cursor);
01217 #ifdef UNIV_BTR_DEBUG
01218   if (!dict_index_is_ibuf(index)) {
01219     ulint space = dict_index_get_space(index);
01220 
01221     ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
01222               + root, space));
01223     ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
01224               + root, space));
01225   }
01226 
01227   ut_a(dict_index_get_page(index) == page_get_page_no(root));
01228 #endif /* UNIV_BTR_DEBUG */
01229   ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
01230         MTR_MEMO_X_LOCK));
01231   ut_ad(mtr_memo_contains(mtr, root_block, MTR_MEMO_PAGE_X_FIX));
01232 
01233   /* Allocate a new page to the tree. Root splitting is done by first
01234   moving the root records to the new page, emptying the root, putting
01235   a node pointer to the new page, and then splitting the new page. */
01236 
01237   level = btr_page_get_level(root, mtr);
01238 
01239   new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr);
01240   new_page = buf_block_get_frame(new_block);
01241   new_page_zip = buf_block_get_page_zip(new_block);
01242   ut_a(!new_page_zip == !root_page_zip);
01243   ut_a(!new_page_zip
01244        || page_zip_get_size(new_page_zip)
01245        == page_zip_get_size(root_page_zip));
01246 
01247   btr_page_create(new_block, new_page_zip, index, level, mtr);
01248 
01249   /* Set the next node and previous node fields of new page */
01250   btr_page_set_next(new_page, new_page_zip, FIL_NULL, mtr);
01251   btr_page_set_prev(new_page, new_page_zip, FIL_NULL, mtr);
01252 
01253   /* Copy the records from root to the new page one by one. */
01254 
01255   if (0
01256 #ifdef UNIV_ZIP_COPY
01257       || new_page_zip
01258 #endif /* UNIV_ZIP_COPY */
01259       || UNIV_UNLIKELY
01260       (!page_copy_rec_list_end(new_block, root_block,
01261              page_get_infimum_rec(root),
01262              index, mtr))) {
01263     ut_a(new_page_zip);
01264 
01265     /* Copy the page byte for byte. */
01266     page_zip_copy_recs(new_page_zip, new_page,
01267            root_page_zip, root, index, mtr);
01268 
01269     /* Update the lock table and possible hash index. */
01270 
01271     lock_move_rec_list_end(new_block, root_block,
01272                page_get_infimum_rec(root));
01273 
01274     btr_search_move_or_delete_hash_entries(new_block, root_block,
01275                    index);
01276   }
01277 
01278   /* If this is a pessimistic insert which is actually done to
01279   perform a pessimistic update, then we have stored the lock
01280   information of the record to be inserted on the infimum of the
01281   root page: we cannot discard the lock structs on the root page */
01282 
01283   lock_update_root_raise(new_block, root_block);
01284 
01285   /* Create a memory heap where the node pointer is stored */
01286   heap = mem_heap_create(100);
01287 
01288   rec = page_rec_get_next(page_get_infimum_rec(new_page));
01289   new_page_no = buf_block_get_page_no(new_block);
01290 
01291   /* Build the node pointer (= node key and page address) for the
01292   child */
01293 
01294   node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, heap,
01295                level);
01296   /* The node pointer must be marked as the predefined minimum record,
01297   as there is no lower alphabetical limit to records in the leftmost
01298   node of a level: */
01299   dtuple_set_info_bits(node_ptr,
01300            dtuple_get_info_bits(node_ptr)
01301            | REC_INFO_MIN_REC_FLAG);
01302 
01303   /* Rebuild the root page to get free space */
01304   btr_page_empty(root_block, root_page_zip, index, level + 1, mtr);
01305 
01306   /* Set the next node and previous node fields, although
01307   they should already have been set.  The previous node field
01308   must be FIL_NULL if root_page_zip != NULL, because the
01309   REC_INFO_MIN_REC_FLAG (of the first user record) will be
01310   set if and only if btr_page_get_prev() == FIL_NULL. */
01311   btr_page_set_next(root, root_page_zip, FIL_NULL, mtr);
01312   btr_page_set_prev(root, root_page_zip, FIL_NULL, mtr);
01313 
01314   page_cursor = btr_cur_get_page_cur(cursor);
01315 
01316   /* Insert node pointer to the root */
01317 
01318   page_cur_set_before_first(root_block, page_cursor);
01319 
01320   node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr,
01321                index, 0, mtr);
01322 
01323   /* The root page should only contain the node pointer
01324   to new_page at this point.  Thus, the data should fit. */
01325   ut_a(node_ptr_rec);
01326 
01327   /* Free the memory heap */
01328   mem_heap_free(heap);
01329 
01330   /* We play safe and reset the free bits for the new page */
01331 
01332 #if 0
01333   fprintf(stderr, "Root raise new page no %lu\n", new_page_no);
01334 #endif
01335 
01336   if (!dict_index_is_clust(index)) {
01337     ibuf_reset_free_bits(new_block);
01338   }
01339 
01340   /* Reposition the cursor to the child node */
01341   page_cur_search(new_block, index, tuple,
01342       PAGE_CUR_LE, page_cursor);
01343 
01344   /* Split the child and insert tuple */
01345   return(btr_page_split_and_insert(cursor, tuple, n_ext, mtr));
01346 }
01347 
01348 /*************************************************************/
01352 UNIV_INTERN
01353 ibool
01354 btr_page_get_split_rec_to_left(
01355 /*===========================*/
01356   btr_cur_t*  cursor, 
01357   rec_t**   split_rec) 
01361 {
01362   page_t* page;
01363   rec_t*  insert_point;
01364   rec_t*  infimum;
01365 
01366   page = btr_cur_get_page(cursor);
01367   insert_point = btr_cur_get_rec(cursor);
01368 
01369   if (page_header_get_ptr(page, PAGE_LAST_INSERT)
01370       == page_rec_get_next(insert_point)) {
01371 
01372     infimum = page_get_infimum_rec(page);
01373 
01374     /* If the convergence is in the middle of a page, also include
01375     the record immediately before the new insert in the upper
01376     page. Otherwise, we could repeatedly move from page to page
01377     lots of records smaller than the convergence point. */
01378 
01379     if (infimum != insert_point
01380         && page_rec_get_next(infimum) != insert_point) {
01381 
01382       *split_rec = insert_point;
01383     } else {
01384       *split_rec = page_rec_get_next(insert_point);
01385     }
01386 
01387     return(TRUE);
01388   }
01389 
01390   return(FALSE);
01391 }
01392 
01393 /*************************************************************/
01397 UNIV_INTERN
01398 ibool
01399 btr_page_get_split_rec_to_right(
01400 /*============================*/
01401   btr_cur_t*  cursor, 
01402   rec_t**   split_rec) 
01406 {
01407   page_t* page;
01408   rec_t*  insert_point;
01409 
01410   page = btr_cur_get_page(cursor);
01411   insert_point = btr_cur_get_rec(cursor);
01412 
01413   /* We use eager heuristics: if the new insert would be right after
01414   the previous insert on the same page, we assume that there is a
01415   pattern of sequential inserts here. */
01416 
01417   if (UNIV_LIKELY(page_header_get_ptr(page, PAGE_LAST_INSERT)
01418       == insert_point)) {
01419 
01420     rec_t*  next_rec;
01421 
01422     next_rec = page_rec_get_next(insert_point);
01423 
01424     if (page_rec_is_supremum(next_rec)) {
01425 split_at_new:
01426       /* Split at the new record to insert */
01427       *split_rec = NULL;
01428     } else {
01429       rec_t*  next_next_rec = page_rec_get_next(next_rec);
01430       if (page_rec_is_supremum(next_next_rec)) {
01431 
01432         goto split_at_new;
01433       }
01434 
01435       /* If there are >= 2 user records up from the insert
01436       point, split all but 1 off. We want to keep one because
01437       then sequential inserts can use the adaptive hash
01438       index, as they can do the necessary checks of the right
01439       search position just by looking at the records on this
01440       page. */
01441 
01442       *split_rec = next_next_rec;
01443     }
01444 
01445     return(TRUE);
01446   }
01447 
01448   return(FALSE);
01449 }
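
A sketch, not from the original file, of how a caller such as btr_page_split_and_insert()
further below might consult the two heuristics above before falling back to a size-based
split point; cursor, tuple and n_ext are assumed to be the caller's insert context, and a
*split_rec of NULL means "split at the record to be inserted".

  rec_t*  split_rec;

  if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
    /* ascending (sequential) insert pattern detected */
  } else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) {
    /* descending insert pattern detected */
  } else {
    /* no pattern: pick a split record that roughly halves the page,
    e.g. with btr_page_get_split_rec() defined below */
    split_rec = btr_page_get_split_rec(cursor, tuple, n_ext);
  }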
01450 
01451 /*************************************************************/
01457 static
01458 rec_t*
01459 btr_page_get_split_rec(
01460 /*===================*/
01461   btr_cur_t*  cursor, 
01462   const dtuple_t* tuple,  
01463   ulint   n_ext)  
01464 {
01465   page_t*   page;
01466   page_zip_des_t* page_zip;
01467   ulint   insert_size;
01468   ulint   free_space;
01469   ulint   total_data;
01470   ulint   total_n_recs;
01471   ulint   total_space;
01472   ulint   incl_data;
01473   rec_t*    ins_rec;
01474   rec_t*    rec;
01475   rec_t*    next_rec;
01476   ulint   n;
01477   mem_heap_t* heap;
01478   ulint*    offsets;
01479 
01480   page = btr_cur_get_page(cursor);
01481 
01482   insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
01483   free_space  = page_get_free_space_of_empty(page_is_comp(page));
01484 
01485   page_zip = btr_cur_get_page_zip(cursor);
01486   if (UNIV_LIKELY_NULL(page_zip)) {
01487     /* Estimate the free space of an empty compressed page. */
01488     ulint free_space_zip = page_zip_empty_size(
01489       cursor->index->n_fields,
01490       page_zip_get_size(page_zip));
01491 
01492     if (UNIV_LIKELY(free_space > (ulint) free_space_zip)) {
01493       free_space = (ulint) free_space_zip;
01494     }
01495   }
01496 
01497   /* free_space is now the free space of a created new page */
01498 
01499   total_data   = page_get_data_size(page) + insert_size;
01500   total_n_recs = page_get_n_recs(page) + 1;
01501   ut_ad(total_n_recs >= 2);
01502   total_space  = total_data + page_dir_calc_reserved_space(total_n_recs);
01503 
01504   n = 0;
01505   incl_data = 0;
01506   ins_rec = btr_cur_get_rec(cursor);
01507   rec = page_get_infimum_rec(page);
01508 
01509   heap = NULL;
01510   offsets = NULL;
01511 
01512   /* We start to include records in the left half, stopping when
01513   the space they reserve exceeds half of total_space. If the
01514   included records then fit on the left page, they will be put there,
01515   provided something is also left over for the right page;
01516   otherwise the last included record will be the first record on the
01517   right half page */
01518 
01519   do {
01520     /* Decide the next record to include */
01521     if (rec == ins_rec) {
01522       rec = NULL; /* NULL denotes that tuple is
01523           now included */
01524     } else if (rec == NULL) {
01525       rec = page_rec_get_next(ins_rec);
01526     } else {
01527       rec = page_rec_get_next(rec);
01528     }
01529 
01530     if (rec == NULL) {
01531       /* Include tuple */
01532       incl_data += insert_size;
01533     } else {
01534       offsets = rec_get_offsets(rec, cursor->index,
01535               offsets, ULINT_UNDEFINED,
01536               &heap);
01537       incl_data += rec_offs_size(offsets);
01538     }
01539 
01540     n++;
01541   } while (incl_data + page_dir_calc_reserved_space(n)
01542      < total_space / 2);
01543 
01544   if (incl_data + page_dir_calc_reserved_space(n) <= free_space) {
01545     /* The next record will be the first on
01546     the right half page if it is not the
01547     supremum record of page */
01548 
01549     if (rec == ins_rec) {
01550       rec = NULL;
01551 
01552       goto func_exit;
01553     } else if (rec == NULL) {
01554       next_rec = page_rec_get_next(ins_rec);
01555     } else {
01556       next_rec = page_rec_get_next(rec);
01557     }
01558     ut_ad(next_rec);
01559     if (!page_rec_is_supremum(next_rec)) {
01560       rec = next_rec;
01561     }
01562   }
01563 
01564 func_exit:
01565   if (UNIV_LIKELY_NULL(heap)) {
01566     mem_heap_free(heap);
01567   }
01568   return(rec);
01569 }
01570 
01571 /*************************************************************/
01575 static
01576 ibool
01577 btr_page_insert_fits(
01578 /*=================*/
01579   btr_cur_t*  cursor, 
01581   const rec_t*  split_rec,
01584   const ulint*  offsets,
01586   const dtuple_t* tuple,  
01587   ulint   n_ext,  
01588   mem_heap_t* heap) 
01589 {
01590   page_t*   page;
01591   ulint   insert_size;
01592   ulint   free_space;
01593   ulint   total_data;
01594   ulint   total_n_recs;
01595   const rec_t*  rec;
01596   const rec_t*  end_rec;
01597   ulint*    offs;
01598 
01599   page = btr_cur_get_page(cursor);
01600 
01601   ut_ad(!split_rec == !offsets);
01602   ut_ad(!offsets
01603         || !page_is_comp(page) == !rec_offs_comp(offsets));
01604   ut_ad(!offsets
01605         || rec_offs_validate(split_rec, cursor->index, offsets));
01606 
01607   insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
01608   free_space  = page_get_free_space_of_empty(page_is_comp(page));
01609 
01610   /* free_space is now the free space of a created new page */
01611 
01612   total_data   = page_get_data_size(page) + insert_size;
01613   total_n_recs = page_get_n_recs(page) + 1;
01614 
01615   /* We determine which records (from rec to end_rec, not including
01616   end_rec) will end up on the other half page from tuple when it is
01617   inserted. */
01618 
01619   if (split_rec == NULL) {
01620     rec = page_rec_get_next(page_get_infimum_rec(page));
01621     end_rec = page_rec_get_next(btr_cur_get_rec(cursor));
01622 
01623   } else if (cmp_dtuple_rec(tuple, split_rec, offsets) >= 0) {
01624 
01625     rec = page_rec_get_next(page_get_infimum_rec(page));
01626     end_rec = split_rec;
01627   } else {
01628     rec = split_rec;
01629     end_rec = page_get_supremum_rec(page);
01630   }
01631 
01632   if (total_data + page_dir_calc_reserved_space(total_n_recs)
01633       <= free_space) {
01634 
01635     /* Ok, there will be enough available space on the
01636     half page where the tuple is inserted */
01637 
01638     return(TRUE);
01639   }
01640 
01641   offs = NULL;
01642 
01643   while (rec != end_rec) {
01644     /* In this loop we calculate the amount of reserved
01645     space after rec is removed from page. */
01646 
01647     offs = rec_get_offsets(rec, cursor->index, offs,
01648                ULINT_UNDEFINED, &heap);
01649 
01650     total_data -= rec_offs_size(offs);
01651     total_n_recs--;
01652 
01653     if (total_data + page_dir_calc_reserved_space(total_n_recs)
01654         <= free_space) {
01655 
01656       /* Ok, there will be enough available space on the
01657       half page where the tuple is inserted */
01658 
01659       return(TRUE);
01660     }
01661 
01662     rec = page_rec_get_next_const(rec);
01663   }
01664 
01665   return(FALSE);
01666 }
01667 
01668 /*******************************************************/
01671 UNIV_INTERN
01672 void
01673 btr_insert_on_non_leaf_level_func(
01674 /*==============================*/
01675   dict_index_t* index,  
01676   ulint   level,  
01677   dtuple_t* tuple,  
01678   const char* file, 
01679   ulint   line, 
01680   mtr_t*    mtr)  
01681 {
01682   big_rec_t*  dummy_big_rec;
01683   btr_cur_t cursor;
01684   ulint   err;
01685   rec_t*    rec;
01686 
01687   ut_ad(level > 0);
01688 
01689   btr_cur_search_to_nth_level(index, level, tuple, PAGE_CUR_LE,
01690             BTR_CONT_MODIFY_TREE,
01691             &cursor, 0, file, line, mtr);
01692 
01693   err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
01694            | BTR_KEEP_SYS_FLAG
01695            | BTR_NO_UNDO_LOG_FLAG,
01696            &cursor, tuple, &rec,
01697            &dummy_big_rec, 0, NULL, mtr);
01698   ut_a(err == DB_SUCCESS);
01699 }
01700 
01701 /**************************************************************/
01704 static
01705 void
01706 btr_attach_half_pages(
01707 /*==================*/
01708   dict_index_t* index,    
01709   buf_block_t*  block,    
01710   rec_t*    split_rec,  
01712   buf_block_t*  new_block,  
01713   ulint   direction,  
01714   mtr_t*    mtr)    
01715 {
01716   ulint   space;
01717   ulint   zip_size;
01718   ulint   prev_page_no;
01719   ulint   next_page_no;
01720   ulint   level;
01721   page_t*   page    = buf_block_get_frame(block);
01722   page_t*   lower_page;
01723   page_t*   upper_page;
01724   ulint   lower_page_no;
01725   ulint   upper_page_no;
01726   page_zip_des_t* lower_page_zip;
01727   page_zip_des_t* upper_page_zip;
01728   dtuple_t* node_ptr_upper;
01729   mem_heap_t* heap;
01730 
01731   ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
01732   ut_ad(mtr_memo_contains(mtr, new_block, MTR_MEMO_PAGE_X_FIX));
01733 
01734   /* Create a memory heap where the data tuple is stored */
01735   heap = mem_heap_create(1024);
01736 
01737   /* Based on split direction, decide upper and lower pages */
01738   if (direction == FSP_DOWN) {
01739 
01740     btr_cur_t cursor;
01741     ulint*    offsets;
01742 
01743     lower_page = buf_block_get_frame(new_block);
01744     lower_page_no = buf_block_get_page_no(new_block);
01745     lower_page_zip = buf_block_get_page_zip(new_block);
01746     upper_page = buf_block_get_frame(block);
01747     upper_page_no = buf_block_get_page_no(block);
01748     upper_page_zip = buf_block_get_page_zip(block);
01749 
01750     /* Look up the index for the node pointer to page */
01751     offsets = btr_page_get_father_block(NULL, heap, index,
01752                 block, mtr, &cursor);
01753 
01754     /* Replace the address of the old child node (= page) with the
01755     address of the new lower half */
01756 
01757     btr_node_ptr_set_child_page_no(
01758       btr_cur_get_rec(&cursor),
01759       btr_cur_get_page_zip(&cursor),
01760       offsets, lower_page_no, mtr);
01761     mem_heap_empty(heap);
01762   } else {
01763     lower_page = buf_block_get_frame(block);
01764     lower_page_no = buf_block_get_page_no(block);
01765     lower_page_zip = buf_block_get_page_zip(block);
01766     upper_page = buf_block_get_frame(new_block);
01767     upper_page_no = buf_block_get_page_no(new_block);
01768     upper_page_zip = buf_block_get_page_zip(new_block);
01769   }
01770 
01771   /* Get the level of the split pages */
01772   level = btr_page_get_level(buf_block_get_frame(block), mtr);
01773   ut_ad(level
01774         == btr_page_get_level(buf_block_get_frame(new_block), mtr));
01775 
01776   /* Build the node pointer (= node key and page address) for the upper
01777   half */
01778 
01779   node_ptr_upper = dict_index_build_node_ptr(index, split_rec,
01780                upper_page_no, heap, level);
01781 
01782   /* Insert it next to the pointer to the lower half. Note that this
01783   may generate recursion leading to a split on the higher level. */
01784 
01785   btr_insert_on_non_leaf_level(index, level + 1, node_ptr_upper, mtr);
01786 
01787   /* Free the memory heap */
01788   mem_heap_free(heap);
01789 
01790   /* Get the previous and next pages of page */
01791 
01792   prev_page_no = btr_page_get_prev(page, mtr);
01793   next_page_no = btr_page_get_next(page, mtr);
01794   space = buf_block_get_space(block);
01795   zip_size = buf_block_get_zip_size(block);
01796 
01797   /* Update page links of the level */
01798 
01799   if (prev_page_no != FIL_NULL) {
01800     buf_block_t*  prev_block = btr_block_get(space, zip_size,
01801                  prev_page_no,
01802                  RW_X_LATCH, mtr);
01803 #ifdef UNIV_BTR_DEBUG
01804     ut_a(page_is_comp(prev_block->frame) == page_is_comp(page));
01805     ut_a(btr_page_get_next(prev_block->frame, mtr)
01806          == buf_block_get_page_no(block));
01807 #endif /* UNIV_BTR_DEBUG */
01808 
01809     btr_page_set_next(buf_block_get_frame(prev_block),
01810           buf_block_get_page_zip(prev_block),
01811           lower_page_no, mtr);
01812   }
01813 
01814   if (next_page_no != FIL_NULL) {
01815     buf_block_t*  next_block = btr_block_get(space, zip_size,
01816                  next_page_no,
01817                  RW_X_LATCH, mtr);
01818 #ifdef UNIV_BTR_DEBUG
01819     ut_a(page_is_comp(next_block->frame) == page_is_comp(page));
01820     ut_a(btr_page_get_prev(next_block->frame, mtr)
01821          == page_get_page_no(page));
01822 #endif /* UNIV_BTR_DEBUG */
01823 
01824     btr_page_set_prev(buf_block_get_frame(next_block),
01825           buf_block_get_page_zip(next_block),
01826           upper_page_no, mtr);
01827   }
01828 
01829   btr_page_set_prev(lower_page, lower_page_zip, prev_page_no, mtr);
01830   btr_page_set_next(lower_page, lower_page_zip, upper_page_no, mtr);
01831 
01832   btr_page_set_prev(upper_page, upper_page_zip, lower_page_no, mtr);
01833   btr_page_set_next(upper_page, upper_page_zip, next_page_no, mtr);
01834 }
01835 
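The btr_page_set_prev()/btr_page_set_next() calls at the end of btr_attach_half_pages() rewire the doubly linked list of sibling pages on the split level. Below is a minimal standalone sketch of that rewiring for the FSP_UP case (the original page keeps its number and becomes the lower half, the new page becomes the upper half). The level_page struct, attach_upper_half() and PAGE_NULL are illustrative stand-ins, not InnoDB types or API.

#include <stdio.h>

#define PAGE_NULL 0xffffffffU   /* stands in for FIL_NULL */

struct level_page {
  unsigned no;    /* page number */
  unsigned prev;  /* left sibling, or PAGE_NULL */
  unsigned next;  /* right sibling, or PAGE_NULL */
};

static void attach_upper_half(struct level_page *lower,
                              struct level_page *upper,
                              struct level_page *old_next)
{
  unsigned next_page_no = lower->next;  /* read before modifying, as above */

  if (old_next != NULL) {               /* next_page_no != FIL_NULL case */
    old_next->prev = upper->no;
  }

  lower->next = upper->no;              /* lower->prev is left unchanged */
  upper->prev = lower->no;
  upper->next = next_page_no;
}

int main(void)
{
  struct level_page a = { 10, PAGE_NULL, 30 };
  struct level_page c = { 30, 10, PAGE_NULL };
  struct level_page b = { 20, PAGE_NULL, PAGE_NULL };  /* new upper half */

  attach_upper_half(&a, &b, &c);

  printf("a: prev=%u next=%u\n", a.prev, a.next);  /* PAGE_NULL, 20 */
  printf("b: prev=%u next=%u\n", b.prev, b.next);  /* 10, 30 */
  printf("c: prev=%u next=%u\n", c.prev, c.next);  /* 20, PAGE_NULL */
  return 0;
}
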
01836 /*************************************************************/
01839 static
01840 ibool
01841 btr_page_tuple_smaller(
01842 /*===================*/
01843   btr_cur_t*  cursor, 
01844   const dtuple_t* tuple,  
01845   ulint*    offsets,
01846   ulint   n_uniq, 
01848   mem_heap_t**  heap) 
01849 {
01850   buf_block_t*  block;
01851   const rec_t*  first_rec;
01852   page_cur_t  pcur;
01853 
01854   /* Read the first user record in the page. */
01855   block = btr_cur_get_block(cursor);
01856   page_cur_set_before_first(block, &pcur);
01857   page_cur_move_to_next(&pcur);
01858   first_rec = page_cur_get_rec(&pcur);
01859 
01860   offsets = rec_get_offsets(
01861     first_rec, cursor->index, offsets,
01862     n_uniq, heap);
01863 
01864   return(cmp_dtuple_rec(tuple, first_rec, offsets) < 0);
01865 }
01866 
01867 /*************************************************************/
01876 UNIV_INTERN
01877 rec_t*
01878 btr_page_split_and_insert(
01879 /*======================*/
01880   btr_cur_t*  cursor, 
01883   const dtuple_t* tuple,  
01884   ulint   n_ext,  
01885   mtr_t*    mtr)  
01886 {
01887   buf_block_t*  block;
01888   page_t*   page;
01889   page_zip_des_t* page_zip;
01890   ulint   page_no;
01891   byte    direction;
01892   ulint   hint_page_no;
01893   buf_block_t*  new_block;
01894   page_t*   new_page;
01895   page_zip_des_t* new_page_zip;
01896   rec_t*    split_rec;
01897   buf_block_t*  left_block;
01898   buf_block_t*  right_block;
01899   buf_block_t*  insert_block;
01900   page_cur_t* page_cursor;
01901   rec_t*    first_rec;
01902   byte*   buf = 0; /* remove warning */
01903   rec_t*    move_limit;
01904   ibool   insert_will_fit;
01905   ibool   insert_left;
01906   ulint   n_iterations = 0;
01907   rec_t*    rec;
01908   mem_heap_t* heap;
01909   ulint   n_uniq;
01910   ulint*    offsets;
01911 
01912   heap = mem_heap_create(1024);
01913   n_uniq = dict_index_get_n_unique_in_tree(cursor->index);
01914 func_start:
01915   mem_heap_empty(heap);
01916   offsets = NULL;
01917 
01918   ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
01919         MTR_MEMO_X_LOCK));
01920 #ifdef UNIV_SYNC_DEBUG
01921   ut_ad(rw_lock_own(dict_index_get_lock(cursor->index), RW_LOCK_EX));
01922 #endif /* UNIV_SYNC_DEBUG */
01923 
01924   block = btr_cur_get_block(cursor);
01925   page = buf_block_get_frame(block);
01926   page_zip = buf_block_get_page_zip(block);
01927 
01928   ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
01929   ut_ad(page_get_n_recs(page) >= 1);
01930 
01931   page_no = buf_block_get_page_no(block);
01932 
01933   /* 1. Decide the split record; split_rec == NULL means that the
01934   tuple to be inserted should be the first record on the upper
01935   half-page */
01936   insert_left = FALSE;
01937 
01938   if (n_iterations > 0) {
01939     direction = FSP_UP;
01940     hint_page_no = page_no + 1;
01941     split_rec = btr_page_get_split_rec(cursor, tuple, n_ext);
01942 
01943     if (UNIV_UNLIKELY(split_rec == NULL)) {
01944       insert_left = btr_page_tuple_smaller(
01945         cursor, tuple, offsets, n_uniq, &heap);
01946     }
01947   } else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
01948     direction = FSP_UP;
01949     hint_page_no = page_no + 1;
01950   } else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) {
01951     direction = FSP_DOWN;
01952     hint_page_no = page_no - 1;
01953     ut_ad(split_rec);
01954   } else {
01955     direction = FSP_UP;
01956     hint_page_no = page_no + 1;
01957     /* If there is only one record in the index page, we
01958     can't split the node in the middle by default. We need
01959     to determine whether the new record will be inserted
01960     to the left or right. */
01961 
01962     if (page_get_n_recs(page) > 1) {
01963       split_rec = page_get_middle_rec(page);
01964     } else if (btr_page_tuple_smaller(cursor, tuple,
01965               offsets, n_uniq, &heap)) {
01966       split_rec = page_rec_get_next(
01967         page_get_infimum_rec(page));
01968     } else {
01969       split_rec = NULL;
01970     }
01971   }
01972 
01973   /* 2. Allocate a new page to the index */
01974   new_block = btr_page_alloc(cursor->index, hint_page_no, direction,
01975            btr_page_get_level(page, mtr), mtr);
01976   new_page = buf_block_get_frame(new_block);
01977   new_page_zip = buf_block_get_page_zip(new_block);
01978   btr_page_create(new_block, new_page_zip, cursor->index,
01979       btr_page_get_level(page, mtr), mtr);
01980 
01981   /* 3. Calculate the first record on the upper half-page, and the
01982   first record (move_limit) on original page which ends up on the
01983   upper half */
01984 
01985   if (split_rec) {
01986     first_rec = move_limit = split_rec;
01987 
01988     offsets = rec_get_offsets(split_rec, cursor->index, offsets,
01989             n_uniq, &heap);
01990 
01991     insert_left = cmp_dtuple_rec(tuple, split_rec, offsets) < 0;
01992 
01993     if (UNIV_UNLIKELY(!insert_left && new_page_zip
01994           && n_iterations > 0)) {
01995       /* If a compressed page has already been split,
01996       avoid further splits by inserting the record
01997       to an empty page. */
01998       split_rec = NULL;
01999       goto insert_empty;
02000     }
02001   } else if (UNIV_UNLIKELY(insert_left)) {
02002     ut_a(n_iterations > 0);
02003     first_rec = page_rec_get_next(page_get_infimum_rec(page));
02004     move_limit = page_rec_get_next(btr_cur_get_rec(cursor));
02005   } else {
02006 insert_empty:
02007     ut_ad(!split_rec);
02008     ut_ad(!insert_left);
02009     buf = (unsigned char *)mem_alloc(
02010       rec_get_converted_size(cursor->index, tuple, n_ext));
02011 
02012     first_rec = rec_convert_dtuple_to_rec(buf, cursor->index,
02013                   tuple, n_ext);
02014     move_limit = page_rec_get_next(btr_cur_get_rec(cursor));
02015   }
02016 
02017   /* 4. Do first the modifications in the tree structure */
02018 
02019   btr_attach_half_pages(cursor->index, block,
02020             first_rec, new_block, direction, mtr);
02021 
02022   /* If the split is made on the leaf level and the insert will fit
02023   on the appropriate half-page, we may release the tree x-latch.
02024   We can then move the records after releasing the tree latch,
02025   thus reducing the tree latch contention. */
02026 
02027   if (split_rec) {
02028     insert_will_fit = !new_page_zip
02029       && btr_page_insert_fits(cursor, split_rec,
02030             offsets, tuple, n_ext, heap);
02031   } else {
02032     if (!insert_left) {
02033       mem_free(buf);
02034       buf = NULL;
02035     }
02036 
02037     insert_will_fit = !new_page_zip
02038       && btr_page_insert_fits(cursor, NULL,
02039             NULL, tuple, n_ext, heap);
02040   }
02041 
02042   if (insert_will_fit && page_is_leaf(page)) {
02043 
02044     mtr_memo_release(mtr, dict_index_get_lock(cursor->index),
02045          MTR_MEMO_X_LOCK);
02046   }
02047 
02048   /* 5. Then move the records to the new page */
02049   if (direction == FSP_DOWN) {
02050     /*    fputs("Split left\n", stderr); */
02051 
02052     if (0
02053 #ifdef UNIV_ZIP_COPY
02054         || page_zip
02055 #endif /* UNIV_ZIP_COPY */
02056         || UNIV_UNLIKELY
02057         (!page_move_rec_list_start(new_block, block, move_limit,
02058                  cursor->index, mtr))) {
02059       /* For some reason, compressing new_page failed,
02060       even though it should contain fewer records than
02061       the original page.  Copy the page byte for byte
02062       and then delete the records from both pages
02063       as appropriate.  Deleting will always succeed. */
02064       ut_a(new_page_zip);
02065 
02066       page_zip_copy_recs(new_page_zip, new_page,
02067              page_zip, page, cursor->index, mtr);
02068       page_delete_rec_list_end(move_limit - page + new_page,
02069              new_block, cursor->index,
02070              ULINT_UNDEFINED,
02071              ULINT_UNDEFINED, mtr);
02072 
02073       /* Update the lock table and possible hash index. */
02074 
02075       lock_move_rec_list_start(
02076         new_block, block, move_limit,
02077         new_page + PAGE_NEW_INFIMUM);
02078 
02079       btr_search_move_or_delete_hash_entries(
02080         new_block, block, cursor->index);
02081 
02082       /* Delete the records from the source page. */
02083 
02084       page_delete_rec_list_start(move_limit, block,
02085                cursor->index, mtr);
02086     }
02087 
02088     left_block = new_block;
02089     right_block = block;
02090 
02091     lock_update_split_left(right_block, left_block);
02092   } else {
02093     /*    fputs("Split right\n", stderr); */
02094 
02095     if (0
02096 #ifdef UNIV_ZIP_COPY
02097         || page_zip
02098 #endif /* UNIV_ZIP_COPY */
02099         || UNIV_UNLIKELY
02100         (!page_move_rec_list_end(new_block, block, move_limit,
02101                cursor->index, mtr))) {
02102       /* For some reason, compressing new_page failed,
02103       even though it should contain fewer records than
02104       the original page.  Copy the page byte for byte
02105       and then delete the records from both pages
02106       as appropriate.  Deleting will always succeed. */
02107       ut_a(new_page_zip);
02108 
02109       page_zip_copy_recs(new_page_zip, new_page,
02110              page_zip, page, cursor->index, mtr);
02111       page_delete_rec_list_start(move_limit - page
02112                + new_page, new_block,
02113                cursor->index, mtr);
02114 
02115       /* Update the lock table and possible hash index. */
02116 
02117       lock_move_rec_list_end(new_block, block, move_limit);
02118 
02119       btr_search_move_or_delete_hash_entries(
02120         new_block, block, cursor->index);
02121 
02122       /* Delete the records from the source page. */
02123 
02124       page_delete_rec_list_end(move_limit, block,
02125              cursor->index,
02126              ULINT_UNDEFINED,
02127              ULINT_UNDEFINED, mtr);
02128     }
02129 
02130     left_block = block;
02131     right_block = new_block;
02132 
02133     lock_update_split_right(right_block, left_block);
02134   }
02135 
02136 #ifdef UNIV_ZIP_DEBUG
02137   if (UNIV_LIKELY_NULL(page_zip)) {
02138     ut_a(page_zip_validate(page_zip, page));
02139     ut_a(page_zip_validate(new_page_zip, new_page));
02140   }
02141 #endif /* UNIV_ZIP_DEBUG */
02142 
02143   /* At this point, split_rec, move_limit and first_rec may point
02144   to garbage on the old page. */
02145 
02146   /* 6. The split and the tree modification are now completed. Decide the
02147   page where the tuple should be inserted */
02148 
02149   if (insert_left) {
02150     insert_block = left_block;
02151   } else {
02152     insert_block = right_block;
02153   }
02154 
02155   /* 7. Reposition the cursor for insert and try insertion */
02156   page_cursor = btr_cur_get_page_cur(cursor);
02157 
02158   page_cur_search(insert_block, cursor->index, tuple,
02159       PAGE_CUR_LE, page_cursor);
02160 
02161   rec = page_cur_tuple_insert(page_cursor, tuple,
02162             cursor->index, n_ext, mtr);
02163 
02164 #ifdef UNIV_ZIP_DEBUG
02165   {
02166     page_t*   insert_page
02167       = buf_block_get_frame(insert_block);
02168 
02169     page_zip_des_t* insert_page_zip
02170       = buf_block_get_page_zip(insert_block);
02171 
02172     ut_a(!insert_page_zip
02173          || page_zip_validate(insert_page_zip, insert_page));
02174   }
02175 #endif /* UNIV_ZIP_DEBUG */
02176 
02177   if (UNIV_LIKELY(rec != NULL)) {
02178 
02179     goto func_exit;
02180   }
02181 
02182   /* 8. If insert did not fit, try page reorganization */
02183 
02184   if (UNIV_UNLIKELY
02185       (!btr_page_reorganize(insert_block, cursor->index, mtr))) {
02186 
02187     goto insert_failed;
02188   }
02189 
02190   page_cur_search(insert_block, cursor->index, tuple,
02191       PAGE_CUR_LE, page_cursor);
02192   rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
02193             n_ext, mtr);
02194 
02195   if (UNIV_UNLIKELY(rec == NULL)) {
02196     /* The insert did not fit on the page: loop back to the
02197     start of the function for a new split */
02198 insert_failed:
02199     /* We play it safe and reset the free bits for new_page */
02200     if (!dict_index_is_clust(cursor->index)) {
02201       ibuf_reset_free_bits(new_block);
02202     }
02203 
02204     /* fprintf(stderr, "Split second round %lu\n",
02205     page_get_page_no(page)); */
02206     n_iterations++;
02207     ut_ad(n_iterations < 2
02208           || buf_block_get_page_zip(insert_block));
02209     ut_ad(!insert_will_fit);
02210 
02211     goto func_start;
02212   }
02213 
02214 func_exit:
02215   /* Insert fit on the page: update the free bits for the
02216   left and right pages in the same mtr */
02217 
02218   if (!dict_index_is_clust(cursor->index) && page_is_leaf(page)) {
02219     ibuf_update_free_bits_for_two_pages_low(
02220       buf_block_get_zip_size(left_block),
02221       left_block, right_block, mtr);
02222   }
02223 
02224 #if 0
02225   fprintf(stderr, "Split and insert done %lu %lu\n",
02226     buf_block_get_page_no(left_block),
02227     buf_block_get_page_no(right_block));
02228 #endif
02229 
02230   ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index));
02231   ut_ad(page_validate(buf_block_get_frame(right_block), cursor->index));
02232 
02233   mem_heap_free(heap);
02234   return(rec);
02235 }
02236 
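Step 1 of btr_page_split_and_insert() chooses the split record. When neither a right-hand nor a left-hand split is recommended, the default branch above splits at the middle record, or, for a single-record page, decides by comparing the incoming tuple against that record. The following is a standalone sketch of just that default branch over a plain sorted array; choose_split_point() and the key array are illustrative, not InnoDB code.

#include <stddef.h>
#include <stdio.h>

/* Returns the index of the first key that moves to the upper half, or
   n if the new key itself should open the upper half (split_rec == NULL). */
static size_t choose_split_point(const int *keys, size_t n, int new_key)
{
  if (n > 1) {
    return n / 2;                 /* page_get_middle_rec() analogue */
  }
  /* Only one record: decide by comparing against the first record,
     as btr_page_tuple_smaller() does. */
  if (new_key < keys[0]) {
    return 0;                     /* the existing record moves up */
  }
  return n;                       /* the new key becomes first on the upper half */
}

int main(void)
{
  int keys[] = { 10, 20, 30, 40 };
  int one[]  = { 50 };

  printf("split at %zu\n", choose_split_point(keys, 4, 25));  /* 2 */
  printf("split at %zu\n", choose_split_point(one, 1, 5));    /* 0 */
  printf("split at %zu\n", choose_split_point(one, 1, 99));   /* 1 */
  return 0;
}
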
02237 /*************************************************************/
02239 static
02240 void
02241 btr_level_list_remove(
02242 /*==================*/
02243   ulint   space,  
02244   ulint   zip_size,
02246   page_t*   page, 
02247   mtr_t*    mtr)  
02248 {
02249   ulint prev_page_no;
02250   ulint next_page_no;
02251 
02252   ut_ad(page && mtr);
02253   ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
02254   ut_ad(space == page_get_space_id(page));
02255   /* Get the previous and next page numbers of page */
02256 
02257   prev_page_no = btr_page_get_prev(page, mtr);
02258   next_page_no = btr_page_get_next(page, mtr);
02259 
02260   /* Update page links of the level */
02261 
02262   if (prev_page_no != FIL_NULL) {
02263     buf_block_t*  prev_block
02264       = btr_block_get(space, zip_size, prev_page_no,
02265           RW_X_LATCH, mtr);
02266     page_t*   prev_page
02267       = buf_block_get_frame(prev_block);
02268 #ifdef UNIV_BTR_DEBUG
02269     ut_a(page_is_comp(prev_page) == page_is_comp(page));
02270     ut_a(btr_page_get_next(prev_page, mtr)
02271          == page_get_page_no(page));
02272 #endif /* UNIV_BTR_DEBUG */
02273 
02274     btr_page_set_next(prev_page,
02275           buf_block_get_page_zip(prev_block),
02276           next_page_no, mtr);
02277   }
02278 
02279   if (next_page_no != FIL_NULL) {
02280     buf_block_t*  next_block
02281       = btr_block_get(space, zip_size, next_page_no,
02282           RW_X_LATCH, mtr);
02283     page_t*   next_page
02284       = buf_block_get_frame(next_block);
02285 #ifdef UNIV_BTR_DEBUG
02286     ut_a(page_is_comp(next_page) == page_is_comp(page));
02287     ut_a(btr_page_get_prev(next_page, mtr)
02288          == page_get_page_no(page));
02289 #endif /* UNIV_BTR_DEBUG */
02290 
02291     btr_page_set_prev(next_page,
02292           buf_block_get_page_zip(next_block),
02293           prev_page_no, mtr);
02294   }
02295 }
02296 
02297 /****************************************************************/
02300 UNIV_INLINE
02301 void
02302 btr_set_min_rec_mark_log(
02303 /*=====================*/
02304   rec_t*  rec,  
02305   byte  type, 
02306   mtr_t*  mtr)  
02307 {
02308   mlog_write_initial_log_record(rec, type, mtr);
02309 
02310   /* Write rec offset as a 2-byte ulint */
02311   mlog_catenate_ulint(mtr, page_offset(rec), MLOG_2BYTES);
02312 }
02313 #else /* !UNIV_HOTBACKUP */
02314 # define btr_set_min_rec_mark_log(rec,comp,mtr) ((void) 0)
02315 #endif /* !UNIV_HOTBACKUP */
02316 
02317 /****************************************************************/
02321 UNIV_INTERN
02322 byte*
02323 btr_parse_set_min_rec_mark(
02324 /*=======================*/
02325   byte* ptr,  
02326   byte* end_ptr,
02327   ulint comp, 
02328   page_t* page, 
02329   mtr_t*  mtr)  
02330 {
02331   rec_t*  rec;
02332 
02333   if (end_ptr < ptr + 2) {
02334 
02335     return(NULL);
02336   }
02337 
02338   if (page) {
02339     ut_a(!page_is_comp(page) == !comp);
02340 
02341     rec = page + mach_read_from_2(ptr);
02342 
02343     btr_set_min_rec_mark(rec, mtr);
02344   }
02345 
02346   return(ptr + 2);
02347 }
02348 
02349 /****************************************************************/
02351 UNIV_INTERN
02352 void
02353 btr_set_min_rec_mark(
02354 /*=================*/
02355   rec_t*  rec,  
02356   mtr_t*  mtr)  
02357 {
02358   ulint info_bits;
02359 
02360   if (UNIV_LIKELY(page_rec_is_comp(rec))) {
02361     info_bits = rec_get_info_bits(rec, TRUE);
02362 
02363     rec_set_info_bits_new(rec, info_bits | REC_INFO_MIN_REC_FLAG);
02364 
02365     btr_set_min_rec_mark_log(rec, MLOG_COMP_REC_MIN_MARK, mtr);
02366   } else {
02367     info_bits = rec_get_info_bits(rec, FALSE);
02368 
02369     rec_set_info_bits_old(rec, info_bits | REC_INFO_MIN_REC_FLAG);
02370 
02371     btr_set_min_rec_mark_log(rec, MLOG_REC_MIN_MARK, mtr);
02372   }
02373 }
02374 
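btr_set_min_rec_mark() ORs REC_INFO_MIN_REC_FLAG into the record's info bits and logs the record's page offset as a 2-byte value, which btr_parse_set_min_rec_mark() reads back during recovery. Below is a standalone sketch of that round trip: read_u16_be() plays the role of mach_read_from_2(), and MIN_REC_FLAG and the byte array are illustrative only; in the real page format the info bits are not stored at the record offset itself.

#include <stdio.h>

#define MIN_REC_FLAG 0x10U        /* stands in for REC_INFO_MIN_REC_FLAG */

static unsigned read_u16_be(const unsigned char *p)
{
  return ((unsigned) p[0] << 8) | p[1];
}

int main(void)
{
  unsigned char page[256] = { 0 };
  unsigned char log_body[2] = { 0x00, 0x40 };   /* offset 64 within the page */

  unsigned offset = read_u16_be(log_body);      /* parse step */
  if (offset >= sizeof(page)) {
    return 1;                                   /* corrupt log record */
  }
  page[offset] |= MIN_REC_FLAG;                 /* rec_set_info_bits_*() analogue */

  printf("marked record at offset %u, info bits 0x%02x\n",
         offset, (unsigned) page[offset]);
  return 0;
}
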
02375 #ifndef UNIV_HOTBACKUP
02376 /*************************************************************/
02378 UNIV_INTERN
02379 void
02380 btr_node_ptr_delete(
02381 /*================*/
02382   dict_index_t* index,  
02383   buf_block_t*  block,  
02384   mtr_t*    mtr)  
02385 {
02386   btr_cur_t cursor;
02387   ibool   compressed;
02388   ulint   err;
02389 
02390   ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
02391 
02392   /* Delete node pointer on father page */
02393   btr_page_get_father(index, block, mtr, &cursor);
02394 
02395   compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, RB_NONE,
02396             mtr);
02397   ut_a(err == DB_SUCCESS);
02398 
02399   if (!compressed) {
02400     btr_cur_compress_if_useful(&cursor, mtr);
02401   }
02402 }
02403 
02404 /*************************************************************/
02407 static
02408 void
02409 btr_lift_page_up(
02410 /*=============*/
02411   dict_index_t* index,  
02412   buf_block_t*  block,  
02416   mtr_t*    mtr)  
02417 {
02418   buf_block_t*  father_block;
02419   page_t*   father_page;
02420   ulint   page_level;
02421   page_zip_des_t* father_page_zip;
02422   page_t*   page    = buf_block_get_frame(block);
02423   ulint   root_page_no;
02424   buf_block_t*  blocks[BTR_MAX_LEVELS];
02425   ulint   n_blocks; 
02426   ulint   i;
02427 
02428   ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
02429   ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
02430   ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
02431 
02432   page_level = btr_page_get_level(page, mtr);
02433   root_page_no = dict_index_get_page(index);
02434 
02435   {
02436     btr_cur_t cursor;
02437     mem_heap_t* heap  = mem_heap_create(100);
02438     ulint*    offsets;
02439     buf_block_t*  b;
02440 
02441     offsets = btr_page_get_father_block(NULL, heap, index,
02442                 block, mtr, &cursor);
02443     father_block = btr_cur_get_block(&cursor);
02444     father_page_zip = buf_block_get_page_zip(father_block);
02445     father_page = buf_block_get_frame(father_block);
02446 
02447     n_blocks = 0;
02448 
02449     /* Store all ancestor pages so we can reset their
02450     levels later on.  We have to do all the searches on
02451     the tree now because later on, after we've replaced
02452     the first level, the tree is in an inconsistent state
02453     and can not be searched. */
02454     for (b = father_block;
02455          buf_block_get_page_no(b) != root_page_no; ) {
02456       ut_a(n_blocks < BTR_MAX_LEVELS);
02457 
02458       offsets = btr_page_get_father_block(offsets, heap,
02459                   index, b,
02460                   mtr, &cursor);
02461 
02462       blocks[n_blocks++] = b = btr_cur_get_block(&cursor);
02463     }
02464 
02465     mem_heap_free(heap);
02466   }
02467 
02468   btr_search_drop_page_hash_index(block);
02469 
02470   /* Make the father empty */
02471   btr_page_empty(father_block, father_page_zip, index, page_level, mtr);
02472 
02473   /* Copy the records to the father page one by one. */
02474   if (0
02475 #ifdef UNIV_ZIP_COPY
02476       || father_page_zip
02477 #endif /* UNIV_ZIP_COPY */
02478       || UNIV_UNLIKELY
02479       (!page_copy_rec_list_end(father_block, block,
02480              page_get_infimum_rec(page),
02481              index, mtr))) {
02482     const page_zip_des_t* page_zip
02483       = buf_block_get_page_zip(block);
02484     ut_a(father_page_zip);
02485     ut_a(page_zip);
02486 
02487     /* Copy the page byte for byte. */
02488     page_zip_copy_recs(father_page_zip, father_page,
02489            page_zip, page, index, mtr);
02490 
02491     /* Update the lock table and possible hash index. */
02492 
02493     lock_move_rec_list_end(father_block, block,
02494                page_get_infimum_rec(page));
02495 
02496     btr_search_move_or_delete_hash_entries(father_block, block,
02497                    index);
02498   }
02499 
02500   lock_update_copy_and_discard(father_block, block);
02501 
02502   /* Go upward to root page, decrementing levels by one. */
02503   for (i = 0; i < n_blocks; i++, page_level++) {
02504     page_t*   inner_page  = buf_block_get_frame(blocks[i]);
02505     page_zip_des_t* page_zip = buf_block_get_page_zip(blocks[i]);
02506 
02507     ut_ad(btr_page_get_level(inner_page, mtr) == page_level + 1);
02508 
02509     btr_page_set_level(inner_page, page_zip, page_level, mtr);
02510 #ifdef UNIV_ZIP_DEBUG
02511     ut_a(!page_zip || page_zip_validate(page_zip, inner_page));
02512 #endif /* UNIV_ZIP_DEBUG */
02513   }
02514 
02515   /* Free the file page */
02516   btr_page_free(index, block, mtr);
02517 
02518   /* We play it safe and reset the free bits for the father */
02519   if (!dict_index_is_clust(index)) {
02520     ibuf_reset_free_bits(father_block);
02521   }
02522   ut_ad(page_validate(father_page, index));
02523   ut_ad(btr_check_node_ptr(index, father_block, mtr));
02524 }
02525 
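Before it empties the father, btr_lift_page_up() walks up to the root and remembers every ancestor in blocks[], because once the first level has been replaced the tree is in an inconsistent state and cannot be searched. The following is a standalone sketch of that collection step using a plain parent-pointer array; parent[], stack[] and MAX_LEVELS are illustrative stand-ins for btr_page_get_father_block() and BTR_MAX_LEVELS.

#include <stdio.h>

#define MAX_LEVELS 16

int main(void)
{
  /* parent[i] is the parent of node i; node 0 is the root. */
  int parent[] = { -1, 0, 1, 2 };   /* 3 -> 2 -> 1 -> 0 (root) */
  int start = 3;                    /* the father of the page being lifted */
  int stack[MAX_LEVELS];
  int n = 0;
  int b;

  /* Collect every ancestor of "start" up to and including the root,
     before the tree is restructured. */
  for (b = start; parent[b] != -1; ) {
    if (n >= MAX_LEVELS) {
      return 1;                     /* would trip the ut_a() in the real code */
    }
    stack[n++] = b = parent[b];
  }

  /* ...the tree would be restructured here; afterwards the remembered
  ancestors can be revisited without searching (e.g. to rewrite their
  level fields). */
  for (b = 0; b < n; b++) {
    printf("ancestor %d: node %d\n", b, stack[b]);
  }
  return 0;
}
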
02526 /*************************************************************/
02536 UNIV_INTERN
02537 ibool
02538 btr_compress(
02539 /*=========*/
02540   btr_cur_t*  cursor, 
02544   mtr_t*    mtr)  
02545 {
02546   dict_index_t* index;
02547   ulint   space;
02548   ulint   zip_size;
02549   ulint   left_page_no;
02550   ulint   right_page_no;
02551   buf_block_t*  merge_block;
02552   page_t*   merge_page;
02553   page_zip_des_t* merge_page_zip;
02554   ibool   is_left;
02555   buf_block_t*  block;
02556   page_t*   page;
02557   btr_cur_t father_cursor;
02558   mem_heap_t* heap;
02559   ulint*    offsets;
02560   ulint   data_size;
02561   ulint   n_recs;
02562   ulint   max_ins_size;
02563   ulint   max_ins_size_reorg;
02564 
02565   block = btr_cur_get_block(cursor);
02566   page = btr_cur_get_page(cursor);
02567   index = btr_cur_get_index(cursor);
02568   ut_a((ibool) !!page_is_comp(page) == dict_table_is_comp(index->table));
02569 
02570   ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
02571         MTR_MEMO_X_LOCK));
02572   ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
02573   space = dict_index_get_space(index);
02574   zip_size = dict_table_zip_size(index->table);
02575 
02576   left_page_no = btr_page_get_prev(page, mtr);
02577   right_page_no = btr_page_get_next(page, mtr);
02578 
02579 #if 0
02580   fprintf(stderr, "Merge left page %lu right %lu \n",
02581     left_page_no, right_page_no);
02582 #endif
02583 
02584   heap = mem_heap_create(100);
02585   offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
02586               &father_cursor);
02587 
02588   /* Decide the page to which we try to merge and which will inherit
02589   the locks */
02590 
02591   is_left = left_page_no != FIL_NULL;
02592 
02593   if (is_left) {
02594 
02595     merge_block = btr_block_get(space, zip_size, left_page_no,
02596               RW_X_LATCH, mtr);
02597     merge_page = buf_block_get_frame(merge_block);
02598 #ifdef UNIV_BTR_DEBUG
02599     ut_a(btr_page_get_next(merge_page, mtr)
02600          == buf_block_get_page_no(block));
02601 #endif /* UNIV_BTR_DEBUG */
02602   } else if (right_page_no != FIL_NULL) {
02603 
02604     merge_block = btr_block_get(space, zip_size, right_page_no,
02605               RW_X_LATCH, mtr);
02606     merge_page = buf_block_get_frame(merge_block);
02607 #ifdef UNIV_BTR_DEBUG
02608     ut_a(btr_page_get_prev(merge_page, mtr)
02609          == buf_block_get_page_no(block));
02610 #endif /* UNIV_BTR_DEBUG */
02611   } else {
02612     /* The page is the only one on the level; lift the records
02613     to the father */
02614     btr_lift_page_up(index, block, mtr);
02615     mem_heap_free(heap);
02616     return(TRUE);
02617   }
02618 
02619   n_recs = page_get_n_recs(page);
02620   data_size = page_get_data_size(page);
02621 #ifdef UNIV_BTR_DEBUG
02622   ut_a(page_is_comp(merge_page) == page_is_comp(page));
02623 #endif /* UNIV_BTR_DEBUG */
02624 
02625   max_ins_size_reorg = page_get_max_insert_size_after_reorganize(
02626     merge_page, n_recs);
02627   if (data_size > max_ins_size_reorg) {
02628 
02629     /* No space for merge */
02630 err_exit:
02631     /* We play it safe and reset the free bits. */
02632     if (zip_size
02633         && page_is_leaf(merge_page)
02634         && !dict_index_is_clust(index)) {
02635       ibuf_reset_free_bits(merge_block);
02636     }
02637 
02638     mem_heap_free(heap);
02639     return(FALSE);
02640   }
02641 
02642   ut_ad(page_validate(merge_page, index));
02643 
02644   max_ins_size = page_get_max_insert_size(merge_page, n_recs);
02645 
02646   if (UNIV_UNLIKELY(data_size > max_ins_size)) {
02647 
02648     /* We have to reorganize merge_page */
02649 
02650     if (UNIV_UNLIKELY(!btr_page_reorganize(merge_block,
02651                    index, mtr))) {
02652 
02653       goto err_exit;
02654     }
02655 
02656     max_ins_size = page_get_max_insert_size(merge_page, n_recs);
02657 
02658     ut_ad(page_validate(merge_page, index));
02659     ut_ad(max_ins_size == max_ins_size_reorg);
02660 
02661     if (UNIV_UNLIKELY(data_size > max_ins_size)) {
02662 
02663       /* Add fault tolerance, though this should
02664       never happen */
02665 
02666       goto err_exit;
02667     }
02668   }
02669 
02670   merge_page_zip = buf_block_get_page_zip(merge_block);
02671 #ifdef UNIV_ZIP_DEBUG
02672   if (UNIV_LIKELY_NULL(merge_page_zip)) {
02673     const page_zip_des_t* page_zip
02674       = buf_block_get_page_zip(block);
02675     ut_a(page_zip);
02676     ut_a(page_zip_validate(merge_page_zip, merge_page));
02677     ut_a(page_zip_validate(page_zip, page));
02678   }
02679 #endif /* UNIV_ZIP_DEBUG */
02680 
02681   /* Move records to the merge page */
02682   if (is_left) {
02683     rec_t*  orig_pred = page_copy_rec_list_start(
02684       merge_block, block, page_get_supremum_rec(page),
02685       index, mtr);
02686 
02687     if (UNIV_UNLIKELY(!orig_pred)) {
02688       goto err_exit;
02689     }
02690 
02691     btr_search_drop_page_hash_index(block);
02692 
02693     /* Remove the page from the level list */
02694     btr_level_list_remove(space, zip_size, page, mtr);
02695 
02696     btr_node_ptr_delete(index, block, mtr);
02697     lock_update_merge_left(merge_block, orig_pred, block);
02698   } else {
02699     rec_t*    orig_succ;
02700 #ifdef UNIV_BTR_DEBUG
02701     byte    fil_page_prev[4];
02702 #endif /* UNIV_BTR_DEBUG */
02703 
02704     if (UNIV_LIKELY_NULL(merge_page_zip)) {
02705       /* The function page_zip_compress(), which will be
02706       invoked by page_copy_rec_list_end() below,
02707       requires that FIL_PAGE_PREV be FIL_NULL.
02708       Clear the field, but prepare to restore it. */
02709 #ifdef UNIV_BTR_DEBUG
02710       memcpy(fil_page_prev, merge_page + FIL_PAGE_PREV, 4);
02711 #endif /* UNIV_BTR_DEBUG */
02712 #if FIL_NULL != 0xffffffff
02713 # error "FIL_NULL != 0xffffffff"
02714 #endif
02715       memset(merge_page + FIL_PAGE_PREV, 0xff, 4);
02716     }
02717 
02718     orig_succ = page_copy_rec_list_end(merge_block, block,
02719                page_get_infimum_rec(page),
02720                cursor->index, mtr);
02721 
02722     if (UNIV_UNLIKELY(!orig_succ)) {
02723       ut_a(merge_page_zip);
02724 #ifdef UNIV_BTR_DEBUG
02725       /* FIL_PAGE_PREV was restored from merge_page_zip. */
02726       ut_a(!memcmp(fil_page_prev,
02727              merge_page + FIL_PAGE_PREV, 4));
02728 #endif /* UNIV_BTR_DEBUG */
02729       goto err_exit;
02730     }
02731 
02732     btr_search_drop_page_hash_index(block);
02733 
02734 #ifdef UNIV_BTR_DEBUG
02735     if (UNIV_LIKELY_NULL(merge_page_zip)) {
02736       /* Restore FIL_PAGE_PREV in order to avoid an assertion
02737       failure in btr_level_list_remove(), which will set
02738       the field again to FIL_NULL.  Even though this makes
02739       merge_page and merge_page_zip inconsistent for a
02740       split second, it is harmless, because the pages
02741       are X-latched. */
02742       memcpy(merge_page + FIL_PAGE_PREV, fil_page_prev, 4);
02743     }
02744 #endif /* UNIV_BTR_DEBUG */
02745 
02746     /* Remove the page from the level list */
02747     btr_level_list_remove(space, zip_size, page, mtr);
02748 
02749     /* Replace the address of the old child node (= page) with the
02750     address of the merge page to the right */
02751 
02752     btr_node_ptr_set_child_page_no(
02753       btr_cur_get_rec(&father_cursor),
02754       btr_cur_get_page_zip(&father_cursor),
02755       offsets, right_page_no, mtr);
02756     btr_node_ptr_delete(index, merge_block, mtr);
02757 
02758     lock_update_merge_right(merge_block, orig_succ, block);
02759   }
02760 
02761   mem_heap_free(heap);
02762 
02763   if (!dict_index_is_clust(index) && page_is_leaf(merge_page)) {
02764     /* Update the free bits of the B-tree page in the
02765     insert buffer bitmap.  This has to be done in a
02766     separate mini-transaction that is committed before the
02767     main mini-transaction.  We cannot update the insert
02768     buffer bitmap in this mini-transaction, because
02769     btr_compress() can be invoked recursively without
02770     committing the mini-transaction in between.  Since
02771     insert buffer bitmap pages have a lower rank than
02772     B-tree pages, we must not access other pages in the
02773     same mini-transaction after accessing an insert buffer
02774     bitmap page. */
02775 
02776     /* The free bits in the insert buffer bitmap must
02777     never exceed the free space on a page.  It is safe to
02778     decrement or reset the bits in the bitmap in a
02779     mini-transaction that is committed before the
02780     mini-transaction that affects the free space. */
02781 
02782     /* It is unsafe to increment the bits in a separately
02783     committed mini-transaction, because in crash recovery,
02784     the free bits could momentarily be set too high. */
02785 
02786     if (zip_size) {
02787       /* Because the free bits may be incremented
02788       and we cannot update the insert buffer bitmap
02789       in the same mini-transaction, the only safe
02790       thing we can do here is the pessimistic
02791       approach: reset the free bits. */
02792       ibuf_reset_free_bits(merge_block);
02793     } else {
02794       /* On uncompressed pages, the free bits will
02795       never increase here.  Thus, it is safe to
02796       write the bits accurately in a separate
02797       mini-transaction. */
02798       ibuf_update_free_bits_if_full(merge_block,
02799                   UNIV_PAGE_SIZE,
02800                   ULINT_UNDEFINED);
02801     }
02802   }
02803 
02804   ut_ad(page_validate(merge_page, index));
02805 #ifdef UNIV_ZIP_DEBUG
02806   ut_a(!merge_page_zip || page_zip_validate(merge_page_zip, merge_page));
02807 #endif /* UNIV_ZIP_DEBUG */
02808 
02809   /* Free the file page */
02810   btr_page_free(index, block, mtr);
02811 
02812   ut_ad(btr_check_node_ptr(index, merge_block, mtr));
02813   return(TRUE);
02814 }
02815 
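btr_compress() only proceeds when the page's data fits on the sibling, and it reorganizes the sibling first when the data fits only after defragmentation. Here is a standalone sketch of that three-way decision; plan_merge() and the byte counts are illustrative stand-ins for page_get_data_size(), page_get_max_insert_size() and page_get_max_insert_size_after_reorganize().

#include <stdio.h>

enum merge_plan { MERGE_IMPOSSIBLE, MERGE_AFTER_REORGANIZE, MERGE_DIRECT };

static enum merge_plan plan_merge(unsigned data_size,
                                  unsigned max_ins_size,
                                  unsigned max_ins_size_reorg)
{
  if (data_size > max_ins_size_reorg) {
    return MERGE_IMPOSSIBLE;        /* the "No space for merge" path above */
  }
  if (data_size > max_ins_size) {
    return MERGE_AFTER_REORGANIZE;  /* run btr_page_reorganize() first */
  }
  return MERGE_DIRECT;              /* records can be copied straight over */
}

int main(void)
{
  printf("%d\n", plan_merge(900, 400, 800));   /* MERGE_IMPOSSIBLE */
  printf("%d\n", plan_merge(700, 400, 800));   /* MERGE_AFTER_REORGANIZE */
  printf("%d\n", plan_merge(300, 400, 800));   /* MERGE_DIRECT */
  return 0;
}
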
02816 /*************************************************************/
02821 static
02822 void
02823 btr_discard_only_page_on_level(
02824 /*===========================*/
02825   dict_index_t* index,  
02826   buf_block_t*  block,  
02827   mtr_t*    mtr)  
02828 {
02829   ulint   page_level = 0;
02830   trx_id_t  max_trx_id;
02831 
02832   /* Save the PAGE_MAX_TRX_ID from the leaf page. */
02833   max_trx_id = page_get_max_trx_id(buf_block_get_frame(block));
02834 
02835   while (buf_block_get_page_no(block) != dict_index_get_page(index)) {
02836     btr_cur_t cursor;
02837     buf_block_t*  father;
02838     const page_t* page  = buf_block_get_frame(block);
02839 
02840     ut_a(page_get_n_recs(page) == 1);
02841     ut_a(page_level == btr_page_get_level(page, mtr));
02842     ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
02843     ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
02844 
02845     ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
02846     btr_search_drop_page_hash_index(block);
02847 
02848     btr_page_get_father(index, block, mtr, &cursor);
02849     father = btr_cur_get_block(&cursor);
02850 
02851     lock_update_discard(father, PAGE_HEAP_NO_SUPREMUM, block);
02852 
02853     /* Free the file page */
02854     btr_page_free(index, block, mtr);
02855 
02856     block = father;
02857     page_level++;
02858   }
02859 
02860   /* block is the root page, which must be empty, except
02861   for the node pointer to the (now discarded) block(s). */
02862 
02863 #ifdef UNIV_BTR_DEBUG
02864   if (!dict_index_is_ibuf(index)) {
02865     const page_t* root  = buf_block_get_frame(block);
02866     const ulint space = dict_index_get_space(index);
02867     ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
02868               + root, space));
02869     ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
02870               + root, space));
02871   }
02872 #endif /* UNIV_BTR_DEBUG */
02873 
02874   btr_page_empty(block, buf_block_get_page_zip(block), index, 0, mtr);
02875 
02876   if (!dict_index_is_clust(index)) {
02877     /* We play it safe and reset the free bits for the root */
02878     ibuf_reset_free_bits(block);
02879 
02880     if (page_is_leaf(buf_block_get_frame(block))) {
02881       ut_a(max_trx_id);
02882       page_set_max_trx_id(block,
02883               buf_block_get_page_zip(block),
02884               max_trx_id, mtr);
02885     }
02886   }
02887 }
02888 
02889 /*************************************************************/
02893 UNIV_INTERN
02894 void
02895 btr_discard_page(
02896 /*=============*/
02897   btr_cur_t*  cursor, 
02899   mtr_t*    mtr)  
02900 {
02901   dict_index_t* index;
02902   ulint   space;
02903   ulint   zip_size;
02904   ulint   left_page_no;
02905   ulint   right_page_no;
02906   buf_block_t*  merge_block;
02907   page_t*   merge_page;
02908   buf_block_t*  block;
02909   page_t*   page;
02910   rec_t*    node_ptr;
02911 
02912   block = btr_cur_get_block(cursor);
02913   index = btr_cur_get_index(cursor);
02914 
02915   ut_ad(dict_index_get_page(index) != buf_block_get_page_no(block));
02916   ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
02917         MTR_MEMO_X_LOCK));
02918   ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
02919   space = dict_index_get_space(index);
02920   zip_size = dict_table_zip_size(index->table);
02921 
02922   /* Decide the page which will inherit the locks */
02923 
02924   left_page_no = btr_page_get_prev(buf_block_get_frame(block), mtr);
02925   right_page_no = btr_page_get_next(buf_block_get_frame(block), mtr);
02926 
02927   if (left_page_no != FIL_NULL) {
02928     merge_block = btr_block_get(space, zip_size, left_page_no,
02929               RW_X_LATCH, mtr);
02930     merge_page = buf_block_get_frame(merge_block);
02931 #ifdef UNIV_BTR_DEBUG
02932     ut_a(btr_page_get_next(merge_page, mtr)
02933          == buf_block_get_page_no(block));
02934 #endif /* UNIV_BTR_DEBUG */
02935   } else if (right_page_no != FIL_NULL) {
02936     merge_block = btr_block_get(space, zip_size, right_page_no,
02937               RW_X_LATCH, mtr);
02938     merge_page = buf_block_get_frame(merge_block);
02939 #ifdef UNIV_BTR_DEBUG
02940     ut_a(btr_page_get_prev(merge_page, mtr)
02941          == buf_block_get_page_no(block));
02942 #endif /* UNIV_BTR_DEBUG */
02943   } else {
02944     btr_discard_only_page_on_level(index, block, mtr);
02945 
02946     return;
02947   }
02948 
02949   page = buf_block_get_frame(block);
02950   ut_a(page_is_comp(merge_page) == page_is_comp(page));
02951   btr_search_drop_page_hash_index(block);
02952 
02953   if (left_page_no == FIL_NULL && !page_is_leaf(page)) {
02954 
02955     /* We have to mark the leftmost node pointer on the right
02956     side page as the predefined minimum record */
02957     node_ptr = page_rec_get_next(page_get_infimum_rec(merge_page));
02958 
02959     ut_ad(page_rec_is_user_rec(node_ptr));
02960 
02961     /* This will make page_zip_validate() fail on merge_page
02962     until btr_level_list_remove() completes.  This is harmless,
02963     because everything will take place within a single
02964     mini-transaction and because writing to the redo log
02965     is an atomic operation (performed by mtr_commit()). */
02966     btr_set_min_rec_mark(node_ptr, mtr);
02967   }
02968 
02969   btr_node_ptr_delete(index, block, mtr);
02970 
02971   /* Remove the page from the level list */
02972   btr_level_list_remove(space, zip_size, page, mtr);
02973 #ifdef UNIV_ZIP_DEBUG
02974   {
02975     page_zip_des_t* merge_page_zip
02976       = buf_block_get_page_zip(merge_block);
02977     ut_a(!merge_page_zip
02978          || page_zip_validate(merge_page_zip, merge_page));
02979   }
02980 #endif /* UNIV_ZIP_DEBUG */
02981 
02982   if (left_page_no != FIL_NULL) {
02983     lock_update_discard(merge_block, PAGE_HEAP_NO_SUPREMUM,
02984             block);
02985   } else {
02986     lock_update_discard(merge_block,
02987             lock_get_min_heap_no(merge_block),
02988             block);
02989   }
02990 
02991   /* Free the file page */
02992   btr_page_free(index, block, mtr);
02993 
02994   ut_ad(btr_check_node_ptr(index, merge_block, mtr));
02995 }
02996 
02997 #ifdef UNIV_BTR_PRINT
02998 /*************************************************************/
03000 UNIV_INTERN
03001 void
03002 btr_print_size(
03003 /*===========*/
03004   dict_index_t* index)  
03005 {
03006   page_t*   root;
03007   fseg_header_t*  seg;
03008   mtr_t   mtr;
03009 
03010   if (dict_index_is_ibuf(index)) {
03011     fputs("Sorry, cannot print info of an ibuf tree:"
03012           " use ibuf functions\n", stderr);
03013 
03014     return;
03015   }
03016 
03017   mtr_start(&mtr);
03018 
03019   root = btr_root_get(index, &mtr);
03020 
03021   seg = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
03022 
03023   fputs("INFO OF THE NON-LEAF PAGE SEGMENT\n", stderr);
03024   fseg_print(seg, &mtr);
03025 
03026   if (!(index->type & DICT_UNIVERSAL)) {
03027 
03028     seg = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
03029 
03030     fputs("INFO OF THE LEAF PAGE SEGMENT\n", stderr);
03031     fseg_print(seg, &mtr);
03032   }
03033 
03034   mtr_commit(&mtr);
03035 }
03036 
03037 /************************************************************/
03039 static
03040 void
03041 btr_print_recursive(
03042 /*================*/
03043   dict_index_t* index,  
03044   buf_block_t*  block,  
03045   ulint   width,  
03047   mem_heap_t**  heap, 
03048   ulint**   offsets,
03049   mtr_t*    mtr)  
03050 {
03051   const page_t* page  = buf_block_get_frame(block);
03052   page_cur_t  cursor;
03053   ulint   n_recs;
03054   ulint   i = 0;
03055   mtr_t   mtr2;
03056 
03057   ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
03058   fprintf(stderr, "NODE ON LEVEL %lu page number %lu\n",
03059     (ulong) btr_page_get_level(page, mtr),
03060     (ulong) buf_block_get_page_no(block));
03061 
03062   page_print(block, index, width, width);
03063 
03064   n_recs = page_get_n_recs(page);
03065 
03066   page_cur_set_before_first(block, &cursor);
03067   page_cur_move_to_next(&cursor);
03068 
03069   while (!page_cur_is_after_last(&cursor)) {
03070 
03071     if (page_is_leaf(page)) {
03072 
03073       /* If this is the leaf level, do nothing */
03074 
03075     } else if ((i <= width) || (i >= n_recs - width)) {
03076 
03077       const rec_t*  node_ptr;
03078 
03079       mtr_start(&mtr2);
03080 
03081       node_ptr = page_cur_get_rec(&cursor);
03082 
03083       *offsets = rec_get_offsets(node_ptr, index, *offsets,
03084                ULINT_UNDEFINED, heap);
03085       btr_print_recursive(index,
03086               btr_node_ptr_get_child(node_ptr,
03087                    index,
03088                    *offsets,
03089                    &mtr2),
03090               width, heap, offsets, &mtr2);
03091       mtr_commit(&mtr2);
03092     }
03093 
03094     page_cur_move_to_next(&cursor);
03095     i++;
03096   }
03097 }
03098 
03099 /**************************************************************/
03101 UNIV_INTERN
03102 void
03103 btr_print_index(
03104 /*============*/
03105   dict_index_t* index,  
03106   ulint   width)  
03108 {
03109   mtr_t   mtr;
03110   buf_block_t*  root;
03111   mem_heap_t* heap  = NULL;
03112   ulint   offsets_[REC_OFFS_NORMAL_SIZE];
03113   ulint*    offsets = offsets_;
03114   rec_offs_init(offsets_);
03115 
03116   fputs("--------------------------\n"
03117         "INDEX TREE PRINT\n", stderr);
03118 
03119   mtr_start(&mtr);
03120 
03121   root = btr_root_block_get(index, &mtr);
03122 
03123   btr_print_recursive(index, root, width, &heap, &offsets, &mtr);
03124   if (UNIV_LIKELY_NULL(heap)) {
03125     mem_heap_free(heap);
03126   }
03127 
03128   mtr_commit(&mtr);
03129 
03130   btr_validate_index(index, NULL);
03131 }
03132 #endif /* UNIV_BTR_PRINT */
03133 
03134 #ifdef UNIV_DEBUG
03135 /************************************************************/
03138 UNIV_INTERN
03139 ibool
03140 btr_check_node_ptr(
03141 /*===============*/
03142   dict_index_t* index,  
03143   buf_block_t*  block,  
03144   mtr_t*    mtr)  
03145 {
03146   mem_heap_t* heap;
03147   dtuple_t* tuple;
03148   ulint*    offsets;
03149   btr_cur_t cursor;
03150   page_t*   page = buf_block_get_frame(block);
03151 
03152   ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
03153   if (dict_index_get_page(index) == buf_block_get_page_no(block)) {
03154 
03155     return(TRUE);
03156   }
03157 
03158   heap = mem_heap_create(256);
03159   offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
03160               &cursor);
03161 
03162   if (page_is_leaf(page)) {
03163 
03164     goto func_exit;
03165   }
03166 
03167   tuple = dict_index_build_node_ptr(
03168     index, page_rec_get_next(page_get_infimum_rec(page)), 0, heap,
03169     btr_page_get_level(page, mtr));
03170 
03171   ut_a(!cmp_dtuple_rec(tuple, btr_cur_get_rec(&cursor), offsets));
03172 func_exit:
03173   mem_heap_free(heap);
03174 
03175   return(TRUE);
03176 }
03177 #endif /* UNIV_DEBUG */
03178 
03179 /************************************************************/
03181 static
03182 void
03183 btr_index_rec_validate_report(
03184 /*==========================*/
03185   const page_t*   page, 
03186   const rec_t*    rec,  
03187   const dict_index_t* index)  
03188 {
03189   fputs("InnoDB: Record in ", stderr);
03190   dict_index_name_print(stderr, NULL, index);
03191   fprintf(stderr, ", page %lu, at offset %lu\n",
03192     page_get_page_no(page), (ulint) page_offset(rec));
03193 }
03194 
03195 /************************************************************/
03199 UNIV_INTERN
03200 ibool
03201 btr_index_rec_validate(
03202 /*===================*/
03203   const rec_t*    rec,    
03204   const dict_index_t* index,    
03205   ibool     dump_on_error)  
03208 {
03209   ulint   len;
03210   ulint   n;
03211   ulint   i;
03212   const page_t* page;
03213   mem_heap_t* heap  = NULL;
03214   ulint   offsets_[REC_OFFS_NORMAL_SIZE];
03215   ulint*    offsets = offsets_;
03216   rec_offs_init(offsets_);
03217 
03218   page = page_align(rec);
03219 
03220   if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
03221     /* The insert buffer index tree can contain records from any
03222     other index: we cannot check the number of fields or
03223     their length */
03224 
03225     return(TRUE);
03226   }
03227 
03228   if (UNIV_UNLIKELY((ibool)!!page_is_comp(page)
03229         != dict_table_is_comp(index->table))) {
03230     btr_index_rec_validate_report(page, rec, index);
03231     fprintf(stderr, "InnoDB: compact flag=%lu, should be %lu\n",
03232       (ulong) !!page_is_comp(page),
03233       (ulong) dict_table_is_comp(index->table));
03234 
03235     return(FALSE);
03236   }
03237 
03238   n = dict_index_get_n_fields(index);
03239 
03240   if (!page_is_comp(page)
03241       && UNIV_UNLIKELY(rec_get_n_fields_old(rec) != n)) {
03242     btr_index_rec_validate_report(page, rec, index);
03243     fprintf(stderr, "InnoDB: has %lu fields, should have %lu\n",
03244       (ulong) rec_get_n_fields_old(rec), (ulong) n);
03245 
03246     if (dump_on_error) {
03247       buf_page_print(page, 0);
03248 
03249       fputs("InnoDB: corrupt record ", stderr);
03250       rec_print_old(stderr, rec);
03251       putc('\n', stderr);
03252     }
03253     return(FALSE);
03254   }
03255 
03256   offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
03257 
03258   for (i = 0; i < n; i++) {
03259     ulint fixed_size = dict_col_get_fixed_size(
03260       dict_index_get_nth_col(index, i), page_is_comp(page));
03261 
03262     rec_get_nth_field_offs(offsets, i, &len);
03263 
03264     /* Note that if fixed_size != 0, it equals the
03265     length of a fixed-size column in the clustered index.
03266     A prefix index of the column has a fixed, but different,
03267     length.  When fixed_size == 0, prefix_len is the maximum
03268     length of the prefix index column. */
03269 
03270     if ((dict_index_get_nth_field(index, i)->prefix_len == 0
03271          && len != UNIV_SQL_NULL && fixed_size
03272          && len != fixed_size)
03273         || (dict_index_get_nth_field(index, i)->prefix_len > 0
03274       && len != UNIV_SQL_NULL
03275       && len
03276       > dict_index_get_nth_field(index, i)->prefix_len)) {
03277 
03278       btr_index_rec_validate_report(page, rec, index);
03279       fprintf(stderr,
03280         "InnoDB: field %lu len is %lu,"
03281         " should be %lu\n",
03282         (ulong) i, (ulong) len, (ulong) fixed_size);
03283 
03284       if (dump_on_error) {
03285         buf_page_print(page, 0);
03286 
03287         fputs("InnoDB: corrupt record ", stderr);
03288         rec_print_new(stderr, rec, offsets);
03289         putc('\n', stderr);
03290       }
03291       if (UNIV_LIKELY_NULL(heap)) {
03292         mem_heap_free(heap);
03293       }
03294       return(FALSE);
03295     }
03296   }
03297 
03298   if (UNIV_LIKELY_NULL(heap)) {
03299     mem_heap_free(heap);
03300   }
03301   return(TRUE);
03302 }
03303 
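The loop in btr_index_rec_validate() checks each stored field length against the index definition: a non-NULL value in a fixed-size column must have exactly the declared length, and a value in a column prefix must not exceed the prefix length. Below is a standalone sketch of that per-field check; field_def and LEN_NULL are illustrative stand-ins for the dictionary objects and UNIV_SQL_NULL.

#include <stdio.h>

#define LEN_NULL 0xffffffffU      /* stands in for UNIV_SQL_NULL */

struct field_def {
  unsigned fixed_size;            /* 0 = variable length */
  unsigned prefix_len;            /* 0 = whole column indexed */
};

static int field_len_valid(const struct field_def *def, unsigned len)
{
  if (len == LEN_NULL) {
    return 1;                     /* SQL NULL: nothing to check */
  }
  if (def->prefix_len == 0 && def->fixed_size != 0
      && len != def->fixed_size) {
    return 0;                     /* fixed-size column with wrong length */
  }
  if (def->prefix_len > 0 && len > def->prefix_len) {
    return 0;                     /* longer than the indexed prefix */
  }
  return 1;
}

int main(void)
{
  struct field_def fixed4  = { 4, 0 };
  struct field_def prefix8 = { 0, 8 };

  printf("%d %d %d\n",
         field_len_valid(&fixed4, 4),     /* 1: correct length */
         field_len_valid(&fixed4, 3),     /* 0: corrupt record */
         field_len_valid(&prefix8, 12));  /* 0: exceeds the prefix */
  return 0;
}
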
03304 /************************************************************/
03308 static
03309 ibool
03310 btr_index_page_validate(
03311 /*====================*/
03312   buf_block_t*  block,  
03313   dict_index_t* index)  
03314 {
03315   page_cur_t  cur;
03316   ibool   ret = TRUE;
03317 
03318   page_cur_set_before_first(block, &cur);
03319   page_cur_move_to_next(&cur);
03320 
03321   for (;;) {
03322     if (page_cur_is_after_last(&cur)) {
03323 
03324       break;
03325     }
03326 
03327     if (!btr_index_rec_validate(cur.rec, index, TRUE)) {
03328 
03329       return(FALSE);
03330     }
03331 
03332     page_cur_move_to_next(&cur);
03333   }
03334 
03335   return(ret);
03336 }
03337 
03338 /************************************************************/
03340 static
03341 void
03342 btr_validate_report1(
03343 /*=================*/
03344   dict_index_t*   index,  
03345   ulint     level,  
03346   const buf_block_t*  block)  
03347 {
03348   fprintf(stderr, "InnoDB: Error in page %lu of ",
03349     buf_block_get_page_no(block));
03350   dict_index_name_print(stderr, NULL, index);
03351   if (level) {
03352     fprintf(stderr, ", index tree level %lu", level);
03353   }
03354   putc('\n', stderr);
03355 }
03356 
03357 /************************************************************/
03359 static
03360 void
03361 btr_validate_report2(
03362 /*=================*/
03363   const dict_index_t* index,  
03364   ulint     level,  
03365   const buf_block_t*  block1, 
03366   const buf_block_t*  block2) 
03367 {
03368   fprintf(stderr, "InnoDB: Error in pages %lu and %lu of ",
03369     buf_block_get_page_no(block1),
03370     buf_block_get_page_no(block2));
03371   dict_index_name_print(stderr, NULL, index);
03372   if (level) {
03373     fprintf(stderr, ", index tree level %lu", level);
03374   }
03375   putc('\n', stderr);
03376 }
03377 
03378 /************************************************************/
03381 static
03382 ibool
03383 btr_validate_level(
03384 /*===============*/
03385   dict_index_t* index,  
03386   trx_t*    trx,  
03387   ulint   level)  
03388 {
03389   ulint   space;
03390   ulint   zip_size;
03391   buf_block_t*  block;
03392   page_t*   page;
03393   buf_block_t*  right_block = 0; /* remove warning */
03394   page_t*   right_page = 0; /* remove warning */
03395   page_t*   father_page;
03396   btr_cur_t node_cur;
03397   btr_cur_t right_node_cur;
03398   rec_t*    rec;
03399   ulint   right_page_no;
03400   ulint   left_page_no;
03401   page_cur_t  cursor;
03402   dtuple_t* node_ptr_tuple;
03403   ibool   ret = TRUE;
03404   mtr_t   mtr;
03405   mem_heap_t* heap  = mem_heap_create(256);
03406   ulint*    offsets = NULL;
03407   ulint*    offsets2= NULL;
03408 #ifdef UNIV_ZIP_DEBUG
03409   page_zip_des_t* page_zip;
03410 #endif /* UNIV_ZIP_DEBUG */
03411 
03412   mtr_start(&mtr);
03413 
03414   mtr_x_lock(dict_index_get_lock(index), &mtr);
03415 
03416   block = btr_root_block_get(index, &mtr);
03417   page = buf_block_get_frame(block);
03418 
03419   space = dict_index_get_space(index);
03420   zip_size = dict_table_zip_size(index->table);
03421 
03422   while (level != btr_page_get_level(page, &mtr)) {
03423     const rec_t*  node_ptr;
03424 
03425     ut_a(space == buf_block_get_space(block));
03426     ut_a(space == page_get_space_id(page));
03427 #ifdef UNIV_ZIP_DEBUG
03428     page_zip = buf_block_get_page_zip(block);
03429     ut_a(!page_zip || page_zip_validate(page_zip, page));
03430 #endif /* UNIV_ZIP_DEBUG */
03431     ut_a(!page_is_leaf(page));
03432 
03433     page_cur_set_before_first(block, &cursor);
03434     page_cur_move_to_next(&cursor);
03435 
03436     node_ptr = page_cur_get_rec(&cursor);
03437     offsets = rec_get_offsets(node_ptr, index, offsets,
03438             ULINT_UNDEFINED, &heap);
03439     block = btr_node_ptr_get_child(node_ptr, index, offsets, &mtr);
03440     page = buf_block_get_frame(block);
03441   }
03442 
03443   /* Now we are on the desired level. Loop through the pages on that
03444   level. */
03445 loop:
03446   if (trx_is_interrupted(trx)) {
03447     mtr_commit(&mtr);
03448     mem_heap_free(heap);
03449     return(ret);
03450   }
03451   mem_heap_empty(heap);
03452   offsets = offsets2 = NULL;
03453   mtr_x_lock(dict_index_get_lock(index), &mtr);
03454 
03455 #ifdef UNIV_ZIP_DEBUG
03456   page_zip = buf_block_get_page_zip(block);
03457   ut_a(!page_zip || page_zip_validate(page_zip, page));
03458 #endif /* UNIV_ZIP_DEBUG */
03459 
03460   /* Check ordering etc. of records */
03461 
03462   if (!page_validate(page, index)) {
03463     btr_validate_report1(index, level, block);
03464 
03465     ret = FALSE;
03466   } else if (level == 0) {
03467     /* We are on level 0. Check that the records have the right
03468     number of fields, and field lengths are right. */
03469 
03470     if (!btr_index_page_validate(block, index)) {
03471 
03472       ret = FALSE;
03473     }
03474   }
03475 
03476   ut_a(btr_page_get_level(page, &mtr) == level);
03477 
03478   right_page_no = btr_page_get_next(page, &mtr);
03479   left_page_no = btr_page_get_prev(page, &mtr);
03480 
03481   ut_a(page_get_n_recs(page) > 0 || (level == 0
03482              && page_get_page_no(page)
03483              == dict_index_get_page(index)));
03484 
03485   if (right_page_no != FIL_NULL) {
03486     const rec_t*  right_rec;
03487     right_block = btr_block_get(space, zip_size, right_page_no,
03488               RW_X_LATCH, &mtr);
03489     right_page = buf_block_get_frame(right_block);
03490     if (UNIV_UNLIKELY(btr_page_get_prev(right_page, &mtr)
03491           != page_get_page_no(page))) {
03492       btr_validate_report2(index, level, block, right_block);
03493       fputs("InnoDB: broken FIL_PAGE_NEXT"
03494             " or FIL_PAGE_PREV links\n", stderr);
03495       buf_page_print(page, 0);
03496       buf_page_print(right_page, 0);
03497 
03498       ret = FALSE;
03499     }
03500 
03501     if (UNIV_UNLIKELY(page_is_comp(right_page)
03502           != page_is_comp(page))) {
03503       btr_validate_report2(index, level, block, right_block);
03504       fputs("InnoDB: 'compact' flag mismatch\n", stderr);
03505       buf_page_print(page, 0);
03506       buf_page_print(right_page, 0);
03507 
03508       ret = FALSE;
03509 
03510       goto node_ptr_fails;
03511     }
03512 
03513     rec = page_rec_get_prev(page_get_supremum_rec(page));
03514     right_rec = page_rec_get_next(page_get_infimum_rec(
03515                   right_page));
03516     offsets = rec_get_offsets(rec, index,
03517             offsets, ULINT_UNDEFINED, &heap);
03518     offsets2 = rec_get_offsets(right_rec, index,
03519              offsets2, ULINT_UNDEFINED, &heap);
03520     if (UNIV_UNLIKELY(cmp_rec_rec(rec, right_rec,
03521                 offsets, offsets2,
03522                 index) >= 0)) {
03523 
03524       btr_validate_report2(index, level, block, right_block);
03525 
03526       fputs("InnoDB: records in wrong order"
03527             " on adjacent pages\n", stderr);
03528 
03529       buf_page_print(page, 0);
03530       buf_page_print(right_page, 0);
03531 
03532       fputs("InnoDB: record ", stderr);
03533       rec = page_rec_get_prev(page_get_supremum_rec(page));
03534       rec_print(stderr, rec, index);
03535       putc('\n', stderr);
03536       fputs("InnoDB: record ", stderr);
03537       rec = page_rec_get_next(
03538         page_get_infimum_rec(right_page));
03539       rec_print(stderr, rec, index);
03540       putc('\n', stderr);
03541 
03542       ret = FALSE;
03543     }
03544   }
03545 
03546   if (level > 0 && left_page_no == FIL_NULL) {
03547     ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
03548            page_rec_get_next(page_get_infimum_rec(page)),
03549            page_is_comp(page)));
03550   }
03551 
03552   if (buf_block_get_page_no(block) != dict_index_get_page(index)) {
03553 
03554     /* Check father node pointers */
03555 
03556     rec_t*  node_ptr;
03557 
03558     offsets = btr_page_get_father_block(offsets, heap, index,
03559                 block, &mtr, &node_cur);
03560     father_page = btr_cur_get_page(&node_cur);
03561     node_ptr = btr_cur_get_rec(&node_cur);
03562 
03563     btr_cur_position(
03564       index, page_rec_get_prev(page_get_supremum_rec(page)),
03565       block, &node_cur);
03566     offsets = btr_page_get_father_node_ptr(offsets, heap,
03567                    &node_cur, &mtr);
03568 
03569     if (UNIV_UNLIKELY(node_ptr != btr_cur_get_rec(&node_cur))
03570         || UNIV_UNLIKELY(btr_node_ptr_get_child_page_no(node_ptr,
03571                     offsets)
03572              != buf_block_get_page_no(block))) {
03573 
03574       btr_validate_report1(index, level, block);
03575 
03576       fputs("InnoDB: node pointer to the page is wrong\n",
03577             stderr);
03578 
03579       buf_page_print(father_page, 0);
03580       buf_page_print(page, 0);
03581 
03582       fputs("InnoDB: node ptr ", stderr);
03583       rec_print(stderr, node_ptr, index);
03584 
03585       rec = btr_cur_get_rec(&node_cur);
03586       fprintf(stderr, "\n"
03587         "InnoDB: node ptr child page n:o %lu\n",
03588         (ulong) btr_node_ptr_get_child_page_no(
03589           rec, offsets));
03590 
03591       fputs("InnoDB: record on page ", stderr);
03592       rec_print_new(stderr, rec, offsets);
03593       putc('\n', stderr);
03594       ret = FALSE;
03595 
03596       goto node_ptr_fails;
03597     }
03598 
03599     if (!page_is_leaf(page)) {
03600       node_ptr_tuple = dict_index_build_node_ptr(
03601         index,
03602         page_rec_get_next(page_get_infimum_rec(page)),
03603         0, heap, btr_page_get_level(page, &mtr));
03604 
03605       if (cmp_dtuple_rec(node_ptr_tuple, node_ptr,
03606              offsets)) {
03607         const rec_t* first_rec = page_rec_get_next(
03608           page_get_infimum_rec(page));
03609 
03610         btr_validate_report1(index, level, block);
03611 
03612         buf_page_print(father_page, 0);
03613         buf_page_print(page, 0);
03614 
03615         fputs("InnoDB: Error: node ptrs differ"
03616               " on levels > 0\n"
03617               "InnoDB: node ptr ", stderr);
03618         rec_print_new(stderr, node_ptr, offsets);
03619         fputs("InnoDB: first rec ", stderr);
03620         rec_print(stderr, first_rec, index);
03621         putc('\n', stderr);
03622         ret = FALSE;
03623 
03624         goto node_ptr_fails;
03625       }
03626     }
03627 
03628     if (left_page_no == FIL_NULL) {
03629       ut_a(node_ptr == page_rec_get_next(
03630              page_get_infimum_rec(father_page)));
03631       ut_a(btr_page_get_prev(father_page, &mtr) == FIL_NULL);
03632     }
03633 
03634     if (right_page_no == FIL_NULL) {
03635       ut_a(node_ptr == page_rec_get_prev(
03636              page_get_supremum_rec(father_page)));
03637       ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL);
03638     } else {
03639       const rec_t*  right_node_ptr
03640         = page_rec_get_next(node_ptr);
03641 
03642       offsets = btr_page_get_father_block(
03643         offsets, heap, index, right_block,
03644         &mtr, &right_node_cur);
03645       if (right_node_ptr
03646           != page_get_supremum_rec(father_page)) {
03647 
03648         if (btr_cur_get_rec(&right_node_cur)
03649             != right_node_ptr) {
03650           ret = FALSE;
03651           fputs("InnoDB: node pointer to"
03652                 " the right page is wrong\n",
03653                 stderr);
03654 
03655           btr_validate_report1(index, level,
03656                    block);
03657 
03658           buf_page_print(father_page, 0);
03659           buf_page_print(page, 0);
03660           buf_page_print(right_page, 0);
03661         }
03662       } else {
03663         page_t* right_father_page
03664           = btr_cur_get_page(&right_node_cur);
03665 
03666         if (btr_cur_get_rec(&right_node_cur)
03667             != page_rec_get_next(
03668               page_get_infimum_rec(
03669                 right_father_page))) {
03670           ret = FALSE;
03671           fputs("InnoDB: node pointer 2 to"
03672                 " the right page is wrong\n",
03673                 stderr);
03674 
03675           btr_validate_report1(index, level,
03676                    block);
03677 
03678           buf_page_print(father_page, 0);
03679           buf_page_print(right_father_page, 0);
03680           buf_page_print(page, 0);
03681           buf_page_print(right_page, 0);
03682         }
03683 
03684         if (page_get_page_no(right_father_page)
03685             != btr_page_get_next(father_page, &mtr)) {
03686 
03687           ret = FALSE;
03688           fputs("InnoDB: node pointer 3 to"
03689                 " the right page is wrong\n",
03690                 stderr);
03691 
03692           btr_validate_report1(index, level,
03693                    block);
03694 
03695           buf_page_print(father_page, 0);
03696           buf_page_print(right_father_page, 0);
03697           buf_page_print(page, 0);
03698           buf_page_print(right_page, 0);
03699         }
03700       }
03701     }
03702   }
03703 
03704 node_ptr_fails:
03705   /* Commit the mini-transaction to release the latch on 'page'.
03706   Re-acquire the latch on right_page, which will become 'page'
03707   on the next loop.  The page has already been checked. */
03708   mtr_commit(&mtr);
03709 
03710   if (right_page_no != FIL_NULL) {
03711     mtr_start(&mtr);
03712 
03713     block = btr_block_get(space, zip_size, right_page_no,
03714               RW_X_LATCH, &mtr);
03715     page = buf_block_get_frame(block);
03716 
03717     goto loop;
03718   }
03719 
03720   mem_heap_free(heap);
03721   return(ret);
03722 }
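
The loop above walks one tree level from left to right: for each page it verifies the sibling links (FIL_PAGE_PREV of the right page must point back to the current page) and that the last user record of a page sorts strictly before the first user record of its right sibling, then commits the mini-transaction and continues at right_page_no. Below is a minimal, self-contained sketch of those two sibling checks over a toy page model; toy_page_t, validate_level_links and TOY_FIL_NULL are invented names for illustration and are not part of the InnoDB API.

/* Hedged sketch: a toy model of the sibling checks performed above.
   The types and names are invented for illustration only. */
#include <stdio.h>

#define TOY_FIL_NULL  ((unsigned long) -1)   /* analogue of FIL_NULL */

typedef struct toy_page {
        unsigned long   page_no;    /* page number */
        unsigned long   prev;       /* FIL_PAGE_PREV analogue */
        unsigned long   next;       /* FIL_PAGE_NEXT analogue */
        int             first_key;  /* key of the first user record */
        int             last_key;   /* key of the last user record */
} toy_page_t;

/* Walk one level from the leftmost page to the right, as the loop above
   does: the right sibling must link back to us, and records must stay in
   ascending order across the page boundary. Returns 1 (TRUE) if ok. */
static int
validate_level_links(const toy_page_t* pages, unsigned long leftmost)
{
        int             ret = 1;
        unsigned long   page_no = leftmost;

        while (page_no != TOY_FIL_NULL) {
                const toy_page_t*       page = &pages[page_no];
                unsigned long           right_no = page->next;

                if (right_no != TOY_FIL_NULL) {
                        const toy_page_t*       right = &pages[right_no];

                        if (right->prev != page->page_no) {
                                fputs("toy: broken next/prev links\n",
                                      stderr);
                                ret = 0;
                        }

                        if (page->last_key >= right->first_key) {
                                fputs("toy: records in wrong order"
                                      " on adjacent pages\n", stderr);
                                ret = 0;
                        }
                }

                page_no = right_no;
        }

        return(ret);
}

int
main(void)
{
        /* Three sibling pages on one level: 0 <-> 1 <-> 2. */
        toy_page_t      pages[3] = {
                {0, TOY_FIL_NULL, 1,            1, 10},
                {1, 0,            2,            11, 20},
                {2, 1,            TOY_FIL_NULL, 21, 30},
        };

        printf("level valid: %d\n", validate_level_links(pages, 0));
        return(0);
}
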
03723 
03724 /**************************************************************//**
03725 Checks the consistency of an index tree. Stops if trx is interrupted.
03726 @return TRUE if ok */
03727 UNIV_INTERN
03728 ibool
03729 btr_validate_index(
03730 /*===============*/
03731   dict_index_t* index,  /*!< in: index */
03732   trx_t*    trx)  /*!< in: transaction or NULL */
03733 {
03734   mtr_t mtr;
03735   page_t* root;
03736   ulint i;
03737   ulint n;
03738 
03739   mtr_start(&mtr);
03740   mtr_x_lock(dict_index_get_lock(index), &mtr);
03741 
03742   root = btr_root_get(index, &mtr);
03743   n = btr_page_get_level(root, &mtr);
03744 
03745   for (i = 0; i <= n && !trx_is_interrupted(trx); i++) {
03746     if (!btr_validate_level(index, trx, n - i)) {
03747 
03748       mtr_commit(&mtr);
03749 
03750       return(FALSE);
03751     }
03752   }
03753 
03754   mtr_commit(&mtr);
03755 
03756   return(TRUE);
03757 }
03758 #endif /* !UNIV_HOTBACKUP */
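
btr_validate_index() x-latches the whole tree, reads the root level n, and then calls btr_validate_level() once per level, starting at the root (level n) and ending at the leaf level 0, stopping early if the transaction is interrupted or a level fails to validate. Below is a minimal sketch of that top-down control flow; toy_validate_index() and validate_one_level() are hypothetical stand-ins for illustration, not InnoDB functions.

/* Hedged sketch of the top-down control flow in btr_validate_index(). */
#include <stdio.h>

static int
validate_one_level(unsigned long level)
{
        /* In InnoDB this walks every page on 'level' as shown above;
           here we simply report the level and succeed. */
        printf("validating level %lu\n", level);
        return(1);      /* TRUE */
}

static int
toy_validate_index(unsigned long root_level)
{
        unsigned long   i;

        /* Same loop shape as btr_validate_index(): i = 0 corresponds to
           the root level, i = root_level to the leaf level 0. */
        for (i = 0; i <= root_level; i++) {
                if (!validate_one_level(root_level - i)) {
                        return(0);      /* FALSE: stop on first failure */
                }
        }

        return(1);
}

int
main(void)
{
        return(toy_validate_index(2) ? 0 : 1);
}
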