Drizzled Public API Documentation

ibuf0ibuf.cc

00001 /*****************************************************************************
00002 
00003 Copyright (C) 1997, 2010, Innobase Oy. All Rights Reserved.
00004 
00005 This program is free software; you can redistribute it and/or modify it under
00006 the terms of the GNU General Public License as published by the Free Software
00007 Foundation; version 2 of the License.
00008 
00009 This program is distributed in the hope that it will be useful, but WITHOUT
00010 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
00012 
00013 You should have received a copy of the GNU General Public License along with
00014 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
00015 St, Fifth Floor, Boston, MA 02110-1301 USA
00016 
00017 *****************************************************************************/
00018 
00019 /**************************************************/
00026 #include "ibuf0ibuf.h"
00027 
00029 #define IBUF_BITS_PER_PAGE  4
00030 #if IBUF_BITS_PER_PAGE % 2
00031 # error "IBUF_BITS_PER_PAGE must be an even number!"
00032 #endif
00033 
00034 #define IBUF_BITMAP   PAGE_DATA
00035 
00036 #ifdef UNIV_NONINL
00037 #include "ibuf0ibuf.ic"
00038 #endif
00039 
00040 #ifndef UNIV_HOTBACKUP
00041 
00042 #include "buf0buf.h"
00043 #include "buf0rea.h"
00044 #include "fsp0fsp.h"
00045 #include "trx0sys.h"
00046 #include "fil0fil.h"
00047 #include "thr0loc.h"
00048 #include "rem0rec.h"
00049 #include "btr0cur.h"
00050 #include "btr0pcur.h"
00051 #include "btr0btr.h"
00052 #include "row0upd.h"
00053 #include "sync0sync.h"
00054 #include "dict0boot.h"
00055 #include "fut0lst.h"
00056 #include "lock0lock.h"
00057 #include "log0recv.h"
00058 #include "que0que.h"
00059 #include "srv0start.h" /* srv_shutdown_state */
00060 
00061 /*  STRUCTURE OF AN INSERT BUFFER RECORD
00062 
00063 In versions < 4.1.x:
00064 
00065 1. The first field is the page number.
00066 2. The second field is an array which stores type info for each subsequent
00067    field. We store the information which affects the ordering of records, and
00068    also the physical storage size of an SQL NULL value. E.g., for CHAR(10) it
00069    is 10 bytes.
00070 3. Next we have the fields of the actual index record.
00071 
00072 In versions >= 4.1.x:
00073 
00074 Note that contrary to what we planned in the 1990's, there will only be one
00075 insert buffer tree, and that is in the system tablespace of InnoDB.
00076 
00077 1. The first field is the space id.
00078 2. The second field is a one-byte marker (0) which differentiates records from
00079    the < 4.1.x storage format.
00080 3. The third field is the page number.
00081 4. The fourth field contains the type info, where we have also added 2 bytes to
00082    store the charset. In the compressed table format of 5.0.x we must add more
00083    information here so that we can build a dummy 'index' struct which 5.0.x
00084    can use in the binary search on the index page in the ibuf merge phase.
00085 5. The rest of the fields contain the fields of the actual index record.
00086 
00087 In versions >= 5.0.3:
00088 
00089 The first byte of the fourth field is an additional marker (0) if the record
00090 is in the compact format.  The presence of this marker can be detected by
00091 looking at the length of the field modulo DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE.
00092 
00093 The high-order bit of the character set field in the type info is the
00094 "nullable" flag for the field.
00095 
00096 In versions >= 5.5:
00097 
00098 The optional marker byte at the start of the fourth field is replaced by
00099 mandatory 3 fields, totaling 4 bytes:
00100 
00101  1. 2 bytes: Counter field, used to sort records within a (space id, page
00102     no) in the order they were added. This is needed so that for example the
00103     sequence of operations "INSERT x, DEL MARK x, INSERT x" is handled
00104     correctly.
00105 
00106  2. 1 byte: Operation type (see ibuf_op_t).
00107 
00108  3. 1 byte: Flags. Currently only one flag exists, IBUF_REC_COMPACT.
00109 
00110 To ensure older records, which do not have counters to enforce correct
00111 sorting, are merged before any new records, ibuf_insert checks if we're
00112 trying to insert to a position that contains old-style records, and if so,
00113 refuses the insert. Thus, ibuf pages are gradually converted to the new
00114 format as their corresponding buffer pool pages are read into memory.
00115 */
00116 
00117 
00118 /*  PREVENTING DEADLOCKS IN THE INSERT BUFFER SYSTEM
00119 
00120 If an OS thread performs any operation that brings in disk pages from
00121 non-system tablespaces into the buffer pool, or creates such a page there,
00122 then the operation may have as a side effect an insert buffer index tree
00123 compression. Thus, the tree latch of the insert buffer tree may be acquired
00124 in the x-mode, and also the file space latch of the system tablespace may
00125 be acquired in the x-mode.
00126 
00127 Also, an insert to an index in a non-system tablespace can have the same
00128 effect. How do we know this cannot lead to a deadlock of OS threads? There
00129 is a problem with the i/o-handler threads: they break the latching order
00130 because they own x-latches to pages which are on a lower level than the
00131 insert buffer tree latch, its page latches, and the tablespace latch an
00132 insert buffer operation can reserve.
00133 
00134 The solution is the following: Let all the tree and page latches connected
00135 with the insert buffer be later in the latching order than the fsp latch and
00136 fsp page latches.
00137 
00138 Insert buffer pages must be such that the insert buffer is never invoked
00139 when these pages are accessed as this would result in a recursion violating
00140 the latching order. We let a special i/o-handler thread take care of i/o to
00141 the insert buffer pages and the ibuf bitmap pages, as well as the fsp bitmap
00142 pages and the first inode page, which contains the inode of the ibuf tree: let
00143 us call all these ibuf pages. To prevent deadlocks, we do not let a read-ahead
00144 access both non-ibuf and ibuf pages.
00145 
00146 Then an i/o-handler for the insert buffer never needs to access recursively the
00147 insert buffer tree and thus obeys the latching order. On the other hand, other
00148 i/o-handlers for other tablespaces may require access to the insert buffer,
00149 but because all kinds of latches they need to access there are later in the
00150 latching order, no violation of the latching order occurs in this case,
00151 either.
00152 
00153 A problem is how to grow and contract an insert buffer tree. As it is later
00154 in the latching order than the fsp management, we have to reserve the fsp
00155 latch first, before adding or removing pages from the insert buffer tree.
00156 We let the insert buffer tree have its own file space management: a free
00157 list of pages linked to the tree root. To prevent recursive use of the
00158 insert buffer when adding pages to the tree, we must first load these pages
00159 to memory, obtaining a latch on them, and only after that add them to the
00160 free list of the insert buffer tree. More difficult is removing of pages
00161 from the free list. If there is an excess of pages in the free list of the
00162 ibuf tree, they might be needed if some thread reserves the fsp latch,
00163 intending to allocate more file space. So we do the following: if a thread
00164 reserves the fsp latch, we check the writer count field of the latch. If
00165 this field has value 1, it means that the thread did not own the latch
00166 before entering the fsp system, and the mtr of the thread contains no
00167 modifications to the fsp pages. Now we are free to reserve the ibuf latch,
00168 and check if there is an excess of pages in the free list. We can then, in a
00169 separate mini-transaction, take them out of the free list and free them to
00170 the fsp system.
00171 
00172 To avoid deadlocks in the ibuf system, we divide file pages into three levels:
00173 
00174 (1) non-ibuf pages,
00175 (2) ibuf tree pages and the pages in the ibuf tree free list, and
00176 (3) ibuf bitmap pages.
00177 
00178 No OS thread is allowed to access higher level pages if it has latches to
00179 lower level pages; even if the thread owns a B-tree latch it must not access
00180 the B-tree non-leaf pages if it has latches on lower level pages. Read-ahead
00181 is only allowed for level 1 and 2 pages. Dedicated i/o-handler threads handle
00182 exclusively level 1 i/o. A dedicated i/o handler thread handles exclusively
00183 level 2 i/o. However, if an OS thread does the i/o handling for itself, i.e.,
00184 it uses synchronous aio, it can access any pages, as long as it obeys the
00185 access order rules. */
00186 
00188 #define IBUF_POOL_SIZE_PER_MAX_SIZE 2
00189 
00191 #define IBUF_TABLE_NAME   "SYS_IBUF_TABLE"
00192 
00194 UNIV_INTERN ibuf_use_t  ibuf_use    = IBUF_USE_ALL;
00195 
00196 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
00197 
00198 UNIV_INTERN uint  ibuf_debug;
00199 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
00200 
00202 UNIV_INTERN ibuf_t* ibuf      = NULL;
00203 
00205 UNIV_INTERN ulint ibuf_flush_count  = 0;
00206 
00207 #ifdef UNIV_PFS_MUTEX
00208 UNIV_INTERN mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key;
00209 UNIV_INTERN mysql_pfs_key_t ibuf_mutex_key;
00210 UNIV_INTERN mysql_pfs_key_t ibuf_bitmap_mutex_key;
00211 #endif /* UNIV_PFS_MUTEX */
00212 
00213 #ifdef UNIV_IBUF_COUNT_DEBUG
00214 
00215 #define IBUF_COUNT_N_SPACES 4
00216 
00217 #define IBUF_COUNT_N_PAGES  130000
00218 
00220 static ulint  ibuf_counts[IBUF_COUNT_N_SPACES][IBUF_COUNT_N_PAGES];
00221 
00222 /******************************************************************/
00224 UNIV_INLINE
00225 void
00226 ibuf_count_check(
00227 /*=============*/
00228   ulint space_id, 
00229   ulint page_no)  
00230 {
00231   if (space_id < IBUF_COUNT_N_SPACES && page_no < IBUF_COUNT_N_PAGES) {
00232     return;
00233   }
00234 
00235   fprintf(stderr,
00236     "InnoDB: UNIV_IBUF_COUNT_DEBUG limits space_id and page_no\n"
00237     "InnoDB: and breaks crash recovery.\n"
00238     "InnoDB: space_id=%lu, should be 0<=space_id<%lu\n"
00239     "InnoDB: page_no=%lu, should be 0<=page_no<%lu\n",
00240     (ulint) space_id, (ulint) IBUF_COUNT_N_SPACES,
00241     (ulint) page_no, (ulint) IBUF_COUNT_N_PAGES);
00242   ut_error;
00243 }
00244 #endif
00245 
00247 /* @{ */
00248 #define IBUF_BITMAP_FREE  0 
00250 #define IBUF_BITMAP_BUFFERED  2 
00252 #define IBUF_BITMAP_IBUF  3 
00256 /* @} */
00257 
00258 /* Various constants for checking the type of an ibuf record and extracting
00259 data from it. For details, see the description of the record format at the
00260 top of this file. */
00261 
00265 /* @{ */
00266 #define IBUF_REC_INFO_SIZE  4 
00268 #if IBUF_REC_INFO_SIZE >= DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
00269 # error "IBUF_REC_INFO_SIZE >= DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
00270 #endif
00271 
00272 /* Offsets for the fields at the beginning of the fourth field */
00273 #define IBUF_REC_OFFSET_COUNTER 0 
00274 #define IBUF_REC_OFFSET_TYPE  2 
00275 #define IBUF_REC_OFFSET_FLAGS 3 
00277 /* Record flag masks */
00278 #define IBUF_REC_COMPACT  0x1 
00285 static mutex_t  ibuf_pessimistic_insert_mutex;
00286 
00288 static mutex_t  ibuf_mutex;
00289 
00291 static mutex_t  ibuf_bitmap_mutex;
00292 
00294 #define IBUF_MERGE_AREA     8
00295 
00299 #define IBUF_MERGE_THRESHOLD    4
00300 
00303 #define IBUF_MAX_N_PAGES_MERGED   IBUF_MERGE_AREA
00304 
00308 #define IBUF_CONTRACT_ON_INSERT_NON_SYNC  0
00309 
00313 #define IBUF_CONTRACT_ON_INSERT_SYNC    5
00314 
00318 #define IBUF_CONTRACT_DO_NOT_INSERT   10
00319 
00320 /* TODO: how to cope with drop table if there are records in the insert
00321 buffer for the indexes of the table? Is there actually any problem,
00322 because ibuf merge is done to a page when it is read in, and it is
00323 still physically like the index page even if the index would have been
00324 dropped! So, there seems to be no problem. */
00325 
00326 /******************************************************************/
00329 UNIV_INLINE
00330 void
00331 ibuf_enter(void)
00332 /*============*/
00333 {
00334   ibool*  ptr;
00335 
00336   ptr = thr_local_get_in_ibuf_field();
00337 
00338   ut_ad(*ptr == FALSE);
00339 
00340   *ptr = TRUE;
00341 }
00342 
00343 /******************************************************************/
00346 UNIV_INLINE
00347 void
00348 ibuf_exit(void)
00349 /*===========*/
00350 {
00351   ibool*  ptr;
00352 
00353   ptr = thr_local_get_in_ibuf_field();
00354 
00355   ut_ad(*ptr == TRUE);
00356 
00357   *ptr = FALSE;
00358 }
00359 
00360 /******************************************************************/
00367 UNIV_INTERN
00368 ibool
00369 ibuf_inside(void)
00370 /*=============*/
00371 {
00372   return(*thr_local_get_in_ibuf_field());
00373 }
00374 
00375 /******************************************************************/
00378 static
00379 page_t*
00380 ibuf_header_page_get(
00381 /*=================*/
00382   mtr_t*  mtr)  
00383 {
00384   buf_block_t*  block;
00385 
00386   ut_ad(!ibuf_inside());
00387 
00388   block = buf_page_get(
00389     IBUF_SPACE_ID, 0, FSP_IBUF_HEADER_PAGE_NO, RW_X_LATCH, mtr);
00390   buf_block_dbg_add_level(block, SYNC_IBUF_HEADER);
00391 
00392   return(buf_block_get_frame(block));
00393 }
00394 
00395 /******************************************************************/
00398 static
00399 page_t*
00400 ibuf_tree_root_get(
00401 /*===============*/
00402   mtr_t*    mtr)  
00403 {
00404   buf_block_t*  block;
00405   page_t*   root;
00406 
00407   ut_ad(ibuf_inside());
00408   ut_ad(mutex_own(&ibuf_mutex));
00409 
00410   mtr_x_lock(dict_index_get_lock(ibuf->index), mtr);
00411 
00412   block = buf_page_get(
00413     IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH, mtr);
00414 
00415   buf_block_dbg_add_level(block, SYNC_TREE_NODE);
00416 
00417   root = buf_block_get_frame(block);
00418 
00419   ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
00420   ut_ad(page_get_page_no(root) == FSP_IBUF_TREE_ROOT_PAGE_NO);
00421   ut_ad(ibuf->empty == (page_get_n_recs(root) == 0));
00422 
00423   return(root);
00424 }
00425 
00426 #ifdef UNIV_IBUF_COUNT_DEBUG
00427 /******************************************************************/
00431 UNIV_INTERN
00432 ulint
00433 ibuf_count_get(
00434 /*===========*/
00435   ulint space,  
00436   ulint page_no)
00437 {
00438   ibuf_count_check(space, page_no);
00439 
00440   return(ibuf_counts[space][page_no]);
00441 }
00442 
00443 /******************************************************************/
00445 static
00446 void
00447 ibuf_count_set(
00448 /*===========*/
00449   ulint space,  
00450   ulint page_no,
00451   ulint val)  
00452 {
00453   ibuf_count_check(space, page_no);
00454   ut_a(val < UNIV_PAGE_SIZE);
00455 
00456   ibuf_counts[space][page_no] = val;
00457 }
00458 #endif
00459 
00460 /******************************************************************/
00462 UNIV_INTERN
00463 void
00464 ibuf_close(void)
00465 /*============*/
00466 {
00467   mutex_free(&ibuf_pessimistic_insert_mutex);
00468   memset(&ibuf_pessimistic_insert_mutex,
00469          0x0, sizeof(ibuf_pessimistic_insert_mutex));
00470 
00471   mutex_free(&ibuf_mutex);
00472   memset(&ibuf_mutex, 0x0, sizeof(ibuf_mutex));
00473 
00474   mutex_free(&ibuf_bitmap_mutex);
00475   memset(&ibuf_bitmap_mutex, 0x0, sizeof(ibuf_mutex));
00476 
00477   mem_free(ibuf);
00478   ibuf = NULL;
00479 }
00480 
00481 /******************************************************************/
00484 static
00485 void
00486 ibuf_size_update(
00487 /*=============*/
00488   const page_t* root, 
00489   mtr_t*    mtr)  
00490 {
00491   ut_ad(mutex_own(&ibuf_mutex));
00492 
00493   ibuf->free_list_len = flst_get_len(root + PAGE_HEADER
00494              + PAGE_BTR_IBUF_FREE_LIST, mtr);
00495 
00496   ibuf->height = 1 + btr_page_get_level(root, mtr);
00497 
00498   /* the '1 +' is the ibuf header page */
00499   ibuf->size = ibuf->seg_size - (1 + ibuf->free_list_len);
00500 }
00501 
00502 /******************************************************************/
00505 UNIV_INTERN
00506 void
00507 ibuf_init_at_db_start(void)
00508 /*=======================*/
00509 {
00510   page_t*   root;
00511   mtr_t   mtr;
00512   dict_table_t* table;
00513   mem_heap_t* heap;
00514   dict_index_t* index;
00515   ulint   n_used;
00516   page_t*   header_page;
00517   ulint   error;
00518 
00519   ibuf = static_cast<ibuf_t *>(mem_alloc(sizeof(ibuf_t)));
00520 
00521   memset(ibuf, 0, sizeof(*ibuf));
00522 
00523   /* Note that also a pessimistic delete can sometimes make a B-tree
00524   grow in size, as the references on the upper levels of the tree can
00525   change */
00526 
00527   ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE
00528     / IBUF_POOL_SIZE_PER_MAX_SIZE;
00529 
00530   mutex_create(ibuf_pessimistic_insert_mutex_key,
00531          &ibuf_pessimistic_insert_mutex,
00532          SYNC_IBUF_PESS_INSERT_MUTEX);
00533 
00534   mutex_create(ibuf_mutex_key,
00535          &ibuf_mutex, SYNC_IBUF_MUTEX);
00536 
00537   mutex_create(ibuf_bitmap_mutex_key,
00538          &ibuf_bitmap_mutex, SYNC_IBUF_BITMAP_MUTEX);
00539 
00540   mtr_start(&mtr);
00541 
00542   mutex_enter(&ibuf_mutex);
00543 
00544   mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, NULL), &mtr);
00545 
00546   header_page = ibuf_header_page_get(&mtr);
00547 
00548   fseg_n_reserved_pages(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
00549             &n_used, &mtr);
00550   ibuf_enter();
00551 
00552   ut_ad(n_used >= 2);
00553 
00554   ibuf->seg_size = n_used;
00555 
00556   {
00557     buf_block_t*  block;
00558 
00559     block = buf_page_get(
00560       IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO,
00561       RW_X_LATCH, &mtr);
00562     buf_block_dbg_add_level(block, SYNC_TREE_NODE);
00563 
00564     root = buf_block_get_frame(block);
00565   }
00566 
00567   ibuf_size_update(root, &mtr);
00568   mutex_exit(&ibuf_mutex);
00569 
00570   ibuf->empty = (page_get_n_recs(root) == 0);
00571   mtr_commit(&mtr);
00572 
00573   ibuf_exit();
00574 
00575   heap = mem_heap_create(450);
00576 
00577   /* Use old-style record format for the insert buffer. */
00578   table = dict_mem_table_create(IBUF_TABLE_NAME, IBUF_SPACE_ID, 1, 0);
00579 
00580   dict_mem_table_add_col(table, heap, "DUMMY_COLUMN", DATA_BINARY, 0, 0);
00581 
00582   table->id = DICT_IBUF_ID_MIN + IBUF_SPACE_ID;
00583 
00584   dict_table_add_to_cache(table, heap);
00585   mem_heap_free(heap);
00586 
00587   index = dict_mem_index_create(
00588     IBUF_TABLE_NAME, "CLUST_IND",
00589     IBUF_SPACE_ID, DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, 1);
00590 
00591   dict_mem_index_add_field(index, "DUMMY_COLUMN", 0);
00592 
00593   index->id = DICT_IBUF_ID_MIN + IBUF_SPACE_ID;
00594 
00595   error = dict_index_add_to_cache(table, index,
00596           FSP_IBUF_TREE_ROOT_PAGE_NO, FALSE);
00597   ut_a(error == DB_SUCCESS);
00598 
00599   ibuf->index = dict_table_get_first_index(table);
00600 }
00601 #endif /* !UNIV_HOTBACKUP */
00602 /*********************************************************************/
00604 UNIV_INTERN
00605 void
00606 ibuf_bitmap_page_init(
00607 /*==================*/
00608   buf_block_t*  block,  
00609   mtr_t*    mtr)  
00610 {
00611   page_t* page;
00612   ulint byte_offset;
00613   ulint zip_size = buf_block_get_zip_size(block);
00614 
00615   ut_a(ut_is_2pow(zip_size));
00616 
00617   page = buf_block_get_frame(block);
00618   fil_page_set_type(page, FIL_PAGE_IBUF_BITMAP);
00619 
00620   /* Write all zeros to the bitmap */
00621 
00622   if (!zip_size) {
00623     byte_offset = UT_BITS_IN_BYTES(UNIV_PAGE_SIZE
00624                  * IBUF_BITS_PER_PAGE);
00625   } else {
00626     byte_offset = UT_BITS_IN_BYTES(zip_size * IBUF_BITS_PER_PAGE);
00627   }
00628 
00629   memset(page + IBUF_BITMAP, 0, byte_offset);
00630 
00631   /* The remaining area (up to the page trailer) is uninitialized. */
00632 
00633 #ifndef UNIV_HOTBACKUP
00634   mlog_write_initial_log_record(page, MLOG_IBUF_BITMAP_INIT, mtr);
00635 #endif /* !UNIV_HOTBACKUP */
00636 }
00637 
00638 /*********************************************************************/
00641 UNIV_INTERN
00642 byte*
00643 ibuf_parse_bitmap_init(
00644 /*===================*/
00645   byte*   ptr,  
00646   byte*   /*end_ptr __attribute__((unused))*/, 
00647   buf_block_t*  block,  
00648   mtr_t*    mtr)  
00649 {
00650   ut_ad(ptr && end_ptr);
00651 
00652   if (block) {
00653     ibuf_bitmap_page_init(block, mtr);
00654   }
00655 
00656   return(ptr);
00657 }
00658 #ifndef UNIV_HOTBACKUP
00659 /********************************************************************/
00662 UNIV_INLINE
00663 ulint
00664 ibuf_bitmap_page_get_bits(
00665 /*======================*/
00666   const page_t* page, 
00667   ulint   page_no,
00668   ulint   zip_size,
00670   ulint   bit,  
00672   mtr_t*    /*mtr __attribute__((unused))*/)
00675 {
00676   ulint byte_offset;
00677   ulint bit_offset;
00678   ulint map_byte;
00679   ulint value;
00680 
00681   ut_ad(bit < IBUF_BITS_PER_PAGE);
00682 #if IBUF_BITS_PER_PAGE % 2
00683 # error "IBUF_BITS_PER_PAGE % 2 != 0"
00684 #endif
00685   ut_ad(ut_is_2pow(zip_size));
00686   ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
00687 
00688   if (!zip_size) {
00689     bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE
00690       + bit;
00691   } else {
00692     bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE
00693       + bit;
00694   }
00695 
00696   byte_offset = bit_offset / 8;
00697   bit_offset = bit_offset % 8;
00698 
00699   ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
00700 
00701   map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
00702 
00703   value = ut_bit_get_nth(map_byte, bit_offset);
00704 
00705   if (bit == IBUF_BITMAP_FREE) {
00706     ut_ad(bit_offset + 1 < 8);
00707 
00708     value = value * 2 + ut_bit_get_nth(map_byte, bit_offset + 1);
00709   }
00710 
00711   return(value);
00712 }
00713 
00714 /********************************************************************/
00716 static
00717 void
00718 ibuf_bitmap_page_set_bits(
00719 /*======================*/
00720   page_t* page, 
00721   ulint page_no,
00722   ulint zip_size,
00724   ulint bit,  
00725   ulint val,  
00726   mtr_t*  mtr)  
00727 {
00728   ulint byte_offset;
00729   ulint bit_offset;
00730   ulint map_byte;
00731 
00732   ut_ad(bit < IBUF_BITS_PER_PAGE);
00733 #if IBUF_BITS_PER_PAGE % 2
00734 # error "IBUF_BITS_PER_PAGE % 2 != 0"
00735 #endif
00736   ut_ad(ut_is_2pow(zip_size));
00737   ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
00738 #ifdef UNIV_IBUF_COUNT_DEBUG
00739   ut_a((bit != IBUF_BITMAP_BUFFERED) || (val != FALSE)
00740        || (0 == ibuf_count_get(page_get_space_id(page),
00741              page_no)));
00742 #endif
00743   if (!zip_size) {
00744     bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE
00745       + bit;
00746   } else {
00747     bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE
00748       + bit;
00749   }
00750 
00751   byte_offset = bit_offset / 8;
00752   bit_offset = bit_offset % 8;
00753 
00754   ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
00755 
00756   map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
00757 
00758   if (bit == IBUF_BITMAP_FREE) {
00759     ut_ad(bit_offset + 1 < 8);
00760     ut_ad(val <= 3);
00761 
00762     map_byte = ut_bit_set_nth(map_byte, bit_offset, val / 2);
00763     map_byte = ut_bit_set_nth(map_byte, bit_offset + 1, val % 2);
00764   } else {
00765     ut_ad(val <= 1);
00766     map_byte = ut_bit_set_nth(map_byte, bit_offset, val);
00767   }
00768 
00769   mlog_write_ulint(page + IBUF_BITMAP + byte_offset, map_byte,
00770        MLOG_1BYTE, mtr);
00771 }
00772 
00773 /********************************************************************/
00776 UNIV_INLINE
00777 ulint
00778 ibuf_bitmap_page_no_calc(
00779 /*=====================*/
00780   ulint zip_size, 
00782   ulint page_no)  
00783 {
00784   ut_ad(ut_is_2pow(zip_size));
00785 
00786   if (!zip_size) {
00787     return(FSP_IBUF_BITMAP_OFFSET
00788            + (page_no & ~(UNIV_PAGE_SIZE - 1)));
00789   } else {
00790     return(FSP_IBUF_BITMAP_OFFSET
00791            + (page_no & ~(zip_size - 1)));
00792   }
00793 }
00794 
00795 /********************************************************************/
00801 static
00802 page_t*
00803 ibuf_bitmap_get_map_page_func(
00804 /*==========================*/
00805   ulint   space,  
00806   ulint   page_no,
00807   ulint   zip_size,
00809   const char* file, 
00810   ulint   line, 
00811   mtr_t*    mtr)  
00812 {
00813   buf_block_t*  block;
00814 
00815   block = buf_page_get_gen(space, zip_size,
00816          ibuf_bitmap_page_no_calc(zip_size, page_no),
00817          RW_X_LATCH, NULL, BUF_GET,
00818          file, line, mtr);
00819   buf_block_dbg_add_level(block, SYNC_IBUF_BITMAP);
00820 
00821   return(buf_block_get_frame(block));
00822 }
00823 
/********************************************************************//**
Fetches and x-latches the ibuf bitmap page describing (space, page_no).
Thin wrapper around ibuf_bitmap_get_map_page_func() that supplies the
caller's __FILE__ and __LINE__.
@param space    in: space id of the described page
@param page_no  in: page number of the described page
@param zip_size in: compressed page size in bytes; 0 for uncompressed
@param mtr      in: mini-transaction that will hold the x-latch
@return frame of the bitmap page */
#define ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr)   \
  ibuf_bitmap_get_map_page_func((space), (page_no), (zip_size), \
              __FILE__, __LINE__, (mtr))
00837 
00838 /************************************************************************/
00843 UNIV_INLINE
00844 void
00845 ibuf_set_free_bits_low(
00846 /*===================*/
00847   ulint     zip_size,
00849   const buf_block_t*  block,  
00852   ulint     val,  
00853   mtr_t*      mtr)  
00854 {
00855   page_t* bitmap_page;
00856   ulint space;
00857   ulint page_no;
00858 
00859   if (!page_is_leaf(buf_block_get_frame(block))) {
00860 
00861     return;
00862   }
00863 
00864   space = buf_block_get_space(block);
00865   page_no = buf_block_get_page_no(block);
00866   bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
00867 #ifdef UNIV_IBUF_DEBUG
00868 # if 0
00869   fprintf(stderr,
00870     "Setting space %lu page %lu free bits to %lu should be %lu\n",
00871     space, page_no, val,
00872     ibuf_index_page_calc_free(zip_size, block));
00873 # endif
00874 
00875   ut_a(val <= ibuf_index_page_calc_free(zip_size, block));
00876 #endif /* UNIV_IBUF_DEBUG */
00877   ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
00878           IBUF_BITMAP_FREE, val, mtr);
00879 }
00880 
00881 /************************************************************************/
00886 UNIV_INTERN
00887 void
00888 ibuf_set_free_bits_func(
00889 /*====================*/
00890   buf_block_t*  block,  
00892 #ifdef UNIV_IBUF_DEBUG
00893   ulint   max_val,
00896 #endif /* UNIV_IBUF_DEBUG */
00897   ulint   val)  
00898 {
00899   mtr_t mtr;
00900   page_t* page;
00901   page_t* bitmap_page;
00902   ulint space;
00903   ulint page_no;
00904   ulint zip_size;
00905 
00906   page = buf_block_get_frame(block);
00907 
00908   if (!page_is_leaf(page)) {
00909 
00910     return;
00911   }
00912 
00913   mtr_start(&mtr);
00914 
00915   space = buf_block_get_space(block);
00916   page_no = buf_block_get_page_no(block);
00917   zip_size = buf_block_get_zip_size(block);
00918   bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, &mtr);
00919 
00920 #ifdef UNIV_IBUF_DEBUG
00921   if (max_val != ULINT_UNDEFINED) {
00922     ulint old_val;
00923 
00924     old_val = ibuf_bitmap_page_get_bits(
00925       bitmap_page, page_no, zip_size,
00926       IBUF_BITMAP_FREE, &mtr);
00927 # if 0
00928     if (old_val != max_val) {
00929       fprintf(stderr,
00930         "Ibuf: page %lu old val %lu max val %lu\n",
00931         page_get_page_no(page),
00932         old_val, max_val);
00933     }
00934 # endif
00935 
00936     ut_a(old_val <= max_val);
00937   }
00938 # if 0
00939   fprintf(stderr, "Setting page no %lu free bits to %lu should be %lu\n",
00940     page_get_page_no(page), val,
00941     ibuf_index_page_calc_free(zip_size, block));
00942 # endif
00943 
00944   ut_a(val <= ibuf_index_page_calc_free(zip_size, block));
00945 #endif /* UNIV_IBUF_DEBUG */
00946   ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
00947           IBUF_BITMAP_FREE, val, &mtr);
00948   mtr_commit(&mtr);
00949 }
00950 
00951 /************************************************************************/
00960 UNIV_INTERN
00961 void
00962 ibuf_reset_free_bits(
00963 /*=================*/
00964   buf_block_t*  block)  
00967 {
00968   ibuf_set_free_bits(block, 0, ULINT_UNDEFINED);
00969 }
00970 
00971 /**********************************************************************/
00979 UNIV_INTERN
00980 void
00981 ibuf_update_free_bits_low(
00982 /*======================*/
00983   const buf_block_t*  block,    
00984   ulint     max_ins_size, 
00989   mtr_t*      mtr)    
00990 {
00991   ulint before;
00992   ulint after;
00993 
00994   ut_a(!buf_block_get_page_zip(block));
00995 
00996   before = ibuf_index_page_calc_free_bits(0, max_ins_size);
00997 
00998   after = ibuf_index_page_calc_free(0, block);
00999 
01000   /* This approach cannot be used on compressed pages, since the
01001   computed value of "before" often does not match the current
01002   state of the bitmap.  This is because the free space may
01003   increase or decrease when a compressed page is reorganized. */
01004   if (before != after) {
01005     ibuf_set_free_bits_low(0, block, after, mtr);
01006   }
01007 }
01008 
01009 /**********************************************************************/
01017 UNIV_INTERN
01018 void
01019 ibuf_update_free_bits_zip(
01020 /*======================*/
01021   buf_block_t*  block,  
01022   mtr_t*    mtr)  
01023 {
01024   page_t* bitmap_page;
01025   ulint space;
01026   ulint page_no;
01027   ulint zip_size;
01028   ulint after;
01029 
01030   space = buf_block_get_space(block);
01031   page_no = buf_block_get_page_no(block);
01032   zip_size = buf_block_get_zip_size(block);
01033 
01034   ut_a(page_is_leaf(buf_block_get_frame(block)));
01035   ut_a(zip_size);
01036 
01037   bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
01038 
01039   after = ibuf_index_page_calc_free_zip(zip_size, block);
01040 
01041   if (after == 0) {
01042     /* We move the page to the front of the buffer pool LRU list:
01043     the purpose of this is to prevent those pages to which we
01044     cannot make inserts using the insert buffer from slipping
01045     out of the buffer pool */
01046 
01047     buf_page_make_young(&block->page);
01048   }
01049 
01050   ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
01051           IBUF_BITMAP_FREE, after, mtr);
01052 }
01053 
01054 /**********************************************************************/
01061 UNIV_INTERN
01062 void
01063 ibuf_update_free_bits_for_two_pages_low(
01064 /*====================================*/
01065   ulint   zip_size,
01067   buf_block_t*  block1, 
01068   buf_block_t*  block2, 
01069   mtr_t*    mtr)  
01070 {
01071   ulint state;
01072 
01073   /* As we have to x-latch two random bitmap pages, we have to acquire
01074   the bitmap mutex to prevent a deadlock with a similar operation
01075   performed by another OS thread. */
01076 
01077   mutex_enter(&ibuf_bitmap_mutex);
01078 
01079   state = ibuf_index_page_calc_free(zip_size, block1);
01080 
01081   ibuf_set_free_bits_low(zip_size, block1, state, mtr);
01082 
01083   state = ibuf_index_page_calc_free(zip_size, block2);
01084 
01085   ibuf_set_free_bits_low(zip_size, block2, state, mtr);
01086 
01087   mutex_exit(&ibuf_bitmap_mutex);
01088 }
01089 
01090 /**********************************************************************/
01093 UNIV_INLINE
01094 ibool
01095 ibuf_fixed_addr_page(
01096 /*=================*/
01097   ulint space,  
01098   ulint zip_size,
01100   ulint page_no)
01101 {
01102   return((space == IBUF_SPACE_ID && page_no == IBUF_TREE_ROOT_PAGE_NO)
01103          || ibuf_bitmap_page(zip_size, page_no));
01104 }
01105 
01106 /***********************************************************************/
01110 UNIV_INTERN
01111 ibool
01112 ibuf_page(
01113 /*======*/
01114   ulint space,  
01115   ulint zip_size,
01116   ulint page_no,
01117   mtr_t*  mtr)  
01121 {
01122   ibool ret;
01123   mtr_t local_mtr;
01124   page_t* bitmap_page;
01125 
01126   ut_ad(!recv_no_ibuf_operations);
01127 
01128   if (ibuf_fixed_addr_page(space, zip_size, page_no)) {
01129 
01130     return(TRUE);
01131   } else if (space != IBUF_SPACE_ID) {
01132 
01133     return(FALSE);
01134   }
01135 
01136   ut_ad(fil_space_get_type(IBUF_SPACE_ID) == FIL_TABLESPACE);
01137 
01138   if (mtr == NULL) {
01139     mtr = &local_mtr;
01140     mtr_start(mtr);
01141   }
01142 
01143   bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
01144 
01145   ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size,
01146           IBUF_BITMAP_IBUF, mtr);
01147 
01148   if (mtr == &local_mtr) {
01149     mtr_commit(mtr);
01150   }
01151 
01152   return(ret);
01153 }
01154 
01155 /********************************************************************/
01158 static
01159 ulint
01160 ibuf_rec_get_page_no(
01161 /*=================*/
01162   const rec_t*  rec)  
01163 {
01164   const byte* field;
01165   ulint   len;
01166 
01167   ut_ad(ibuf_inside());
01168   ut_ad(rec_get_n_fields_old(rec) > 2);
01169 
01170   field = rec_get_nth_field_old(rec, 1, &len);
01171 
01172   if (len == 1) {
01173     /* This is of the >= 4.1.x record format */
01174     ut_a(trx_sys_multiple_tablespace_format);
01175 
01176     field = rec_get_nth_field_old(rec, 2, &len);
01177   } else {
01178     ut_a(trx_doublewrite_must_reset_space_ids);
01179     ut_a(!trx_sys_multiple_tablespace_format);
01180 
01181     field = rec_get_nth_field_old(rec, 0, &len);
01182   }
01183 
01184   ut_a(len == 4);
01185 
01186   return(mach_read_from_4(field));
01187 }
01188 
01189 /********************************************************************/
01193 static
01194 ulint
01195 ibuf_rec_get_space(
01196 /*===============*/
01197   const rec_t*  rec)  
01198 {
01199   const byte* field;
01200   ulint   len;
01201 
01202   ut_ad(ibuf_inside());
01203   ut_ad(rec_get_n_fields_old(rec) > 2);
01204 
01205   field = rec_get_nth_field_old(rec, 1, &len);
01206 
01207   if (len == 1) {
01208     /* This is of the >= 4.1.x record format */
01209 
01210     ut_a(trx_sys_multiple_tablespace_format);
01211     field = rec_get_nth_field_old(rec, 0, &len);
01212     ut_a(len == 4);
01213 
01214     return(mach_read_from_4(field));
01215   }
01216 
01217   ut_a(trx_doublewrite_must_reset_space_ids);
01218   ut_a(!trx_sys_multiple_tablespace_format);
01219 
01220   return(0);
01221 }
01222 
01223 /****************************************************************/
01225 static
01226 void
01227 ibuf_rec_get_info(
01228 /*==============*/
01229   const rec_t*  rec,    
01230   ibuf_op_t*  op,   
01231   ibool*    comp,   
01232   ulint*    info_len, 
01235   ulint*    counter)  
01236 {
01237   const byte* types;
01238   ulint   fields;
01239   ulint   len;
01240 
01241   /* Local variables to shadow arguments. */
01242   ibuf_op_t op_local;
01243   ibool   comp_local;
01244   ulint   info_len_local;
01245   ulint   counter_local;
01246 
01247   ut_ad(ibuf_inside());
01248   fields = rec_get_n_fields_old(rec);
01249   ut_a(fields > 4);
01250 
01251   types = rec_get_nth_field_old(rec, 3, &len);
01252 
01253   info_len_local = len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
01254 
01255   switch (info_len_local) {
01256   case 0:
01257   case 1:
01258     op_local = IBUF_OP_INSERT;
01259     comp_local = info_len_local;
01260     ut_ad(!counter);
01261     counter_local = ULINT_UNDEFINED;
01262     break;
01263 
01264   case IBUF_REC_INFO_SIZE:
01265     op_local = (ibuf_op_t)types[IBUF_REC_OFFSET_TYPE];
01266     comp_local = types[IBUF_REC_OFFSET_FLAGS] & IBUF_REC_COMPACT;
01267     counter_local = mach_read_from_2(
01268       types + IBUF_REC_OFFSET_COUNTER);
01269     break;
01270 
01271   default:
01272     ut_error;
01273   }
01274 
01275   ut_a(op_local < IBUF_OP_COUNT);
01276   ut_a((len - info_len_local) ==
01277        (fields - 4) * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
01278 
01279   if (op) {
01280     *op = op_local;
01281   }
01282 
01283   if (comp) {
01284     *comp = comp_local;
01285   }
01286 
01287   if (info_len) {
01288     *info_len = info_len_local;
01289   }
01290 
01291   if (counter) {
01292     *counter = counter_local;
01293   }
01294 }
01295 
01296 /****************************************************************/
01299 static
01300 ibuf_op_t
01301 ibuf_rec_get_op_type(
01302 /*=================*/
01303   const rec_t*  rec)  
01304 {
01305   ulint   len;
01306 
01307   ut_ad(ibuf_inside());
01308   ut_ad(rec_get_n_fields_old(rec) > 2);
01309 
01310   (void) rec_get_nth_field_old(rec, 1, &len);
01311 
01312   if (len > 1) {
01313     /* This is a < 4.1.x format record */
01314 
01315     return(IBUF_OP_INSERT);
01316   } else {
01317     ibuf_op_t op;
01318 
01319     ibuf_rec_get_info(rec, &op, NULL, NULL, NULL);
01320 
01321     return(op);
01322   }
01323 }
01324 
01325 /****************************************************************/
01330 UNIV_INTERN
01331 ulint
01332 ibuf_rec_get_counter(
01333 /*=================*/
01334   const rec_t*  rec)  
01335 {
01336   const byte* ptr;
01337   ulint   len;
01338 
01339   if (rec_get_n_fields_old(rec) < 4) {
01340 
01341     return(ULINT_UNDEFINED);
01342   }
01343 
01344   ptr = rec_get_nth_field_old(rec, 3, &len);
01345 
01346   if (len >= 2) {
01347 
01348     return(mach_read_from_2(ptr));
01349   } else {
01350 
01351     return(ULINT_UNDEFINED);
01352   }
01353 }
01354 
01355 /****************************************************************/
01358 static
01359 void
01360 ibuf_add_ops(
01361 /*=========*/
01362   ulint*    arr,  
01363   const ulint*  ops)  
01365 {
01366   ulint i;
01367 
01368 #ifndef HAVE_ATOMIC_BUILTINS
01369   ut_ad(mutex_own(&ibuf_mutex));
01370 #endif /* !HAVE_ATOMIC_BUILTINS */
01371 
01372   for (i = 0; i < IBUF_OP_COUNT; i++) {
01373 #ifdef HAVE_ATOMIC_BUILTINS
01374     os_atomic_increment_ulint(&arr[i], ops[i]);
01375 #else /* HAVE_ATOMIC_BUILTINS */
01376     arr[i] += ops[i];
01377 #endif /* HAVE_ATOMIC_BUILTINS */
01378   }
01379 }
01380 
01381 /****************************************************************/
01383 static
01384 void
01385 ibuf_print_ops(
01386 /*===========*/
01387   const ulint*  ops,  
01388   FILE*   file) 
01389 {
01390   static const char* op_names[] = {
01391     "insert",
01392     "delete mark",
01393     "delete"
01394   };
01395   ulint i;
01396 
01397   ut_a(UT_ARR_SIZE(op_names) == IBUF_OP_COUNT);
01398 
01399   for (i = 0; i < IBUF_OP_COUNT; i++) {
01400     fprintf(file, "%s %lu%s", op_names[i],
01401       (ulong) ops[i], (i < (IBUF_OP_COUNT - 1)) ? ", " : "");
01402   }
01403 
01404   putc('\n', file);
01405 }
01406 
01407 /********************************************************************/
01410 static
01411 dict_index_t*
01412 ibuf_dummy_index_create(
01413 /*====================*/
01414   ulint   n,  
01415   ibool   comp) 
01416 {
01417   dict_table_t* table;
01418   dict_index_t* index;
01419 
01420   table = dict_mem_table_create("IBUF_DUMMY",
01421               DICT_HDR_SPACE, n,
01422               comp ? DICT_TF_COMPACT : 0);
01423 
01424   index = dict_mem_index_create("IBUF_DUMMY", "IBUF_DUMMY",
01425               DICT_HDR_SPACE, 0, n);
01426 
01427   index->table = table;
01428 
01429   /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
01430   index->cached = TRUE;
01431 
01432   return(index);
01433 }
01434 /********************************************************************/
01436 static
01437 void
01438 ibuf_dummy_index_add_col(
01439 /*=====================*/
01440   dict_index_t* index,  
01441   const dtype_t*  type, 
01442   ulint   len)  
01443 {
01444   ulint i = index->table->n_def;
01445   dict_mem_table_add_col(index->table, NULL, NULL,
01446              dtype_get_mtype(type),
01447              dtype_get_prtype(type),
01448              dtype_get_len(type));
01449   dict_index_add_col(index, index->table,
01450          dict_table_get_nth_col(index->table, i), len);
01451 }
01452 /********************************************************************/
01454 static
01455 void
01456 ibuf_dummy_index_free(
01457 /*==================*/
01458   dict_index_t* index)  
01459 {
01460   dict_table_t* table = index->table;
01461 
01462   dict_mem_index_free(index);
01463   dict_mem_table_free(table);
01464 }
01465 
01466 /*********************************************************************/
01474 UNIV_INLINE
01475 dtuple_t*
01476 ibuf_build_entry_pre_4_1_x(
01477 /*=======================*/
01478   const rec_t*  ibuf_rec, 
01479   mem_heap_t* heap,   
01480   dict_index_t**  pindex)   
01482 {
01483   ulint   i;
01484   ulint   len;
01485   const byte* types;
01486   dtuple_t* tuple;
01487   ulint   n_fields;
01488 
01489   ut_a(trx_doublewrite_must_reset_space_ids);
01490   ut_a(!trx_sys_multiple_tablespace_format);
01491 
01492   n_fields = rec_get_n_fields_old(ibuf_rec) - 2;
01493   tuple = dtuple_create(heap, n_fields);
01494   types = rec_get_nth_field_old(ibuf_rec, 1, &len);
01495 
01496   ut_a(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
01497 
01498   for (i = 0; i < n_fields; i++) {
01499     const byte* data;
01500     dfield_t* field;
01501 
01502     field = dtuple_get_nth_field(tuple, i);
01503 
01504     data = rec_get_nth_field_old(ibuf_rec, i + 2, &len);
01505 
01506     dfield_set_data(field, data, len);
01507 
01508     dtype_read_for_order_and_null_size(
01509       dfield_get_type(field),
01510       types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE);
01511   }
01512 
01513   *pindex = ibuf_dummy_index_create(n_fields, FALSE);
01514 
01515   return(tuple);
01516 }
01517 
01518 /*********************************************************************/
01534 static
01535 dtuple_t*
01536 ibuf_build_entry_from_ibuf_rec(
01537 /*===========================*/
01538   const rec_t*  ibuf_rec, 
01539   mem_heap_t* heap,   
01540   dict_index_t**  pindex)   
01542 {
01543   dtuple_t* tuple;
01544   dfield_t* field;
01545   ulint   n_fields;
01546   const byte* types;
01547   const byte* data;
01548   ulint   len;
01549   ulint   info_len;
01550   ulint   i;
01551   ulint   comp;
01552   dict_index_t* index;
01553 
01554   data = rec_get_nth_field_old(ibuf_rec, 1, &len);
01555 
01556   if (len > 1) {
01557     /* This a < 4.1.x format record */
01558 
01559     return(ibuf_build_entry_pre_4_1_x(ibuf_rec, heap, pindex));
01560   }
01561 
01562   /* This a >= 4.1.x format record */
01563 
01564   ut_a(trx_sys_multiple_tablespace_format);
01565   ut_a(*data == 0);
01566   ut_a(rec_get_n_fields_old(ibuf_rec) > 4);
01567 
01568   n_fields = rec_get_n_fields_old(ibuf_rec) - 4;
01569 
01570   tuple = dtuple_create(heap, n_fields);
01571 
01572   types = rec_get_nth_field_old(ibuf_rec, 3, &len);
01573 
01574   ibuf_rec_get_info(ibuf_rec, NULL, &comp, &info_len, NULL);
01575 
01576   index = ibuf_dummy_index_create(n_fields, comp);
01577 
01578   len -= info_len;
01579   types += info_len;
01580 
01581   ut_a(len == n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
01582 
01583   for (i = 0; i < n_fields; i++) {
01584     field = dtuple_get_nth_field(tuple, i);
01585 
01586     data = rec_get_nth_field_old(ibuf_rec, i + 4, &len);
01587 
01588     dfield_set_data(field, data, len);
01589 
01590     dtype_new_read_for_order_and_null_size(
01591       dfield_get_type(field),
01592       types + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
01593 
01594     ibuf_dummy_index_add_col(index, dfield_get_type(field), len);
01595   }
01596 
01597   /* Prevent an ut_ad() failure in page_zip_write_rec() by
01598   adding system columns to the dummy table pointed to by the
01599   dummy secondary index.  The insert buffer is only used for
01600   secondary indexes, whose records never contain any system
01601   columns, such as DB_TRX_ID. */
01602   ut_d(dict_table_add_system_columns(index->table, index->table->heap));
01603 
01604   *pindex = index;
01605 
01606   return(tuple);
01607 }
01608 
01609 /******************************************************************/
01612 UNIV_INLINE
01613 ulint
01614 ibuf_rec_get_size(
01615 /*==============*/
01616   const rec_t*  rec,      
01617   const byte* types,      
01618   ulint   n_fields,   
01619   ibool   pre_4_1,    
01621   ulint   comp)     
01623 {
01624   ulint i;
01625   ulint field_offset;
01626   ulint types_offset;
01627   ulint size = 0;
01628 
01629   if (pre_4_1) {
01630     field_offset = 2;
01631     types_offset = DATA_ORDER_NULL_TYPE_BUF_SIZE;
01632   } else {
01633     field_offset = 4;
01634     types_offset = DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
01635   }
01636 
01637   for (i = 0; i < n_fields; i++) {
01638     ulint   len;
01639     dtype_t   dtype;
01640 
01641     rec_get_nth_field_offs_old(rec, i + field_offset, &len);
01642 
01643     if (len != UNIV_SQL_NULL) {
01644       size += len;
01645     } else if (pre_4_1) {
01646       dtype_read_for_order_and_null_size(&dtype, types);
01647 
01648       size += dtype_get_sql_null_size(&dtype, comp);
01649     } else {
01650       dtype_new_read_for_order_and_null_size(&dtype, types);
01651 
01652       size += dtype_get_sql_null_size(&dtype, comp);
01653     }
01654 
01655     types += types_offset;
01656   }
01657 
01658   return(size);
01659 }
01660 
01661 /********************************************************************/
01666 static
01667 ulint
01668 ibuf_rec_get_volume(
01669 /*================*/
01670   const rec_t*  ibuf_rec)
01671 {
01672   ulint   len;
01673   const byte* data;
01674   const byte* types;
01675   ulint   n_fields;
01676   ulint   data_size;
01677   ibool   pre_4_1;
01678   ulint   comp;
01679 
01680   ut_ad(ibuf_inside());
01681   ut_ad(rec_get_n_fields_old(ibuf_rec) > 2);
01682 
01683   data = rec_get_nth_field_old(ibuf_rec, 1, &len);
01684   pre_4_1 = (len > 1);
01685 
01686   if (pre_4_1) {
01687     /* < 4.1.x format record */
01688 
01689     ut_a(trx_doublewrite_must_reset_space_ids);
01690     ut_a(!trx_sys_multiple_tablespace_format);
01691 
01692     n_fields = rec_get_n_fields_old(ibuf_rec) - 2;
01693 
01694     types = rec_get_nth_field_old(ibuf_rec, 1, &len);
01695 
01696     ut_ad(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
01697     comp = 0;
01698   } else {
01699     /* >= 4.1.x format record */
01700     ibuf_op_t op;
01701     ulint   info_len;
01702 
01703     ut_a(trx_sys_multiple_tablespace_format);
01704     ut_a(*data == 0);
01705 
01706     types = rec_get_nth_field_old(ibuf_rec, 3, &len);
01707 
01708     ibuf_rec_get_info(ibuf_rec, &op, &comp, &info_len, NULL);
01709 
01710     if (op == IBUF_OP_DELETE_MARK || op == IBUF_OP_DELETE) {
01711       /* Delete-marking a record doesn't take any
01712       additional space, and while deleting a record
01713       actually frees up space, we have to play it safe and
01714       pretend it takes no additional space (the record
01715       might not exist, etc.).  */
01716 
01717       return(0);
01718     } else if (comp) {
01719       dtuple_t* entry;
01720       ulint   volume;
01721       dict_index_t* dummy_index;
01722       mem_heap_t* heap = mem_heap_create(500);
01723 
01724       entry = ibuf_build_entry_from_ibuf_rec(
01725         ibuf_rec, heap, &dummy_index);
01726 
01727       volume = rec_get_converted_size(dummy_index, entry, 0);
01728 
01729       ibuf_dummy_index_free(dummy_index);
01730       mem_heap_free(heap);
01731 
01732       return(volume + page_dir_calc_reserved_space(1));
01733     }
01734 
01735     types += info_len;
01736     n_fields = rec_get_n_fields_old(ibuf_rec) - 4;
01737   }
01738 
01739   data_size = ibuf_rec_get_size(ibuf_rec, types, n_fields, pre_4_1, comp);
01740 
01741   return(data_size + rec_get_converted_extra_size(data_size, n_fields, 0)
01742          + page_dir_calc_reserved_space(1));
01743 }
01744 
01745 /*********************************************************************/
01753 static
01754 dtuple_t*
01755 ibuf_entry_build(
01756 /*=============*/
01757   ibuf_op_t op, 
01758   dict_index_t* index,  
01759   const dtuple_t* entry,  
01760   ulint   space,  
01761   ulint   page_no,
01763   ulint   counter,
01765   mem_heap_t* heap) 
01766 {
01767   dtuple_t* tuple;
01768   dfield_t* field;
01769   const dfield_t* entry_field;
01770   ulint   n_fields;
01771   byte*   buf;
01772   byte*   ti;
01773   byte*   type_info;
01774   ulint   i;
01775 
01776   ut_ad(counter != ULINT_UNDEFINED || op == IBUF_OP_INSERT);
01777   ut_ad(counter == ULINT_UNDEFINED || counter <= 0xFFFF);
01778   ut_ad(op < IBUF_OP_COUNT);
01779 
01780   /* We have to build a tuple with the following fields:
01781 
01782   1-4) These are described at the top of this file.
01783 
01784   5) The rest of the fields are copied from the entry.
01785 
01786   All fields in the tuple are ordered like the type binary in our
01787   insert buffer tree. */
01788 
01789   n_fields = dtuple_get_n_fields(entry);
01790 
01791   tuple = dtuple_create(heap, n_fields + 4);
01792 
01793   /* 1) Space Id */
01794 
01795   field = dtuple_get_nth_field(tuple, 0);
01796 
01797   buf = static_cast<byte *>(mem_heap_alloc(heap, 4));
01798 
01799   mach_write_to_4(buf, space);
01800 
01801   dfield_set_data(field, buf, 4);
01802 
01803   /* 2) Marker byte */
01804 
01805   field = dtuple_get_nth_field(tuple, 1);
01806 
01807   buf = static_cast<byte *>(mem_heap_alloc(heap, 1));
01808 
01809   /* We set the marker byte zero */
01810 
01811   mach_write_to_1(buf, 0);
01812 
01813   dfield_set_data(field, buf, 1);
01814 
01815   /* 3) Page number */
01816 
01817   field = dtuple_get_nth_field(tuple, 2);
01818 
01819   buf = static_cast<byte *>(mem_heap_alloc(heap, 4));
01820 
01821   mach_write_to_4(buf, page_no);
01822 
01823   dfield_set_data(field, buf, 4);
01824 
01825   /* 4) Type info, part #1 */
01826 
01827   if (counter == ULINT_UNDEFINED) {
01828     i = dict_table_is_comp(index->table) ? 1 : 0;
01829   } else {
01830     ut_ad(counter <= 0xFFFF);
01831     i = IBUF_REC_INFO_SIZE;
01832   }
01833 
01834   ti = type_info = static_cast<byte *>(mem_heap_alloc(heap, i + n_fields
01835           * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE));
01836 
01837   switch (i) {
01838   default:
01839     ut_error;
01840     break;
01841   case 1:
01842     /* set the flag for ROW_FORMAT=COMPACT */
01843     *ti++ = 0;
01844     /* fall through */
01845   case 0:
01846     /* the old format does not allow delete buffering */
01847     ut_ad(op == IBUF_OP_INSERT);
01848     break;
01849   case IBUF_REC_INFO_SIZE:
01850     mach_write_to_2(ti + IBUF_REC_OFFSET_COUNTER, counter);
01851 
01852     ti[IBUF_REC_OFFSET_TYPE] = (byte) op;
01853     ti[IBUF_REC_OFFSET_FLAGS] = dict_table_is_comp(index->table)
01854       ? IBUF_REC_COMPACT : 0;
01855     ti += IBUF_REC_INFO_SIZE;
01856     break;
01857   }
01858 
01859   /* 5+) Fields from the entry */
01860 
01861   for (i = 0; i < n_fields; i++) {
01862     ulint     fixed_len;
01863     const dict_field_t* ifield;
01864 
01865     /* We add 4 below because we have the 4 extra fields at the
01866     start of an ibuf record */
01867 
01868     field = dtuple_get_nth_field(tuple, i + 4);
01869     entry_field = dtuple_get_nth_field(entry, i);
01870     dfield_copy(field, entry_field);
01871 
01872     ifield = dict_index_get_nth_field(index, i);
01873     /* Prefix index columns of fixed-length columns are of
01874     fixed length.  However, in the function call below,
01875     dfield_get_type(entry_field) contains the fixed length
01876     of the column in the clustered index.  Replace it with
01877     the fixed length of the secondary index column. */
01878     fixed_len = ifield->fixed_len;
01879 
01880 #ifdef UNIV_DEBUG
01881     if (fixed_len) {
01882       /* dict_index_add_col() should guarantee these */
01883       ut_ad(fixed_len <= (ulint)
01884             dfield_get_type(entry_field)->len);
01885       if (ifield->prefix_len) {
01886         ut_ad(ifield->prefix_len == fixed_len);
01887       } else {
01888         ut_ad(fixed_len == (ulint)
01889               dfield_get_type(entry_field)->len);
01890       }
01891     }
01892 #endif /* UNIV_DEBUG */
01893 
01894     dtype_new_store_for_order_and_null_size(
01895       ti, dfield_get_type(entry_field), fixed_len);
01896     ti += DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
01897   }
01898 
01899   /* 4) Type info, part #2 */
01900 
01901   field = dtuple_get_nth_field(tuple, 3);
01902 
01903   dfield_set_data(field, type_info, ti - type_info);
01904 
01905   /* Set all the types in the new tuple binary */
01906 
01907   dtuple_set_types_binary(tuple, n_fields + 4);
01908 
01909   return(tuple);
01910 }
01911 
01912 /*********************************************************************/
01916 static
01917 dtuple_t*
01918 ibuf_search_tuple_build(
01919 /*====================*/
01920   ulint   space,  
01921   ulint   page_no,
01922   mem_heap_t* heap) 
01923 {
01924   dtuple_t* tuple;
01925   dfield_t* field;
01926   byte*   buf;
01927 
01928   ut_a(space == 0);
01929   ut_a(trx_doublewrite_must_reset_space_ids);
01930   ut_a(!trx_sys_multiple_tablespace_format);
01931 
01932   tuple = dtuple_create(heap, 1);
01933 
01934   /* Store the page number in tuple */
01935 
01936   field = dtuple_get_nth_field(tuple, 0);
01937 
01938   buf = static_cast<byte *>(mem_heap_alloc(heap, 4));
01939 
01940   mach_write_to_4(buf, page_no);
01941 
01942   dfield_set_data(field, buf, 4);
01943 
01944   dtuple_set_types_binary(tuple, 1);
01945 
01946   return(tuple);
01947 }
01948 
01949 /*********************************************************************/
01953 static
01954 dtuple_t*
01955 ibuf_new_search_tuple_build(
01956 /*========================*/
01957   ulint   space,  
01958   ulint   page_no,
01959   mem_heap_t* heap) 
01960 {
01961   dtuple_t* tuple;
01962   dfield_t* field;
01963   byte*   buf;
01964 
01965   ut_a(trx_sys_multiple_tablespace_format);
01966 
01967   tuple = dtuple_create(heap, 3);
01968 
01969   /* Store the space id in tuple */
01970 
01971   field = dtuple_get_nth_field(tuple, 0);
01972 
01973   buf = static_cast<byte *>(mem_heap_alloc(heap, 4));
01974 
01975   mach_write_to_4(buf, space);
01976 
01977   dfield_set_data(field, buf, 4);
01978 
01979   /* Store the new format record marker byte */
01980 
01981   field = dtuple_get_nth_field(tuple, 1);
01982 
01983   buf = static_cast<byte *>(mem_heap_alloc(heap, 1));
01984 
01985   mach_write_to_1(buf, 0);
01986 
01987   dfield_set_data(field, buf, 1);
01988 
01989   /* Store the page number in tuple */
01990 
01991   field = dtuple_get_nth_field(tuple, 2);
01992 
01993   buf = static_cast<byte *>(mem_heap_alloc(heap, 4));
01994 
01995   mach_write_to_4(buf, page_no);
01996 
01997   dfield_set_data(field, buf, 4);
01998 
01999   dtuple_set_types_binary(tuple, 3);
02000 
02001   return(tuple);
02002 }
02003 
02004 /*********************************************************************/
02008 UNIV_INLINE
02009 ibool
02010 ibuf_data_enough_free_for_insert(void)
02011 /*==================================*/
02012 {
02013   ut_ad(mutex_own(&ibuf_mutex));
02014 
02015   /* We want a big margin of free pages, because a B-tree can sometimes
02016   grow in size also if records are deleted from it, as the node pointers
02017   can change, and we must make sure that we are able to delete the
02018   inserts buffered for pages that we read to the buffer pool, without
02019   any risk of running out of free space in the insert buffer. */
02020 
02021   return(ibuf->free_list_len >= (ibuf->size / 2) + 3 * ibuf->height);
02022 }
02023 
02024 /*********************************************************************/
02028 UNIV_INLINE
02029 ibool
02030 ibuf_data_too_much_free(void)
02031 /*=========================*/
02032 {
02033   ut_ad(mutex_own(&ibuf_mutex));
02034 
02035   return(ibuf->free_list_len >= 3 + (ibuf->size / 2) + 3 * ibuf->height);
02036 }
02037 
02038 /*********************************************************************/
02042 static
02043 ibool
02044 ibuf_add_free_page(void)
02045 /*====================*/
02046 {
02047   mtr_t mtr;
02048   page_t* header_page;
02049   ulint flags;
02050   ulint zip_size;
02051   ulint page_no;
02052   page_t* page;
02053   page_t* root;
02054   page_t* bitmap_page;
02055 
02056   mtr_start(&mtr);
02057 
02058   /* Acquire the fsp latch before the ibuf header, obeying the latching
02059   order */
02060   mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr);
02061   zip_size = dict_table_flags_to_zip_size(flags);
02062 
02063   header_page = ibuf_header_page_get(&mtr);
02064 
02065   /* Allocate a new page: NOTE that if the page has been a part of a
02066   non-clustered index which has subsequently been dropped, then the
02067   page may have buffered inserts in the insert buffer, and these
02068   should be deleted from there. These get deleted when the page
02069   allocation creates the page in buffer. Thus the call below may end
02070   up calling the insert buffer routines and, as we yet have no latches
02071   to insert buffer tree pages, these routines can run without a risk
02072   of a deadlock. This is the reason why we created a special ibuf
02073   header page apart from the ibuf tree. */
02074 
02075   page_no = fseg_alloc_free_page(
02076     header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, 0, FSP_UP,
02077     &mtr);
02078 
02079   if (UNIV_UNLIKELY(page_no == FIL_NULL)) {
02080     mtr_commit(&mtr);
02081 
02082     return(FALSE);
02083   }
02084 
02085   {
02086     buf_block_t*  block;
02087 
02088     block = buf_page_get(
02089       IBUF_SPACE_ID, 0, page_no, RW_X_LATCH, &mtr);
02090 
02091     buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
02092 
02093 
02094     page = buf_block_get_frame(block);
02095   }
02096 
02097   ibuf_enter();
02098 
02099   mutex_enter(&ibuf_mutex);
02100 
02101   root = ibuf_tree_root_get(&mtr);
02102 
02103   /* Add the page to the free list and update the ibuf size data */
02104 
02105   flst_add_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
02106           page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
02107 
02108   mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_IBUF_FREE_LIST,
02109        MLOG_2BYTES, &mtr);
02110 
02111   ibuf->seg_size++;
02112   ibuf->free_list_len++;
02113 
02114   /* Set the bit indicating that this page is now an ibuf tree page
02115   (level 2 page) */
02116 
02117   bitmap_page = ibuf_bitmap_get_map_page(
02118     IBUF_SPACE_ID, page_no, zip_size, &mtr);
02119 
02120   mutex_exit(&ibuf_mutex);
02121 
02122   ibuf_bitmap_page_set_bits(
02123     bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, TRUE, &mtr);
02124 
02125   mtr_commit(&mtr);
02126 
02127   ibuf_exit();
02128 
02129   return(TRUE);
02130 }
02131 
02132 /*********************************************************************/
02134 static
02135 void
02136 ibuf_remove_free_page(void)
02137 /*=======================*/
02138 {
02139   mtr_t mtr;
02140   mtr_t mtr2;
02141   page_t* header_page;
02142   ulint flags;
02143   ulint zip_size;
02144   ulint page_no;
02145   page_t* page;
02146   page_t* root;
02147   page_t* bitmap_page;
02148 
02149   mtr_start(&mtr);
02150 
02151   /* Acquire the fsp latch before the ibuf header, obeying the latching
02152   order */
02153   mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr);
02154   zip_size = dict_table_flags_to_zip_size(flags);
02155 
02156   header_page = ibuf_header_page_get(&mtr);
02157 
02158   /* Prevent pessimistic inserts to insert buffer trees for a while */
02159   ibuf_enter();
02160   mutex_enter(&ibuf_pessimistic_insert_mutex);
02161   mutex_enter(&ibuf_mutex);
02162 
02163   if (!ibuf_data_too_much_free()) {
02164 
02165     mutex_exit(&ibuf_mutex);
02166     mutex_exit(&ibuf_pessimistic_insert_mutex);
02167 
02168     ibuf_exit();
02169 
02170     mtr_commit(&mtr);
02171 
02172     return;
02173   }
02174 
02175   mtr_start(&mtr2);
02176 
02177   root = ibuf_tree_root_get(&mtr2);
02178 
02179   mutex_exit(&ibuf_mutex);
02180 
02181   page_no = flst_get_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
02182         &mtr2).page;
02183 
02184   /* NOTE that we must release the latch on the ibuf tree root
02185   because in fseg_free_page we access level 1 pages, and the root
02186   is a level 2 page. */
02187 
02188   mtr_commit(&mtr2);
02189 
02190   ibuf_exit();
02191 
02192   /* Since pessimistic inserts were prevented, we know that the
02193   page is still in the free list. NOTE that also deletes may take
02194   pages from the free list, but they take them from the start, and
02195   the free list was so long that they cannot have taken the last
02196   page from it. */
02197 
02198   fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
02199            IBUF_SPACE_ID, page_no, &mtr);
02200 
02201 #ifdef UNIV_DEBUG_FILE_ACCESSES
02202   buf_page_reset_file_page_was_freed(IBUF_SPACE_ID, page_no);
02203 #endif
02204 
02205   ibuf_enter();
02206 
02207   mutex_enter(&ibuf_mutex);
02208 
02209   root = ibuf_tree_root_get(&mtr);
02210 
02211   ut_ad(page_no == flst_get_last(root + PAGE_HEADER
02212                + PAGE_BTR_IBUF_FREE_LIST, &mtr).page);
02213 
02214   {
02215     buf_block_t*  block;
02216 
02217     block = buf_page_get(
02218       IBUF_SPACE_ID, 0, page_no, RW_X_LATCH, &mtr);
02219 
02220     buf_block_dbg_add_level(block, SYNC_TREE_NODE);
02221 
02222 
02223     page = buf_block_get_frame(block);
02224   }
02225 
02226   /* Remove the page from the free list and update the ibuf size data */
02227 
02228   flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
02229         page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
02230 
02231   mutex_exit(&ibuf_pessimistic_insert_mutex);
02232 
02233   ibuf->seg_size--;
02234   ibuf->free_list_len--;
02235 
02236   /* Set the bit indicating that this page is no more an ibuf tree page
02237   (level 2 page) */
02238 
02239   bitmap_page = ibuf_bitmap_get_map_page(
02240     IBUF_SPACE_ID, page_no, zip_size, &mtr);
02241 
02242   mutex_exit(&ibuf_mutex);
02243 
02244   ibuf_bitmap_page_set_bits(
02245     bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, FALSE, &mtr);
02246 
02247 #ifdef UNIV_DEBUG_FILE_ACCESSES
02248   buf_page_set_file_page_was_freed(IBUF_SPACE_ID, page_no);
02249 #endif
02250   mtr_commit(&mtr);
02251 
02252   ibuf_exit();
02253 }
02254 
02255 /***********************************************************************/
02259 UNIV_INTERN
02260 void
02261 ibuf_free_excess_pages(void)
02262 /*========================*/
02263 {
02264   ulint   i;
02265 
02266 #ifdef UNIV_SYNC_DEBUG
02267   ut_ad(rw_lock_own(fil_space_get_latch(IBUF_SPACE_ID, NULL),
02268         RW_LOCK_EX));
02269 #endif /* UNIV_SYNC_DEBUG */
02270 
02271   ut_ad(rw_lock_get_x_lock_count(
02272     fil_space_get_latch(IBUF_SPACE_ID, NULL)) == 1);
02273 
02274   ut_ad(!ibuf_inside());
02275 
02276   /* NOTE: We require that the thread did not own the latch before,
02277   because then we know that we can obey the correct latching order
02278   for ibuf latches */
02279 
02280   if (!ibuf) {
02281     /* Not yet initialized; not sure if this is possible, but
02282     does no harm to check for it. */
02283 
02284     return;
02285   }
02286 
02287   /* Free at most a few pages at a time, so that we do not delay the
02288   requested service too much */
02289 
02290   for (i = 0; i < 4; i++) {
02291 
02292     ibool too_much_free;
02293 
02294     mutex_enter(&ibuf_mutex);
02295     too_much_free = ibuf_data_too_much_free();
02296     mutex_exit(&ibuf_mutex);
02297 
02298     if (!too_much_free) {
02299       return;
02300     }
02301 
02302     ibuf_remove_free_page();
02303   }
02304 }
02305 
02306 /*********************************************************************/
/** Collects the (space id, page number) pairs whose buffered entries are to
be merged in one batch, scanning the records of a single insert buffer page
around a starting record.

The function first walks backwards from rec while the records belong to the
same space and the same IBUF_MERGE_AREA-sized group of page numbers as the
starting record, then walks forwards from there, summing
ibuf_rec_get_volume() per page and storing the pages that qualify.

@param contract        if TRUE, every page met in the forward scan is stored
                       regardless of its buffered volume
@param rec             starting record; the infimum/supremum pseudo-records
                       are stepped over
@param space_ids       out: space ids of the stored pages
@param space_versions  out: fil_space_get_version() of each stored space
@param page_nos        out: page numbers of the stored pages
@param n_stored        out: number of entries written to the three arrays;
                       at most IBUF_MAX_N_PAGES_MERGED
@return sum of the buffered volumes of the stored pages; 0 if the page holds
no user records */
02310 static
02311 ulint
02312 ibuf_get_merge_page_nos(
02313 /*====================*/
02314   ibool   contract,
02318   rec_t*    rec,  
02320   ulint*    space_ids,
02321   ib_int64_t* space_versions,
02324   ulint*    page_nos,
02327   ulint*    n_stored)
02329 {
02330   ulint prev_page_no;
02331   ulint prev_space_id;
02332   ulint first_page_no;
02333   ulint first_space_id;
02334   ulint rec_page_no;
02335   ulint rec_space_id;
02336   ulint sum_volumes;
02337   ulint volume_for_page;
02338   ulint rec_volume;
02339   ulint limit;
02340   ulint n_pages;
02341 
02342   *n_stored = 0;
02343 
        /* Upper bound for both scans; it never exceeds
        IBUF_MAX_N_PAGES_MERGED, so the output arrays need at most that
        many slots. */
02344   limit = ut_min(IBUF_MAX_N_PAGES_MERGED, buf_pool_get_curr_size() / 4);
02345 
        /* Step off the pseudo-records. If the supremum is reached again
        after stepping forward from the infimum, there is no user record
        on the page. */
02346   if (page_rec_is_supremum(rec)) {
02347 
02348     rec = page_rec_get_prev(rec);
02349   }
02350 
02351   if (page_rec_is_infimum(rec)) {
02352 
02353     rec = page_rec_get_next(rec);
02354   }
02355 
02356   if (page_rec_is_supremum(rec)) {
02357 
02358     return(0);
02359   }
02360 
02361   first_page_no = ibuf_rec_get_page_no(rec);
02362   first_space_id = ibuf_rec_get_space(rec);
02363   n_pages = 0;
02364   prev_page_no = 0;
02365   prev_space_id = 0;
02366 
02367   /* Go backwards from the first rec until we reach the border of the
02368   'merge area', or the page start or the limit of storeable pages is
02369   reached */
02370 
02371   while (!page_rec_is_infimum(rec) && UNIV_LIKELY(n_pages < limit)) {
02372 
02373     rec_page_no = ibuf_rec_get_page_no(rec);
02374     rec_space_id = ibuf_rec_get_space(rec);
02375 
02376     if (rec_space_id != first_space_id
02377         || (rec_page_no / IBUF_MERGE_AREA)
02378         != (first_page_no / IBUF_MERGE_AREA)) {
02379 
02380       break;
02381     }
02382 
            /* Count each distinct (space, page) once. */
02383     if (rec_page_no != prev_page_no
02384         || rec_space_id != prev_space_id) {
02385       n_pages++;
02386     }
02387 
02388     prev_page_no = rec_page_no;
02389     prev_space_id = rec_space_id;
02390 
02391     rec = page_rec_get_prev(rec);
02392   }
02393 
        /* The backward scan stopped one record too far; step forward
        again before starting the forward scan. */
02394   rec = page_rec_get_next(rec);
02395 
02396   /* At the loop start there is no prev page; we mark this with a pair
02397   of space id, page no (0, 0) for which there can never be entries in
02398   the insert buffer */
02399 
02400   prev_page_no = 0;
02401   prev_space_id = 0;
02402   sum_volumes = 0;
02403   volume_for_page = 0;
02404 
02405   while (*n_stored < limit) {
02406     if (page_rec_is_supremum(rec)) {
02407       /* When no more records available, mark this with
02408       another 'impossible' pair of space id, page no */
02409       rec_page_no = 1;
02410       rec_space_id = 0;
02411     } else {
02412       rec_page_no = ibuf_rec_get_page_no(rec);
02413       rec_space_id = ibuf_rec_get_space(rec);
02414       ut_ad(rec_page_no > IBUF_TREE_ROOT_PAGE_NO);
02415     }
02416 
02417 #ifdef UNIV_IBUF_DEBUG
02418     ut_a(*n_stored < IBUF_MAX_N_PAGES_MERGED);
02419 #endif
            /* A change of (space, page) relative to a real previous
            record means that all records of the previous page have
            been seen: decide whether to store that page. */
02420     if ((rec_space_id != prev_space_id
02421          || rec_page_no != prev_page_no)
02422         && (prev_space_id != 0 || prev_page_no != 0)) {
02423 
              /* Store the previous page if it is the page of the
              starting record, if contract is set, or if its
              accumulated volume exceeds the threshold below. */
02424       if ((prev_page_no == first_page_no
02425            && prev_space_id == first_space_id)
02426           || contract
02427           || (volume_for_page
02428         > ((IBUF_MERGE_THRESHOLD - 1)
02429            * 4 * UNIV_PAGE_SIZE
02430            / IBUF_PAGE_SIZE_PER_FREE_SPACE)
02431         / IBUF_MERGE_THRESHOLD)) {
02432 
02433         space_ids[*n_stored] = prev_space_id;
02434         space_versions[*n_stored]
02435           = fil_space_get_version(prev_space_id);
02436         page_nos[*n_stored] = prev_page_no;
02437 
02438         (*n_stored)++;
02439 
02440         sum_volumes += volume_for_page;
02441       }
02442 
              /* Stop once the current record lies outside the merge
              area of the starting record. */
02443       if (rec_space_id != first_space_id
02444           || rec_page_no / IBUF_MERGE_AREA
02445           != first_page_no / IBUF_MERGE_AREA) {
02446 
02447         break;
02448       }
02449 
02450       volume_for_page = 0;
02451     }
02452 
02453     if (rec_page_no == 1 && rec_space_id == 0) {
02454       /* Supremum record */
02455 
02456       break;
02457     }
02458 
02459     rec_volume = ibuf_rec_get_volume(rec);
02460 
02461     volume_for_page += rec_volume;
02462 
02463     prev_page_no = rec_page_no;
02464     prev_space_id = rec_space_id;
02465 
02466     rec = page_rec_get_next(rec);
02467   }
02468 
02469 #ifdef UNIV_IBUF_DEBUG
02470   ut_a(*n_stored <= IBUF_MAX_N_PAGES_MERGED);
02471 #endif
02472 #if 0
02473   fprintf(stderr, "Ibuf merge batch %lu pages %lu volume\n",
02474     *n_stored, sum_volumes);
02475 #endif
02476   return(sum_volumes);
02477 }
02478 
02479 /*********************************************************************/
02484 static
02485 ulint
02486 ibuf_contract_ext(
02487 /*==============*/
02488   ulint*  n_pages,
02489   ibool sync) 
02492 {
02493   btr_pcur_t  pcur;
02494   ulint   page_nos[IBUF_MAX_N_PAGES_MERGED];
02495   ulint   space_ids[IBUF_MAX_N_PAGES_MERGED];
02496   ib_int64_t  space_versions[IBUF_MAX_N_PAGES_MERGED];
02497   ulint   sum_sizes;
02498   mtr_t   mtr;
02499 
02500   *n_pages = 0;
02501   ut_ad(!ibuf_inside());
02502 
02503   /* We perform a dirty read of ibuf->empty, without latching
02504   the insert buffer root page. We trust this dirty read except
02505   when a slow shutdown is being executed. During a slow
02506   shutdown, the insert buffer merge must be completed. */
02507 
02508   if (UNIV_UNLIKELY(ibuf->empty)
02509       && UNIV_LIKELY(!srv_shutdown_state)) {
02510 ibuf_is_empty:
02511 
02512 #if 0 /* TODO */
02513     if (srv_shutdown_state) {
02514       /* If the insert buffer becomes empty during
02515       shutdown, note it in the system tablespace. */
02516 
02517       trx_sys_set_ibuf_format(TRX_SYS_IBUF_EMPTY);
02518     }
02519 
02520     /* TO DO: call trx_sys_set_ibuf_format() at startup
02521     and whenever ibuf_use is changed to allow buffered
02522     delete-marking or deleting.  Never downgrade the
02523     stamped format except when the insert buffer becomes
02524     empty. */
02525 #endif
02526 
02527     return(0);
02528   }
02529 
02530   mtr_start(&mtr);
02531 
02532   ibuf_enter();
02533 
02534   /* Open a cursor to a randomly chosen leaf of the tree, at a random
02535   position within the leaf */
02536 
02537   btr_pcur_open_at_rnd_pos(ibuf->index, BTR_SEARCH_LEAF, &pcur, &mtr);
02538 
02539   ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
02540 
02541   if (page_get_n_recs(btr_pcur_get_page(&pcur)) == 0) {
02542     /* If a B-tree page is empty, it must be the root page
02543     and the whole B-tree must be empty. InnoDB does not
02544     allow empty B-tree pages other than the root. */
02545     ut_ad(ibuf->empty);
02546     ut_ad(page_get_space_id(btr_pcur_get_page(&pcur))
02547           == IBUF_SPACE_ID);
02548     ut_ad(page_get_page_no(btr_pcur_get_page(&pcur))
02549           == FSP_IBUF_TREE_ROOT_PAGE_NO);
02550 
02551     ibuf_exit();
02552 
02553     mtr_commit(&mtr);
02554     btr_pcur_close(&pcur);
02555 
02556     goto ibuf_is_empty;
02557   }
02558 
02559   sum_sizes = ibuf_get_merge_page_nos(TRUE, btr_pcur_get_rec(&pcur),
02560               space_ids, space_versions,
02561               page_nos, n_pages);
02562 #if 0 /* defined UNIV_IBUF_DEBUG */
02563   fprintf(stderr, "Ibuf contract sync %lu pages %lu volume %lu\n",
02564     sync, *n_pages, sum_sizes);
02565 #endif
02566   ibuf_exit();
02567 
02568   mtr_commit(&mtr);
02569   btr_pcur_close(&pcur);
02570 
02571   buf_read_ibuf_merge_pages(sync, space_ids, space_versions, page_nos,
02572           *n_pages);
02573 
02574   return(sum_sizes + 1);
02575 }
02576 
02577 /*********************************************************************/
02582 UNIV_INTERN
02583 ulint
02584 ibuf_contract(
02585 /*==========*/
02586   ibool sync) 
02589 {
02590   ulint n_pages;
02591 
02592   return(ibuf_contract_ext(&n_pages, sync));
02593 }
02594 
02595 /*********************************************************************/
02600 UNIV_INTERN
02601 ulint
02602 ibuf_contract_for_n_pages(
02603 /*======================*/
02604   ibool sync, 
02607   ulint n_pages)
02610 {
02611   ulint sum_bytes = 0;
02612   ulint sum_pages = 0;
02613   ulint n_bytes;
02614   ulint n_pag2;
02615 
02616   while (sum_pages < n_pages) {
02617     n_bytes = ibuf_contract_ext(&n_pag2, sync);
02618 
02619     if (n_bytes == 0) {
02620       return(sum_bytes);
02621     }
02622 
02623     sum_bytes += n_bytes;
02624     sum_pages += n_pag2;
02625   }
02626 
02627   return(sum_bytes);
02628 }
02629 
02630 /*********************************************************************/
02632 UNIV_INLINE
02633 void
02634 ibuf_contract_after_insert(
02635 /*=======================*/
02636   ulint entry_size) 
02638 {
02639   ibool sync;
02640   ulint sum_sizes;
02641   ulint size;
02642   ulint max_size;
02643 
02644   /* Perform dirty reads of ibuf->size and ibuf->max_size, to
02645   reduce ibuf_mutex contention. ibuf->max_size remains constant
02646   after ibuf_init_at_db_start(), but ibuf->size should be
02647   protected by ibuf_mutex. Given that ibuf->size fits in a
02648   machine word, this should be OK; at worst we are doing some
02649   excessive ibuf_contract() or occasionally skipping a
02650   ibuf_contract(). */
02651   size = ibuf->size;
02652   max_size = ibuf->max_size;
02653 
02654   if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
02655     return;
02656   }
02657 
02658   sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC);
02659 
02660   /* Contract at least entry_size many bytes */
02661   sum_sizes = 0;
02662   size = 1;
02663 
02664   do {
02665 
02666     size = ibuf_contract(sync);
02667     sum_sizes += size;
02668   } while (size > 0 && sum_sizes < entry_size);
02669 }
02670 
02671 /*********************************************************************/
02674 static
02675 ibool
02676 ibuf_get_volume_buffered_hash(
02677 /*==========================*/
02678   const rec_t*  rec,  
02679   const byte* types,  
02680   const byte* data, 
02681   ulint   comp, 
02683   ulint*    hash, 
02684   ulint   size) 
02685 {
02686   ulint   len;
02687   ulint   fold;
02688   ulint   bitmask;
02689 
02690   len = ibuf_rec_get_size(rec, types, rec_get_n_fields_old(rec) - 4,
02691         FALSE, comp);
02692   fold = ut_fold_binary(data, len);
02693 
02694   hash += (fold / (8 * sizeof *hash)) % size; // 8 = bits in byte
02695   bitmask = 1 << (fold % (8 * sizeof *hash));
02696 
02697   if (*hash & bitmask) {
02698 
02699     return(FALSE);
02700   }
02701 
02702   /* We have not seen this record yet.  Insert it. */
02703   *hash |= bitmask;
02704 
02705   return(TRUE);
02706 }
02707 
02708 /*********************************************************************/
/** Estimates how much space one buffered record will take on the index page
once merged, and optionally updates a running count of records on that page.

@param rec     insert buffer record; must have more than 4 fields
@param hash    in/out: bitmap handed to ibuf_get_volume_buffered_hash();
               only touched when n_recs is not NULL
@param size    number of words in hash
@param n_recs  in/out: running record count, or NULL to skip counting.
               It is incremented when an insert or delete-mark record is
               seen for the first time according to hash, and decremented
               for a delete. Records without an operation counter leave
               it unchanged.
@return estimated size of the record plus the page directory space
reserved for one record; 0 for delete-mark and delete operations */
02713 static
02714 ulint
02715 ibuf_get_volume_buffered_count(
02716 /*===========================*/
02717   const rec_t*  rec,  
02718   ulint*    hash, 
02719   ulint   size, 
02720   lint*   n_recs) 
02722 {
02723   ulint   len;
02724   ibuf_op_t ibuf_op;
02725   const byte* types;
02726   ulint   n_fields  = rec_get_n_fields_old(rec);
02727 
02728   ut_ad(ibuf_inside());
02729   ut_ad(n_fields > 4);
        /* The first 4 fields are not part of the buffered record. */
02730   n_fields -= 4;
02731 
        /* Only the length of field 1 is needed, for the format check
        below. */
02732   rec_get_nth_field_offs_old(rec, 1, &len);
02733   /* This function is only invoked when buffering new
02734   operations.  All pre-4.1 records should have been merged
02735   when the database was started up. */
02736   ut_a(len == 1);
02737   ut_ad(trx_sys_multiple_tablespace_format);
02738 
02739   types = rec_get_nth_field_old(rec, 3, &len);
02740 
        /* The remainder of the length of field 3 tells which record
        variant this is. */
02741   switch (UNIV_EXPECT(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE,
02742           IBUF_REC_INFO_SIZE)) {
02743   default:
02744     ut_error;
02745   case 0:
02746     /* This ROW_TYPE=REDUNDANT record does not include an
02747     operation counter.  Exclude it from the *n_recs,
02748     because deletes cannot be buffered if there are
02749     old-style inserts buffered for the page. */
02750 
02751     len = ibuf_rec_get_size(rec, types, n_fields, FALSE, 0);
02752 
02753     return(len
02754            + rec_get_converted_extra_size(len, n_fields, 0)
02755            + page_dir_calc_reserved_space(1));
02756   case 1:
02757     /* This ROW_TYPE=COMPACT record does not include an
02758     operation counter.  Exclude it from the *n_recs,
02759     because deletes cannot be buffered if there are
02760     old-style inserts buffered for the page. */
02761     goto get_volume_comp;
02762 
02763   case IBUF_REC_INFO_SIZE:
02764     ibuf_op = (ibuf_op_t) types[IBUF_REC_OFFSET_TYPE];
02765     break;
02766   }
02767 
02768   switch (ibuf_op) {
02769   case IBUF_OP_INSERT:
02770     /* Inserts can be done by updating a delete-marked record.
02771     Because delete-mark and insert operations can be pointing to
02772     the same records, we must not count duplicates. */
02773   case IBUF_OP_DELETE_MARK:
02774     /* There must be a record to delete-mark.
02775     See if this record has been already buffered. */
            /* The third argument is the address just past the len
            bytes of field 3; it is folded as the record payload. */
02776     if (n_recs && ibuf_get_volume_buffered_hash(
02777           rec, types + IBUF_REC_INFO_SIZE,
02778           types + len,
02779           types[IBUF_REC_OFFSET_FLAGS] & IBUF_REC_COMPACT,
02780           hash, size)) {
02781       (*n_recs)++;
02782     }
02783 
02784     if (ibuf_op == IBUF_OP_DELETE_MARK) {
02785       /* Setting the delete-mark flag does not
02786       affect the available space on the page. */
02787       return(0);
02788     }
02789     break;
02790   case IBUF_OP_DELETE:
02791     /* A record will be removed from the page. */
02792     if (n_recs) {
02793       (*n_recs)--;
02794     }
02795     /* While deleting a record actually frees up space,
02796     we have to play it safe and pretend that it takes no
02797     additional space (the record might not exist, etc.). */
02798     return(0);
02799   default:
02800     ut_error;
02801   }
02802 
02803   ut_ad(ibuf_op == IBUF_OP_INSERT);
02804 
        /* Shared tail for counter-less COMPACT records and for inserts:
        rebuild the index entry in a temporary heap and measure it. */
02805 get_volume_comp:
02806   {
02807     dtuple_t* entry;
02808     ulint   volume;
02809     dict_index_t* dummy_index;
02810     mem_heap_t* heap = mem_heap_create(500);
02811 
02812     entry = ibuf_build_entry_from_ibuf_rec(
02813       rec, heap, &dummy_index);
02814 
02815     volume = rec_get_converted_size(dummy_index, entry, 0);
02816 
02817     ibuf_dummy_index_free(dummy_index);
02818     mem_heap_free(heap);
02819 
02820     return(volume + page_dir_calc_reserved_space(1));
02821   }
02822 }
02823 
02824 /*********************************************************************/
/** Sums the estimated volume of the entries already buffered for one index
page, looking at the insert buffer records around a positioned cursor.

The records for (space, page_no) are scanned in four steps: backwards on
the cursor page, backwards on the previous page, forwards on the cursor
page and forwards on the next page. Each step ends at the first record
that belongs to another page. If a neighbour page is exhausted without
meeting such a record, the function gives up and returns UNIV_PAGE_SIZE.

@param pcur     cursor positioned in the insert buffer tree; debug builds
                assert that its latch mode is BTR_MODIFY_PREV or
                BTR_MODIFY_TREE
@param space    space id of the index page
@param page_no  page number of the index page
@param n_recs   in/out: running record count passed on to
                ibuf_get_volume_buffered_count(), or NULL
@param mtr      mini-transaction used for reading page links and for
                latching the neighbour pages
@return sum of ibuf_get_volume_buffered_count() over the matching
records, or UNIV_PAGE_SIZE if the scan had to give up */
02830 static
02831 ulint
02832 ibuf_get_volume_buffered(
02833 /*=====================*/
02834   btr_pcur_t* pcur, 
02839   ulint   space,  
02840   ulint   page_no,
02841   lint*   n_recs, 
02844   mtr_t*    mtr)  
02845 {
02846   ulint volume;
02847   rec_t*  rec;
02848   page_t* page;
02849   ulint prev_page_no;
02850   page_t* prev_page;
02851   ulint next_page_no;
02852   page_t* next_page;
02853   ulint hash_bitmap[128 / sizeof(ulint)]; /* bitmap of buffered recs */
02854 
02855   ut_a(trx_sys_multiple_tablespace_format);
02856 
02857   ut_ad((pcur->latch_mode == BTR_MODIFY_PREV)
02858         || (pcur->latch_mode == BTR_MODIFY_TREE));
02859 
02860   /* Count the volume of inserts earlier in the alphabetical order than
02861   pcur */
02862 
02863   volume = 0;
02864 
        /* The bitmap is consulted only when n_recs is given, so it is
        cleared only in that case. */
02865   if (n_recs) {
02866     memset(hash_bitmap, 0, sizeof hash_bitmap);
02867   }
02868 
02869   rec = btr_pcur_get_rec(pcur);
02870   page = page_align(rec);
02871   ut_ad(page_validate(page, ibuf->index));
02872 
02873   if (page_rec_is_supremum(rec)) {
02874     rec = page_rec_get_prev(rec);
02875   }
02876 
        /* Step 1: backwards on the cursor page, starting at the cursor
        record itself. */
02877   for (;;) {
02878     if (page_rec_is_infimum(rec)) {
02879 
02880       break;
02881     }
02882 
02883     if (page_no != ibuf_rec_get_page_no(rec)
02884         || space != ibuf_rec_get_space(rec)) {
02885 
02886       goto count_later;
02887     }
02888 
02889     volume += ibuf_get_volume_buffered_count(
02890       rec, hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs);
02891 
02892     rec = page_rec_get_prev(rec);
02893     ut_ad(page_align(rec) == page);
02894   }
02895 
02896   /* Look at the previous page */
02897 
02898   prev_page_no = btr_page_get_prev(page, mtr);
02899 
02900   if (prev_page_no == FIL_NULL) {
02901 
02902     goto count_later;
02903   }
02904 
02905   {
02906     buf_block_t*  block;
02907 
02908     block = buf_page_get(
02909       IBUF_SPACE_ID, 0, prev_page_no, RW_X_LATCH, mtr);
02910 
02911     buf_block_dbg_add_level(block, SYNC_TREE_NODE);
02912 
02913 
02914     prev_page = buf_block_get_frame(block);
02915     ut_ad(page_validate(prev_page, ibuf->index));
02916   }
02917 
02918 #ifdef UNIV_BTR_DEBUG
02919   ut_a(btr_page_get_next(prev_page, mtr)
02920        == page_get_page_no(page));
02921 #endif /* UNIV_BTR_DEBUG */
02922 
        /* Step 2: backwards on the previous page, from its last
        record. */
02923   rec = page_get_supremum_rec(prev_page);
02924   rec = page_rec_get_prev(rec);
02925 
02926   for (;;) {
02927     if (page_rec_is_infimum(rec)) {
02928 
02929       /* We cannot go to yet a previous page, because we
02930       do not have the x-latch on it, and cannot acquire one
02931       because of the latching order: we have to give up */
02932 
02933       return(UNIV_PAGE_SIZE);
02934     }
02935 
02936     if (page_no != ibuf_rec_get_page_no(rec)
02937         || space != ibuf_rec_get_space(rec)) {
02938 
02939       goto count_later;
02940     }
02941 
02942     volume += ibuf_get_volume_buffered_count(
02943       rec, hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs);
02944 
02945     rec = page_rec_get_prev(rec);
02946     ut_ad(page_align(rec) == prev_page);
02947   }
02948 
        /* Step 3: forwards on the cursor page, starting after the
        cursor record, which step 1 has already handled. */
02949 count_later:
02950   rec = btr_pcur_get_rec(pcur);
02951 
02952   if (!page_rec_is_supremum(rec)) {
02953     rec = page_rec_get_next(rec);
02954   }
02955 
02956   for (;;) {
02957     if (page_rec_is_supremum(rec)) {
02958 
02959       break;
02960     }
02961 
02962     if (page_no != ibuf_rec_get_page_no(rec)
02963         || space != ibuf_rec_get_space(rec)) {
02964 
02965       return(volume);
02966     }
02967 
02968     volume += ibuf_get_volume_buffered_count(
02969       rec, hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs);
02970 
02971     rec = page_rec_get_next(rec);
02972   }
02973 
02974   /* Look at the next page */
02975 
02976   next_page_no = btr_page_get_next(page, mtr);
02977 
02978   if (next_page_no == FIL_NULL) {
02979 
02980     return(volume);
02981   }
02982 
02983   {
02984     buf_block_t*  block;
02985 
02986     block = buf_page_get(
02987       IBUF_SPACE_ID, 0, next_page_no, RW_X_LATCH, mtr);
02988 
02989     buf_block_dbg_add_level(block, SYNC_TREE_NODE);
02990 
02991 
02992     next_page = buf_block_get_frame(block);
02993     ut_ad(page_validate(next_page, ibuf->index));
02994   }
02995 
02996 #ifdef UNIV_BTR_DEBUG
02997   ut_a(btr_page_get_prev(next_page, mtr) == page_get_page_no(page));
02998 #endif /* UNIV_BTR_DEBUG */
02999 
        /* Step 4: forwards on the next page, from its first record. */
03000   rec = page_get_infimum_rec(next_page);
03001   rec = page_rec_get_next(rec);
03002 
03003   for (;;) {
03004     if (page_rec_is_supremum(rec)) {
03005 
03006       /* We give up */
03007 
03008       return(UNIV_PAGE_SIZE);
03009     }
03010 
03011     if (page_no != ibuf_rec_get_page_no(rec)
03012         || space != ibuf_rec_get_space(rec)) {
03013 
03014       return(volume);
03015     }
03016 
03017     volume += ibuf_get_volume_buffered_count(
03018       rec, hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs);
03019 
03020     rec = page_rec_get_next(rec);
03021     ut_ad(page_align(rec) == next_page);
03022   }
03023 }
03024 
03025 /*********************************************************************/
03028 UNIV_INTERN
03029 void
03030 ibuf_update_max_tablespace_id(void)
03031 /*===============================*/
03032 {
03033   ulint   max_space_id;
03034   const rec_t*  rec;
03035   const byte* field;
03036   ulint   len;
03037   btr_pcur_t  pcur;
03038   mtr_t   mtr;
03039 
03040   ut_a(!dict_table_is_comp(ibuf->index->table));
03041 
03042   ibuf_enter();
03043 
03044   mtr_start(&mtr);
03045 
03046   btr_pcur_open_at_index_side(
03047     FALSE, ibuf->index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
03048 
03049   ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
03050 
03051   btr_pcur_move_to_prev(&pcur, &mtr);
03052 
03053   if (btr_pcur_is_before_first_on_page(&pcur)) {
03054     /* The tree is empty */
03055 
03056     max_space_id = 0;
03057   } else {
03058     rec = btr_pcur_get_rec(&pcur);
03059 
03060     field = rec_get_nth_field_old(rec, 0, &len);
03061 
03062     ut_a(len == 4);
03063 
03064     max_space_id = mach_read_from_4(field);
03065   }
03066 
03067   mtr_commit(&mtr);
03068   ibuf_exit();
03069 
03070   /* printf("Maximum space id in insert buffer %lu\n", max_space_id); */
03071 
03072   fil_set_max_space_id_if_bigger(max_space_id);
03073 }
03074 
03075 /****************************************************************/
03080 static
03081 ulint
03082 ibuf_get_entry_counter_low(
03083 /*=======================*/
03084   const rec_t*  rec,    
03085   ulint   space,    
03086   ulint   page_no)  
03087 {
03088   ulint   counter;
03089   const byte* field;
03090   ulint   len;
03091 
03092   ut_ad(ibuf_inside());
03093   ut_ad(rec_get_n_fields_old(rec) > 2);
03094 
03095   field = rec_get_nth_field_old(rec, 1, &len);
03096 
03097   if (UNIV_UNLIKELY(len != 1)) {
03098     /* pre-4.1 format */
03099     ut_a(trx_doublewrite_must_reset_space_ids);
03100     ut_a(!trx_sys_multiple_tablespace_format);
03101 
03102     return(ULINT_UNDEFINED);
03103   }
03104 
03105   ut_a(trx_sys_multiple_tablespace_format);
03106 
03107   /* Check the tablespace identifier. */
03108   field = rec_get_nth_field_old(rec, 0, &len);
03109   ut_a(len == 4);
03110 
03111   if (mach_read_from_4(field) != space) {
03112 
03113     return(0);
03114   }
03115 
03116   /* Check the page offset. */
03117   field = rec_get_nth_field_old(rec, 2, &len);
03118   ut_a(len == 4);
03119 
03120   if (mach_read_from_4(field) != page_no) {
03121 
03122     return(0);
03123   }
03124 
03125   /* Check if the record contains a counter field. */
03126   field = rec_get_nth_field_old(rec, 3, &len);
03127 
03128   switch (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) {
03129   default:
03130     ut_error;
03131   case 0: /* ROW_FORMAT=REDUNDANT */
03132   case 1: /* ROW_FORMAT=COMPACT */
03133     return(ULINT_UNDEFINED);
03134 
03135   case IBUF_REC_INFO_SIZE:
03136     counter = mach_read_from_2(field + IBUF_REC_OFFSET_COUNTER);
03137     ut_a(counter < 0xFFFF);
03138     return(counter + 1);
03139   }
03140 }
03141 
03142 /****************************************************************/
/** Determines the counter value for a new insert buffer entry and writes it
into the already built entry (field 3, at offset IBUF_REC_OFFSET_COUNTER).

The counter is derived from the record the cursor is positioned on or,
when the cursor is at the start of a page, from the last record of the
previous page. In the latter case the cursor may be moved to the previous
page when is_optimistic is set.

@param entry          in/out: entry whose counter field is patched
@param space          space id of the index page
@param page_no        page number of the index page
@param pcur           in/out: cursor positioned in the insert buffer tree;
                      may be repositioned by btr_cur_position()
@param is_optimistic  TRUE if the insert is optimistic, which allows the
                      insert position to be shifted to the left page
@param mtr            mini-transaction
@return TRUE if the counter was written; FALSE if no counter could be
determined and the caller must not buffer the entry */
03146 static
03147 ibool
03148 ibuf_set_entry_counter(
03149 /*===================*/
03150   dtuple_t* entry,    
03151   ulint   space,    
03152   ulint   page_no,  
03153   btr_pcur_t* pcur,   
03156   ibool   is_optimistic,  
03157   mtr_t*    mtr)    
03158 {
03159   dfield_t* field;
03160   byte*   data;
03161   ulint   counter = 0;
03162 
03163   /* pcur points to either a user rec or to a page's infimum record. */
03164   ut_ad(page_validate(btr_pcur_get_page(pcur), ibuf->index));
03165 
03166   if (btr_pcur_is_on_user_rec(pcur)) {
03167 
            /* The record under the cursor decides: 0 if it belongs to
            another page, else its counter plus 1. */
03168     counter = ibuf_get_entry_counter_low(
03169       btr_pcur_get_rec(pcur), space, page_no);
03170 
03171     if (UNIV_UNLIKELY(counter == ULINT_UNDEFINED)) {
03172       /* The record lacks a counter field.
03173       Such old records must be merged before
03174       new records can be buffered. */
03175 
03176       return(FALSE);
03177     }
03178   } else if (btr_pcur_is_before_first_in_tree(pcur, mtr)) {
03179     /* Ibuf tree is either completely empty, or the insert
03180     position is at the very first record of a non-empty tree. In
03181     either case we have no previous records for (space,
03182     page_no). */
03183 
03184     counter = 0;
03185   } else if (btr_pcur_is_before_first_on_page(pcur)) {
03186     btr_cur_t*  cursor = btr_pcur_get_btr_cur(pcur);
03187 
03188     if (cursor->low_match < 3) {
03189       /* If low_match < 3, we know that the father node
03190       pointer did not contain the searched for (space,
03191       page_no), which means that the search ended on the
03192       right page regardless of the counter value, and
03193       since we're at the infimum record, there are no
03194       existing records. */
03195 
03196       counter = 0;
03197     } else {
03198       rec_t*    rec;
03199       const page_t* page;
03200       buf_block_t*  block;
03201       page_t*   prev_page;
03202       ulint   prev_page_no;
03203 
03204       ut_a(cursor->ibuf_cnt != ULINT_UNDEFINED);
03205 
03206       page = btr_pcur_get_page(pcur);
03207       prev_page_no = btr_page_get_prev(page, mtr);
03208 
03209       ut_a(prev_page_no != FIL_NULL);
03210 
03211       block = buf_page_get(
03212         IBUF_SPACE_ID, 0, prev_page_no,
03213         RW_X_LATCH, mtr);
03214 
03215       buf_block_dbg_add_level(block, SYNC_TREE_NODE);
03216 
03217       prev_page = buf_block_get_frame(block);
03218 
              /* Last user record of the previous page. */
03219       rec = page_rec_get_prev(
03220         page_get_supremum_rec(prev_page));
03221 
03222       ut_ad(page_rec_is_user_rec(rec));
03223 
03224       counter = ibuf_get_entry_counter_low(
03225         rec, space, page_no);
03226 
03227       if (UNIV_UNLIKELY(counter == ULINT_UNDEFINED)) {
03228         /* The record lacks a counter field.
03229         Such old records must be merged before
03230         new records can be buffered. */
03231 
03232         return(FALSE);
03233       }
03234 
03235       if (counter < cursor->ibuf_cnt) {
03236         /* Search ended on the wrong page. */
03237 
03238         if (is_optimistic) {
03239           /* In an optimistic insert, we can
03240           shift the insert position to the left
03241           page, since it only needs an X-latch
03242           on the page itself, which the
03243           original search acquired for us. */
03244 
03245           btr_cur_position(
03246             ibuf->index, rec, block,
03247             btr_pcur_get_btr_cur(pcur));
03248         } else {
03249           /* We can't shift the insert
03250           position to the left page in a
03251           pessimistic insert since it would
03252           require an X-latch on the left
03253           page's left page, so we have to
03254           abort. */
03255 
03256           return(FALSE);
03257         }
03258       } else {
03259         /* The counter field in the father node is
03260         the same as we would insert; we don't know
03261         whether the insert should go to this page or
03262         the left page (the later fields can differ),
03263         so refuse the insert. */
03264 
03265         return(FALSE);
03266       }
03267     }
03268   } else {
03269     /* The cursor is not positioned at or before a user record. */
03270     return(FALSE);
03271   }
03272 
03273   /* Patch counter value in already built entry. */
03274   field = dtuple_get_nth_field(entry, 3);
03275   data = static_cast<byte *>(dfield_get_data(field));
03276 
03277   mach_write_to_2(data + IBUF_REC_OFFSET_COUNTER, counter);
03278 
03279   return(TRUE);
03280 }
03281 
03282 /*********************************************************************/
03286 static
03287 ulint
03288 ibuf_insert_low(
03289 /*============*/
03290   ulint   mode, 
03291   ibuf_op_t op, 
03292   ibool   no_counter,
03295   const dtuple_t* entry,  
03296   ulint   entry_size,
03298   dict_index_t* index,  
03300   ulint   space,  
03301   ulint   zip_size,
03302   ulint   page_no,
03303   que_thr_t*  thr)  
03304 {
03305   big_rec_t*  dummy_big_rec;
03306   btr_pcur_t  pcur;
03307   btr_cur_t*  cursor;
03308   dtuple_t* ibuf_entry;
03309   mem_heap_t* heap;
03310   ulint   buffered;
03311   lint    min_n_recs;
03312   rec_t*    ins_rec;
03313   ibool   old_bit_value;
03314   page_t*   bitmap_page;
03315   buf_block_t*  block;
03316   page_t*   root;
03317   ulint   err;
03318   ibool   do_merge;
03319   ulint   space_ids[IBUF_MAX_N_PAGES_MERGED];
03320   ib_int64_t  space_versions[IBUF_MAX_N_PAGES_MERGED];
03321   ulint   page_nos[IBUF_MAX_N_PAGES_MERGED];
03322   ulint   n_stored;
03323   mtr_t   mtr;
03324   mtr_t   bitmap_mtr;
03325 
03326   ut_a(!dict_index_is_clust(index));
03327   ut_ad(dtuple_check_typed(entry));
03328   ut_ad(ut_is_2pow(zip_size));
03329   ut_ad(!no_counter || op == IBUF_OP_INSERT);
03330   ut_a(op < IBUF_OP_COUNT);
03331 
03332   ut_a(trx_sys_multiple_tablespace_format);
03333 
03334   do_merge = FALSE;
03335 
03336   /* Perform dirty reads of ibuf->size and ibuf->max_size, to
03337   reduce ibuf_mutex contention. ibuf->max_size remains constant
03338   after ibuf_init_at_db_start(), but ibuf->size should be
03339   protected by ibuf_mutex. Given that ibuf->size fits in a
03340   machine word, this should be OK; at worst we are doing some
03341   excessive ibuf_contract() or occasionally skipping a
03342   ibuf_contract(). */
03343   if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT) {
03344     /* Insert buffer is now too big, contract it but do not try
03345     to insert */
03346 
03347 
03348 #ifdef UNIV_IBUF_DEBUG
03349     fputs("Ibuf too big\n", stderr);
03350 #endif
03351     /* Use synchronous contract (== TRUE) */
03352     ibuf_contract(TRUE);
03353 
03354     return(DB_STRONG_FAIL);
03355   }
03356 
03357   heap = mem_heap_create(512);
03358 
03359   /* Build the entry which contains the space id and the page number
03360   as the first fields and the type information for other fields, and
03361   which will be inserted to the insert buffer. Using a counter value
03362   of 0xFFFF we find the last record for (space, page_no), from which
03363   we can then read the counter value N and use N + 1 in the record we
03364   insert. (We patch the ibuf_entry's counter field to the correct
03365   value just before actually inserting the entry.) */
03366 
03367   ibuf_entry = ibuf_entry_build(
03368     op, index, entry, space, page_no,
03369     no_counter ? ULINT_UNDEFINED : 0xFFFF, heap);
03370 
03371   /* Open a cursor to the insert buffer tree to calculate if we can add
03372   the new entry to it without exceeding the free space limit for the
03373   page. */
03374 
03375   if (mode == BTR_MODIFY_TREE) {
03376     for (;;) {
03377       ibuf_enter();
03378       mutex_enter(&ibuf_pessimistic_insert_mutex);
03379       mutex_enter(&ibuf_mutex);
03380 
03381       if (UNIV_LIKELY(ibuf_data_enough_free_for_insert())) {
03382 
03383         break;
03384       }
03385 
03386       mutex_exit(&ibuf_mutex);
03387       mutex_exit(&ibuf_pessimistic_insert_mutex);
03388       ibuf_exit();
03389 
03390       if (UNIV_UNLIKELY(!ibuf_add_free_page())) {
03391 
03392         mem_heap_free(heap);
03393         return(DB_STRONG_FAIL);
03394       }
03395     }
03396   } else {
03397     ibuf_enter();
03398   }
03399 
03400   mtr_start(&mtr);
03401 
03402   btr_pcur_open(ibuf->index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr);
03403   ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
03404 
03405   /* Find out the volume of already buffered inserts for the same index
03406   page */
03407   min_n_recs = 0;
03408   buffered = ibuf_get_volume_buffered(&pcur, space, page_no,
03409               op == IBUF_OP_DELETE
03410               ? &min_n_recs
03411               : NULL, &mtr);
03412 
03413   if (op == IBUF_OP_DELETE
03414       && (min_n_recs < 2
03415     || buf_pool_watch_occurred(space, page_no))) {
03416     /* The page could become empty after the record is
03417     deleted, or the page has been read in to the buffer
03418     pool.  Refuse to buffer the operation. */
03419 
03420     /* The buffer pool watch is needed for IBUF_OP_DELETE
03421     because of latching order considerations.  We can
03422     check buf_pool_watch_occurred() only after latching
03423     the insert buffer B-tree pages that contain buffered
03424     changes for the page.  We never buffer IBUF_OP_DELETE,
03425     unless some IBUF_OP_INSERT or IBUF_OP_DELETE_MARK have
03426     been previously buffered for the page.  Because there
03427     are buffered operations for the page, the insert
03428     buffer B-tree page latches held by mtr will guarantee
03429     that no changes for the user page will be merged
03430     before mtr_commit(&mtr).  We must not mtr_commit(&mtr)
03431     until after the IBUF_OP_DELETE has been buffered. */
03432 
03433 fail_exit:
03434     if (mode == BTR_MODIFY_TREE) {
03435       mutex_exit(&ibuf_mutex);
03436       mutex_exit(&ibuf_pessimistic_insert_mutex);
03437     }
03438 
03439     err = DB_STRONG_FAIL;
03440     goto func_exit;
03441   }
03442 
03443   /* After this point, the page could still be loaded to the
03444   buffer pool, but we do not have to care about it, since we are
03445   holding a latch on the insert buffer leaf page that contains
03446   buffered changes for (space, page_no).  If the page enters the
03447   buffer pool, buf_page_io_complete() for (space, page_no) will
03448   have to acquire a latch on the same insert buffer leaf page,
03449   which it cannot do until we have buffered the IBUF_OP_DELETE
03450   and done mtr_commit(&mtr) to release the latch. */
03451 
03452 #ifdef UNIV_IBUF_COUNT_DEBUG
03453   ut_a((buffered == 0) || ibuf_count_get(space, page_no));
03454 #endif
03455   mtr_start(&bitmap_mtr);
03456 
03457   bitmap_page = ibuf_bitmap_get_map_page(space, page_no,
03458                  zip_size, &bitmap_mtr);
03459 
03460   /* We check if the index page is suitable for buffered entries */
03461 
03462   if (buf_page_peek(space, page_no)
03463       || lock_rec_expl_exist_on_page(space, page_no)) {
03464 
03465     goto bitmap_fail;
03466   }
03467 
03468   if (op == IBUF_OP_INSERT) {
03469     ulint bits = ibuf_bitmap_page_get_bits(
03470       bitmap_page, page_no, zip_size, IBUF_BITMAP_FREE,
03471       &bitmap_mtr);
03472 
03473     if (buffered + entry_size + page_dir_calc_reserved_space(1)
03474         > ibuf_index_page_calc_free_from_bits(zip_size, bits)) {
03475       /* Release the bitmap page latch early. */
03476       mtr_commit(&bitmap_mtr);
03477 
03478       /* It may not fit */
03479       do_merge = TRUE;
03480 
03481       ibuf_get_merge_page_nos(
03482         FALSE, btr_pcur_get_rec(&pcur),
03483         space_ids, space_versions,
03484         page_nos, &n_stored);
03485 
03486       goto fail_exit;
03487     }
03488   }
03489 
03490   /* Patch correct counter value to the entry to insert. This can
03491   change the insert position, which can result in the need to abort in
03492   some cases. */
03493   if (!no_counter
03494       && !ibuf_set_entry_counter(ibuf_entry, space, page_no, &pcur,
03495                mode == BTR_MODIFY_PREV, &mtr)) {
03496 bitmap_fail:
03497     mtr_commit(&bitmap_mtr);
03498 
03499     goto fail_exit;
03500   }
03501 
03502   /* Set the bitmap bit denoting that the insert buffer contains
03503   buffered entries for this index page, if the bit is not set yet */
03504 
03505   old_bit_value = ibuf_bitmap_page_get_bits(
03506     bitmap_page, page_no, zip_size,
03507     IBUF_BITMAP_BUFFERED, &bitmap_mtr);
03508 
03509   if (!old_bit_value) {
03510     ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
03511             IBUF_BITMAP_BUFFERED, TRUE,
03512             &bitmap_mtr);
03513   }
03514 
03515   mtr_commit(&bitmap_mtr);
03516 
03517   cursor = btr_pcur_get_btr_cur(&pcur);
03518 
03519   if (mode == BTR_MODIFY_PREV) {
03520     err = btr_cur_optimistic_insert(BTR_NO_LOCKING_FLAG, cursor,
03521             ibuf_entry, &ins_rec,
03522             &dummy_big_rec, 0, thr, &mtr);
03523     block = btr_cur_get_block(cursor);
03524     ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID);
03525 
03526     /* If this is the root page, update ibuf->empty. */
03527     if (UNIV_UNLIKELY(buf_block_get_page_no(block)
03528           == FSP_IBUF_TREE_ROOT_PAGE_NO)) {
03529       const page_t* page_root = buf_block_get_frame(block);
03530 
03531       ut_ad(page_get_space_id(page_root) == IBUF_SPACE_ID);
03532       ut_ad(page_get_page_no(page_root)
03533             == FSP_IBUF_TREE_ROOT_PAGE_NO);
03534 
03535       ibuf->empty = (page_get_n_recs(page_root) == 0);
03536     }
03537   } else {
03538     ut_ad(mode == BTR_MODIFY_TREE);
03539 
03540     /* We acquire an x-latch to the root page before the insert,
03541     because a pessimistic insert releases the tree x-latch,
03542     which would cause the x-latching of the root after that to
03543     break the latching order. */
03544 
03545     root = ibuf_tree_root_get(&mtr);
03546 
03547     err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
03548              | BTR_NO_UNDO_LOG_FLAG,
03549              cursor,
03550              ibuf_entry, &ins_rec,
03551              &dummy_big_rec, 0, thr, &mtr);
03552     mutex_exit(&ibuf_pessimistic_insert_mutex);
03553     ibuf_size_update(root, &mtr);
03554     mutex_exit(&ibuf_mutex);
03555     ibuf->empty = (page_get_n_recs(root) == 0);
03556 
03557     block = btr_cur_get_block(cursor);
03558     ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID);
03559   }
03560 
03561   if (err == DB_SUCCESS && op != IBUF_OP_DELETE) {
03562     /* Update the page max trx id field */
03563     page_update_max_trx_id(block, NULL,
03564                thr_get_trx(thr)->id, &mtr);
03565   }
03566 
03567 func_exit:
03568 #ifdef UNIV_IBUF_COUNT_DEBUG
03569   if (err == DB_SUCCESS) {
03570     fprintf(stderr,
03571       "Incrementing ibuf count of space %lu page %lu\n"
03572       "from %lu by 1\n", space, page_no,
03573       ibuf_count_get(space, page_no));
03574 
03575     ibuf_count_set(space, page_no,
03576              ibuf_count_get(space, page_no) + 1);
03577   }
03578 #endif
03579 
03580   mtr_commit(&mtr);
03581   btr_pcur_close(&pcur);
03582   ibuf_exit();
03583 
03584   mem_heap_free(heap);
03585 
03586   if (err == DB_SUCCESS && mode == BTR_MODIFY_TREE) {
03587     ibuf_contract_after_insert(entry_size);
03588   }
03589 
03590   if (do_merge) {
03591 #ifdef UNIV_IBUF_DEBUG
03592     ut_a(n_stored <= IBUF_MAX_N_PAGES_MERGED);
03593 #endif
03594     buf_read_ibuf_merge_pages(FALSE, space_ids, space_versions,
03595             page_nos, n_stored);
03596   }
03597 
03598   return(err);
03599 }
03600 
03601 /*********************************************************************/
/* Tries to buffer an operation on a secondary index page in the insert
buffer. The request is first filtered against the current ibuf_use
setting; insert and delete-mark requests are additionally refused when
the buffer pool page hash already holds an entry for (space, page_no).
Accepted requests are handed to ibuf_insert_low(), first with
BTR_MODIFY_PREV and, if that returns DB_FAIL, again with BTR_MODIFY_TREE.
Returns TRUE if the operation was buffered, FALSE if it was not. */
03606 UNIV_INTERN
03607 ibool
03608 ibuf_insert(
03609 /*========*/
03610   ibuf_op_t op, /* in: kind of operation to buffer */
03611   const dtuple_t* entry,  /* in: index entry the operation applies to */
03612   dict_index_t* index,  /* in: index of the entry; asserted not clustered */
03613   ulint   space,  /* in: space id of the target index page */
03614   ulint   zip_size, /* in: compressed page size; debug-checked with ut_is_2pow() */
03615   ulint   page_no, /* in: page number of the target index page */
03616   que_thr_t*  thr)  /* in: query thread, forwarded to ibuf_insert_low() */
03617 {
03618   ulint   err;
03619   ulint   entry_size;
03620   ibool   no_counter;
03621   /* Read the settable global variable ibuf_use only once in
03622   this function, so that we will have a consistent view of it. */
03623   ibuf_use_t  use   = ibuf_use;
03624 
03625   ut_a(trx_sys_multiple_tablespace_format);
03626   ut_ad(dtuple_check_typed(entry));
03627   ut_ad(ut_is_2pow(zip_size));
03628 
03629   ut_a(!dict_index_is_clust(index));
03630 
        /* TRUE when ibuf_use is at or below IBUF_USE_INSERT; the flag
        is passed unchanged to ibuf_insert_low(). The ut_ad(!no_counter)
        checks below document which settings never reach that state. */
03631   no_counter = use <= IBUF_USE_INSERT;
03632 
        /* Decide from (op, use) whether to refuse (return FALSE),
        to continue via the buffer pool check (check_watch) or to
        continue without it (skip_watch). IBUF_USE_COUNT and
        IBUF_OP_COUNT are not valid requests: they leave the switch
        and reach ut_error. */
03633   switch (op) {
03634   case IBUF_OP_INSERT:
03635     switch (use) {
03636     case IBUF_USE_NONE:
03637     case IBUF_USE_DELETE:
03638     case IBUF_USE_DELETE_MARK:
03639       return(FALSE);
03640     case IBUF_USE_INSERT:
03641     case IBUF_USE_INSERT_DELETE_MARK:
03642     case IBUF_USE_ALL:
03643       goto check_watch;
03644     case IBUF_USE_COUNT:
03645       break;
03646     }
03647     break;
03648   case IBUF_OP_DELETE_MARK:
03649     switch (use) {
03650     case IBUF_USE_NONE:
03651     case IBUF_USE_INSERT:
03652       return(FALSE);
03653     case IBUF_USE_DELETE_MARK:
03654     case IBUF_USE_DELETE:
03655     case IBUF_USE_INSERT_DELETE_MARK:
03656     case IBUF_USE_ALL:
03657       ut_ad(!no_counter);
03658       goto check_watch;
03659     case IBUF_USE_COUNT:
03660       break;
03661     }
03662     break;
03663   case IBUF_OP_DELETE:
03664     switch (use) {
03665     case IBUF_USE_NONE:
03666     case IBUF_USE_INSERT:
03667     case IBUF_USE_INSERT_DELETE_MARK:
03668       return(FALSE);
03669     case IBUF_USE_DELETE_MARK:
03670     case IBUF_USE_DELETE:
03671     case IBUF_USE_ALL:
03672       ut_ad(!no_counter);
03673       goto skip_watch;
03674     case IBUF_USE_COUNT:
03675       break;
03676     }
03677     break;
03678   case IBUF_OP_COUNT:
03679     break;
03680   }
03681 
03682   /* unknown op or use */
03683   ut_error;
03684 
03685 check_watch:
03686   /* If a thread attempts to buffer an insert on a page while a
03687   purge is in progress on the same page, the purge must not be
03688   buffered, because it could remove a record that was
03689   re-inserted later.  For simplicity, we block the buffering of
03690   all operations on a page that has a purge pending.
03691 
03692   We do not check this in the IBUF_OP_DELETE case, because that
03693   would always trigger the buffer pool watch during purge and
03694   thus prevent the buffering of delete operations.  We assume
03695   that the issuer of IBUF_OP_DELETE has called
03696   buf_pool_watch_set(space, page_no). */
03697 
03698   {
03699     buf_page_t* bpage;
03700     ulint   fold = buf_page_address_fold(space, page_no);
03701     buf_pool_t* buf_pool = buf_pool_get(space, page_no);
03702 
          /* The hash lookup is done under the buffer pool mutex;
          only the NULL-ness of the result is used afterwards. */
03703     buf_pool_mutex_enter(buf_pool);
03704     bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold);
03705     buf_pool_mutex_exit(buf_pool);
03706 
03707     if (UNIV_LIKELY_NULL(bpage)) {
03708       /* A buffer pool watch has been set or the
03709       page has been read into the buffer pool.
03710       Do not buffer the request.  If a purge operation
03711       is being buffered, have this request executed
03712       directly on the page in the buffer pool after the
03713       buffered entries for this page have been merged. */
03714       return(FALSE);
03715     }
03716   }
03717 
03718 skip_watch:
03719   entry_size = rec_get_converted_size(index, entry, 0);
03720 
        /* Refuse entries whose converted size is at least half of the
        free space of an empty page of this row format. */
03721   if (entry_size
03722       >= page_get_free_space_of_empty(dict_table_is_comp(index->table))
03723       / 2) {
03724 
03725     return(FALSE);
03726   }
03727 
        /* First attempt with BTR_MODIFY_PREV; only a DB_FAIL result
        triggers the second attempt with BTR_MODIFY_TREE. */
03728   err = ibuf_insert_low(BTR_MODIFY_PREV, op, no_counter,
03729             entry, entry_size,
03730             index, space, zip_size, page_no, thr);
03731   if (err == DB_FAIL) {
03732     err = ibuf_insert_low(BTR_MODIFY_TREE, op, no_counter,
03733               entry, entry_size,
03734               index, space, zip_size, page_no, thr);
03735   }
03736 
03737   if (err == DB_SUCCESS) {
03738 #ifdef UNIV_IBUF_DEBUG
03739     /* fprintf(stderr, "Ibuf insert for page no %lu of index %s\n",
03740     page_no, index->name); */
03741 #endif
03742     return(TRUE);
03743 
03744   } else {
          /* Any result other than DB_SUCCESS must be
          DB_STRONG_FAIL at this point; anything else aborts. */
03745     ut_a(err == DB_STRONG_FAIL);
03746 
03747     return(FALSE);
03748   }
03749 }
03750 
03751 /********************************************************************/
/* Inserts entry at the position of page_cur on an index page. If the
first attempt fails, the page is reorganized, the cursor is repositioned
with a PAGE_CUR_LE search, and the insert is retried once. If the retry
fails as well, diagnostics (available insert size, tuple size, the tuple
itself and the IBUF_BITMAP_FREE bits of the page) are written to stderr
and the function returns without having inserted anything. */
03754 static
03755 void
03756 ibuf_insert_to_index_page_low(
03757 /*==========================*/
03758   const dtuple_t* entry,  /* in: tuple to insert */
03759   buf_block_t*  block,  /* in/out: block holding the index page */
03761   dict_index_t* index,  /* in: index the tuple belongs to */
03762   mtr_t*    mtr,  /* in: mini-transaction used for the page changes */
03763   page_cur_t* page_cur) /* in/out: insert position; repositioned before the retry */
03765 {
03766   const page_t* page;
03767   ulint   space;
03768   ulint   page_no;
03769   ulint   zip_size;
03770   const page_t* bitmap_page;
03771   ulint   old_bits;
03772 
        /* Common case: the tuple fits as the page is. */
03773   if (UNIV_LIKELY
03774       (page_cur_tuple_insert(page_cur, entry, index, 0, mtr) != NULL)) {
03775     return;
03776   }
03777 
03778   /* If the record did not fit, reorganize */
03779 
03780   btr_page_reorganize(block, index, mtr);
        /* The cursor is searched anew after the reorganization. */
03781   page_cur_search(block, index, entry, PAGE_CUR_LE, page_cur);
03782 
03783   /* This time the record must fit */
03784 
03785   if (UNIV_LIKELY
03786       (page_cur_tuple_insert(page_cur, entry, index, 0, mtr) != NULL)) {
03787     return;
03788   }
03789 
        /* Both attempts failed: report and give up. Nothing below
        modifies the index page. */
03790   page = buf_block_get_frame(block);
03791 
03792   ut_print_timestamp(stderr);
03793 
03794   fprintf(stderr,
03795     "  InnoDB: Error: Insert buffer insert fails;"
03796     " page free %lu, dtuple size %lu\n",
03797     (ulong) page_get_max_insert_size(page, 1),
03798     (ulong) rec_get_converted_size(index, entry, 0));
03799   fputs("InnoDB: Cannot insert index record ", stderr);
03800   dtuple_print(stderr, entry);
03801   fputs("\nInnoDB: The table where this index record belongs\n"
03802         "InnoDB: is now probably corrupt. Please run CHECK TABLE on\n"
03803         "InnoDB: that table.\n", stderr);
03804 
03805   space = page_get_space_id(page);
03806   zip_size = buf_block_get_zip_size(block);
03807   page_no = page_get_page_no(page);
03808 
        /* The bitmap page is fetched within the caller's mtr, only
        to include its free bits in the report. */
03809   bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
03810   old_bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size,
03811                IBUF_BITMAP_FREE, mtr);
03812 
03813   fprintf(stderr,
03814     "InnoDB: space %lu, page %lu, zip_size %lu, bitmap bits %lu\n",
03815     (ulong) space, (ulong) page_no,
03816     (ulong) zip_size, (ulong) old_bits);
03817 
03818   fputs("InnoDB: Submit a detailed bug report"
03819         " to http://bugs.mysql.com\n", stderr);
03820 }
03821 
03822 /************************************************************************
03823 During merge, inserts to an index page a secondary index entry extracted
03824 from the insert buffer. */
/* The page is first sanity-checked: its 'compact' flag must match the
table, it must not be empty, and its first user record must have as many
fields as entry. On any mismatch the page and the entry are dumped to
stderr and nothing is inserted.
If the page already holds a record matching entry on all fields, that
record is reused: its delete-mark is cleared, and it is either updated
in place or, when the update would change a field size, deleted and
replaced by entry. Otherwise entry is inserted through
ibuf_insert_to_index_page_low(). */
03825 static
03826 void
03827 ibuf_insert_to_index_page(
03828 /*======================*/
03829   const dtuple_t* entry,  /* in: buffered entry to insert */
03830   buf_block_t*  block,  /* in/out: block holding the index page */
03832   dict_index_t* index,  /* in: index the entry belongs to */
03833   mtr_t*    mtr)  /* in: mini-transaction used for the page changes */
03834 {
03835   page_cur_t  page_cur;
03836   ulint   low_match;
03837   page_t*   page    = buf_block_get_frame(block);
03838   rec_t*    rec;
03839 
03840   ut_ad(ibuf_inside());
03841   ut_ad(dtuple_check_typed(entry));
03842   ut_ad(!buf_block_align(page)->is_hashed);
03843 
03844   if (UNIV_UNLIKELY(dict_table_is_comp(index->table)
03845         != (ibool)!!page_is_comp(page))) {
03846     fputs("InnoDB: Trying to insert a record from"
03847           " the insert buffer to an index page\n"
03848           "InnoDB: but the 'compact' flag does not match!\n",
03849           stderr);
03850     goto dump;
03851   }
03852 
        /* First user record of the page, or the supremum if the
        page holds no user records. */
03853   rec = page_rec_get_next(page_get_infimum_rec(page));
03854 
03855   if (page_rec_is_supremum(rec)) {
03856     fputs("InnoDB: Trying to insert a record from"
03857           " the insert buffer to an index page\n"
03858           "InnoDB: but the index page is empty!\n",
03859           stderr);
03860     goto dump;
03861   }
03862 
03863   if (UNIV_UNLIKELY(rec_get_n_fields(rec, index)
03864         != dtuple_get_n_fields(entry))) {
03865     fputs("InnoDB: Trying to insert a record from"
03866           " the insert buffer to an index page\n"
03867           "InnoDB: but the number of fields does not match!\n",
03868           stderr);
          /* Shared exit for all three sanity-check failures. */
03869 dump:
03870     buf_page_print(page, 0);
03871 
03872     dtuple_print(stderr, entry);
03873 
03874     fputs("InnoDB: The table where"
03875           " this index record belongs\n"
03876           "InnoDB: is now probably corrupt."
03877           " Please run CHECK TABLE on\n"
03878           "InnoDB: your tables.\n"
03879           "InnoDB: Submit a detailed bug report to"
03880           " http://bugs.mysql.com!\n", stderr);
03881 
03882     return;
03883   }
03884 
03885   low_match = page_cur_search(block, index, entry,
03886             PAGE_CUR_LE, &page_cur);
03887 
        /* A match on every field means the page already contains a
        record equal to entry under the index ordering. */
03888   if (UNIV_UNLIKELY(low_match == dtuple_get_n_fields(entry))) {
03889     mem_heap_t* heap;
03890     upd_t*    update;
03891     ulint*    offsets;
03892     page_zip_des_t* page_zip;
03893 
03894     rec = page_cur_get_rec(&page_cur);
03895 
03896     /* This is based on
03897     row_ins_sec_index_entry_by_modify(BTR_MODIFY_LEAF). */
03898     ut_ad(rec_get_deleted_flag(rec, page_is_comp(page)));
03899 
03900     heap = mem_heap_create(1024);
03901 
03902     offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED,
03903             &heap);
03904     update = row_upd_build_sec_rec_difference_binary(
03905       index, entry, rec, NULL, heap);
03906 
03907     page_zip = buf_block_get_page_zip(block);
03908 
03909     if (update->n_fields == 0) {
03910       /* The records only differ in the delete-mark.
03911       Clear the delete-mark, like we did before
03912       Bug #56680 was fixed. */
03913       btr_cur_set_deleted_flag_for_ibuf(
03914         rec, page_zip, FALSE, mtr);
            /* Also the exit of the in-place update below. */
03915 updated_in_place:
03916       mem_heap_free(heap);
03917       return;
03918     }
03919 
03920     /* Copy the info bits. Clear the delete-mark. */
03921     update->info_bits = rec_get_info_bits(rec, page_is_comp(page));
03922     update->info_bits &= ~REC_INFO_DELETED_FLAG;
03923 
03924     /* We cannot invoke btr_cur_optimistic_update() here,
03925     because we do not have a btr_cur_t or que_thr_t,
03926     as the insert buffer merge occurs at a very low level. */
03927     if (!row_upd_changes_field_size_or_external(index, offsets,
03928                   update)
03929         && (!page_zip || btr_cur_update_alloc_zip(
03930         page_zip, block, index,
03931         rec_offs_size(offsets), FALSE, mtr))) {
03932       /* This is the easy case. Do something similar
03933       to btr_cur_update_in_place(). */
03934       row_upd_rec_in_place(rec, index, offsets,
03935                update, page_zip);
03936       goto updated_in_place;
03937     }
03938 
03939     /* A collation may identify values that differ in
03940     storage length.
03941     Some examples (1 or 2 bytes):
03942     utf8_turkish_ci: I = U+0131 LATIN SMALL LETTER DOTLESS I
03943     utf8_general_ci: S = U+00DF LATIN SMALL LETTER SHARP S
03944     utf8_general_ci: A = U+00E4 LATIN SMALL LETTER A WITH DIAERESIS
03945 
03946     latin1_german2_ci: SS = U+00DF LATIN SMALL LETTER SHARP S
03947 
03948     Examples of a character (3-byte UTF-8 sequence)
03949     identified with 2 or 4 characters (1-byte UTF-8 sequences):
03950 
03951     utf8_unicode_ci: 'II' = U+2171 SMALL ROMAN NUMERAL TWO
03952     utf8_unicode_ci: '(10)' = U+247D PARENTHESIZED NUMBER TEN
03953     */
03954 
03955     /* Delete the different-length record, and insert the
03956     buffered one. */
03957 
          /* The record locks are parked on the page infimum while
          the old record is removed and the new one is inserted. The
          cursor is stepped back so that the insert happens after the
          predecessor of the removed record. */
03958     lock_rec_store_on_page_infimum(block, rec);
03959     page_cur_delete_rec(&page_cur, index, offsets, mtr);
03960     page_cur_move_to_prev(&page_cur);
03961     mem_heap_free(heap);
03962 
03963     ibuf_insert_to_index_page_low(entry, block, index, mtr,
03964                 &page_cur);
          /* NOTE(review): rec was removed from the page by
          page_cur_delete_rec() above, yet it is passed here as the
          record that receives the parked locks. Confirm whether the
          newly inserted record should be passed instead. */
03965     lock_rec_restore_from_page_infimum(block, rec, block);
03966   } else {
03967     ibuf_insert_to_index_page_low(entry, block, index, mtr,
03968                 &page_cur);
03969   }
03970 }
03971 
03972 /****************************************************************/
/* Sets the delete-mark on the record of the index page that matches
entry on all fields. If the record is already delete-marked nothing is
changed. If no fully matching record is found, the tuple and the record
under the cursor are printed to stderr and a debug assertion fires. */
03975 static
03976 void
03977 ibuf_set_del_mark(
03978 /*==============*/
03979   const dtuple_t*   entry,  /* in: entry identifying the record */
03980   buf_block_t*    block,  /* in/out: block holding the index page */
03981   const dict_index_t* index,  /* in: index the record belongs to */
03982   mtr_t*      mtr)  /* in: mini-transaction used for the change */
03983 {
03984   page_cur_t  page_cur;
03985   ulint   low_match;
03986 
03987   ut_ad(ibuf_inside());
03988   ut_ad(dtuple_check_typed(entry));
03989 
03990   low_match = page_cur_search(
03991     block, index, entry, PAGE_CUR_LE, &page_cur);
03992 
        /* Only a match on every field of entry counts as found. */
03993   if (low_match == dtuple_get_n_fields(entry)) {
03994     rec_t*    rec;
03995     page_zip_des_t* page_zip;
03996 
03997     rec = page_cur_get_rec(&page_cur);
03998     page_zip = page_cur_get_page_zip(&page_cur);
03999 
04000     /* Delete mark the old index record. According to a
04001     comment in row_upd_sec_index_entry(), it can already
04002     have been delete marked if a lock wait occurred in
04003     row_ins_index_entry() in a previous invocation of
04004     row_upd_sec_index_entry(). */
04005 
04006     if (UNIV_LIKELY
04007         (!rec_get_deleted_flag(
04008           rec, dict_table_is_comp(index->table)))) {
04009       btr_cur_set_deleted_flag_for_ibuf(rec, page_zip,
04010                 TRUE, mtr);
04011     }
04012   } else {
          /* Not found: report it. ut_ad(0) stops debug builds;
          otherwise the function simply returns. */
04013     ut_print_timestamp(stderr);
04014     fputs("  InnoDB: unable to find a record to delete-mark\n",
04015           stderr);
04016     fputs("InnoDB: tuple ", stderr);
04017     dtuple_print(stderr, entry);
04018     fputs("\n"
04019           "InnoDB: record ", stderr);
04020     rec_print(stderr, page_cur_get_rec(&page_cur), index);
04021     putc('\n', stderr);
04022     fputs("\n"
04023           "InnoDB: Submit a detailed bug report"
04024           " to http://bugs.mysql.com\n", stderr);
04025     ut_ad(0);
04026   }
04027 }
04028 
04029 /****************************************************************/
/* Removes from the index page the record that matches entry on all
fields, and then updates the insert buffer free bits of the page. If no
fully matching record is found, the function does nothing. The page must
hold more than one record (enforced with ut_a). */
04031 static
04032 void
04033 ibuf_delete(
04034 /*========*/
04035   const dtuple_t* entry,  /* in: entry identifying the record */
04036   buf_block_t*  block,  /* in/out: block holding the index page */
04037   dict_index_t* index,  /* in: index the record belongs to */
04038   mtr_t*    mtr)  /* in/out: mini-transaction used for the changes */
04040 {
04041   page_cur_t  page_cur;
04042   ulint   low_match;
04043 
04044   ut_ad(ibuf_inside());
04045   ut_ad(dtuple_check_typed(entry));
04046 
04047   low_match = page_cur_search(
04048     block, index, entry, PAGE_CUR_LE, &page_cur);
04049 
04050   if (low_match == dtuple_get_n_fields(entry)) {
04051     page_zip_des_t* page_zip= buf_block_get_page_zip(block);
04052     page_t*   page  = buf_block_get_frame(block);
04053     rec_t*    rec = page_cur_get_rec(&page_cur);
04054 
04055     /* TODO: the below should probably be a separate function,
04056     it's a bastardized version of btr_cur_optimistic_delete. */
04057 
04058     ulint   offsets_[REC_OFFS_NORMAL_SIZE];
04059     ulint*    offsets = offsets_;
04060     mem_heap_t* heap = NULL;
          /* Only assigned and read when the page is not compressed;
          initialized so that it is never read indeterminate. */
04061     ulint   max_ins_size = 0;
04062 
04063     rec_offs_init(offsets_);
04064 
          /* heap is set only if the on-stack offsets array does
          not suffice; it is freed at the end in that case. */
04065     offsets = rec_get_offsets(
04066       rec, index, offsets, ULINT_UNDEFINED, &heap);
04067 
04068     /* Refuse to delete the last record. */
04069     ut_a(page_get_n_recs(page) > 1);
04070 
04071     /* The record should have been marked for deletion. */
04072     ut_ad(REC_INFO_DELETED_FLAG
04073           & rec_get_info_bits(rec, page_is_comp(page)));
04074 
04075     lock_update_delete(block, rec);
04076 
          /* For uncompressed pages the maximum insert size is
          sampled before the deletion and passed to
          ibuf_update_free_bits_low() afterwards. */
04077     if (!page_zip) {
04078       max_ins_size
04079         = page_get_max_insert_size_after_reorganize(
04080           page, 1);
04081     }
04082 #ifdef UNIV_ZIP_DEBUG
04083     ut_a(!page_zip || page_zip_validate(page_zip, page));
04084 #endif /* UNIV_ZIP_DEBUG */
04085     page_cur_delete_rec(&page_cur, index, offsets, mtr);
04086 #ifdef UNIV_ZIP_DEBUG
04087     ut_a(!page_zip || page_zip_validate(page_zip, page));
04088 #endif /* UNIV_ZIP_DEBUG */
04089 
04090     if (page_zip) {
04091       ibuf_update_free_bits_zip(block, mtr);
04092     } else {
04093       ibuf_update_free_bits_low(block, max_ins_size, mtr);
04094     }
04095 
04096     if (UNIV_LIKELY_NULL(heap)) {
04097       mem_heap_free(heap);
04098     }
04099   } else {
04100     /* The record must have been purged already. */
04101   }
04102 }
04103 
04104 /*********************************************************************/
/* Restores the stored position of a persistent cursor on the insert
buffer tree.
Returns TRUE if btr_pcur_restore_position() succeeded. Otherwise returns
FALSE, and in that case mtr has been committed through
btr_pcur_commit_specify_mtr(). Unless the tablespace no longer exists,
the failure is reported to stderr and the insert buffer tree is
validated; a failed validation aborts via ut_error. */
04107 static __attribute__((nonnull))
04108 ibool
04109 ibuf_restore_pos(
04110 /*=============*/
04111   ulint   space,  /* in: space id, used for the existence check and the report */
04112   ulint   page_no, /* in: page number, used in the report only */
04114   const dtuple_t* search_tuple, /* in: search tuple, printed in the report only */
04116   ulint   mode, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
04117   btr_pcur_t* pcur, /* in/out: persistent cursor whose position is restored */
04119   mtr_t*    mtr)  /* in/out: mini-transaction; committed when FALSE is returned */
04120 {
04121   ut_ad(mode == BTR_MODIFY_LEAF || mode == BTR_MODIFY_TREE);
04122 
04123   if (btr_pcur_restore_position(mode, pcur, mtr)) {
04124 
04125     return(TRUE);
04126   }
04127 
04128   if (fil_space_get_flags(space) == ULINT_UNDEFINED) {
04129     /* The tablespace has been dropped.  It is possible
04130     that another thread has deleted the insert buffer
04131     entry.  Do not complain. */
04132     btr_pcur_commit_specify_mtr(pcur, mtr);
04133   } else {
04134     fprintf(stderr,
04135       "InnoDB: ERROR: Submit the output to"
04136       " http://bugs.mysql.com\n"
04137       "InnoDB: ibuf cursor restoration fails!\n"
04138       "InnoDB: ibuf record inserted to page %lu:%lu\n",
04139       (ulong) space, (ulong) page_no);
04140     fflush(stderr);
04141 
          /* Dump the record under the cursor, the stored record,
          the search tuple and the record following the cursor. */
04142     rec_print_old(stderr, btr_pcur_get_rec(pcur));
04143     rec_print_old(stderr, pcur->old_rec);
04144     dtuple_print(stderr, search_tuple);
04145 
04146     rec_print_old(stderr,
04147             page_rec_get_next(btr_pcur_get_rec(pcur)));
04148     fflush(stderr);
04149 
          /* The mtr is committed before the tree is validated. */
04150     btr_pcur_commit_specify_mtr(pcur, mtr);
04151 
04152     fputs("InnoDB: Validating insert buffer tree:\n", stderr);
04153     if (!btr_validate_index(ibuf->index, NULL)) {
04154       ut_error;
04155     }
04156 
04157     fprintf(stderr, "InnoDB: ibuf tree ok\n");
04158     fflush(stderr);
04159   }
04160 
04161   return(FALSE);
04162 }
04163 
04164 /*********************************************************************/
/* Deletes the insert buffer record on which pcur is positioned.
An optimistic delete is tried first. If it succeeds the function returns
FALSE, leaving mtr uncommitted and pcur open. Otherwise the cursor
position is stored, mtr is committed and restarted, the position is
restored in BTR_MODIFY_TREE mode under ibuf_mutex, and the record is
removed with a pessimistic delete. On that path the function returns
TRUE, with mtr committed and pcur closed, whether or not the position
could be restored. */
04169 static
04170 ibool
04171 ibuf_delete_rec(
04172 /*============*/
04173   ulint   space,  /* in: space id the buffered record refers to */
04174   ulint   page_no, /* in: page number the buffered record refers to */
04176   btr_pcur_t* pcur, /* in/out: cursor positioned on the record to delete */
04178   const dtuple_t* search_tuple, /* in: search tuple, forwarded to ibuf_restore_pos() */
04180   mtr_t*    mtr)  /* in/out: mini-transaction; committed when TRUE is returned */
04181 {
04182   ibool   success;
04183   page_t*   root;
04184   ulint   err;
04185 
04186   ut_ad(ibuf_inside());
04187   ut_ad(page_rec_is_user_rec(btr_pcur_get_rec(pcur)));
04188   ut_ad(ibuf_rec_get_page_no(btr_pcur_get_rec(pcur)) == page_no);
04189   ut_ad(ibuf_rec_get_space(btr_pcur_get_rec(pcur)) == space);
04190 
04191   success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr);
04192 
04193   if (success) {
04194     if (UNIV_UNLIKELY(!page_get_n_recs(btr_pcur_get_page(pcur)))) {
04195       /* If a B-tree page is empty, it must be the root page
04196       and the whole B-tree must be empty. InnoDB does not
04197       allow empty B-tree pages other than the root. */
04198       root = btr_pcur_get_page(pcur);
04199 
04200       ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
04201       ut_ad(page_get_page_no(root)
04202             == FSP_IBUF_TREE_ROOT_PAGE_NO);
04203 
04204       /* ibuf->empty is protected by the root page latch.
04205       Before the deletion, it had to be FALSE. */
04206       ut_ad(!ibuf->empty);
04207       ibuf->empty = TRUE;
04208     }
04209 
04210 #ifdef UNIV_IBUF_COUNT_DEBUG
04211     fprintf(stderr,
04212       "Decrementing ibuf count of space %lu page %lu\n"
04213       "from %lu by 1\n", space, page_no,
04214       ibuf_count_get(space, page_no));
04215     ibuf_count_set(space, page_no,
04216              ibuf_count_get(space, page_no) - 1);
04217 #endif
          /* FALSE: mtr was not committed and pcur was not closed. */
04218     return(FALSE);
04219   }
04220 
04221   ut_ad(page_rec_is_user_rec(btr_pcur_get_rec(pcur)));
04222   ut_ad(ibuf_rec_get_page_no(btr_pcur_get_rec(pcur)) == page_no);
04223   ut_ad(ibuf_rec_get_space(btr_pcur_get_rec(pcur)) == space);
04224 
04225   /* We have to resort to a pessimistic delete from ibuf */
04226   btr_pcur_store_position(pcur, mtr);
04227 
04228   btr_pcur_commit_specify_mtr(pcur, mtr);
04229 
        /* ibuf_mutex is taken only after the mtr has been committed,
        and before the mtr is restarted. */
04230   mutex_enter(&ibuf_mutex);
04231 
04232   mtr_start(mtr);
04233 
04234   if (!ibuf_restore_pos(space, page_no, search_tuple,
04235             BTR_MODIFY_TREE, pcur, mtr)) {
04236 
          /* ibuf_restore_pos() has already committed mtr. */
04237     mutex_exit(&ibuf_mutex);
04238     goto func_exit;
04239   }
04240 
04241   root = ibuf_tree_root_get(mtr);
04242 
04243   btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur),
04244            RB_NONE, mtr);
04245   ut_a(err == DB_SUCCESS);
04246 
04247 #ifdef UNIV_IBUF_COUNT_DEBUG
04248   ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1);
04249 #endif
04250   ibuf_size_update(root, mtr);
04251   mutex_exit(&ibuf_mutex);
04252 
        /* ibuf->empty is refreshed from the root page before the
        mtr is committed. */
04253   ibuf->empty = (page_get_n_recs(root) == 0);
04254   btr_pcur_commit_specify_mtr(pcur, mtr);
04255 
04256 func_exit:
04257   btr_pcur_close(pcur);
04258 
04259   return(TRUE);
04260 }
04261 
04262 /*********************************************************************/
04269 UNIV_INTERN
04270 void
04271 ibuf_merge_or_delete_for_page(
04272 /*==========================*/
04273   buf_block_t*  block,  
04276   ulint   space,  
04277   ulint   page_no,
04278   ulint   zip_size,
04280   ibool   update_ibuf_bitmap)
04285 {
04286   mem_heap_t* heap;
04287   btr_pcur_t  pcur;
04288   dtuple_t* search_tuple;
04289 #ifdef UNIV_IBUF_DEBUG
04290   ulint   volume      = 0;
04291 #endif
04292   page_zip_des_t* page_zip    = NULL;
04293   ibool   tablespace_being_deleted = FALSE;
04294   ibool   corruption_noticed  = FALSE;
04295   mtr_t   mtr;
04296 
04297   /* Counts for merged & discarded operations. */
04298   ulint   mops[IBUF_OP_COUNT];
04299   ulint   dops[IBUF_OP_COUNT];
04300 
04301   ut_ad(!block || buf_block_get_space(block) == space);
04302   ut_ad(!block || buf_block_get_page_no(block) == page_no);
04303   ut_ad(!block || buf_block_get_zip_size(block) == zip_size);
04304 
04305   if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE
04306       || trx_sys_hdr_page(space, page_no)) {
04307     return;
04308   }
04309 
04310   /* We cannot refer to zip_size in the following, because
04311   zip_size is passed as ULINT_UNDEFINED (it is unknown) when
04312   buf_read_ibuf_merge_pages() is merging (discarding) changes
04313   for a dropped tablespace.  When block != NULL or
04314   update_ibuf_bitmap is specified, the zip_size must be known.
04315   That is why we will repeat the check below, with zip_size in
04316   place of 0.  Passing zip_size as 0 assumes that the
04317   uncompressed page size always is a power-of-2 multiple of the
04318   compressed page size. */
04319 
04320   if (ibuf_fixed_addr_page(space, 0, page_no)
04321       || fsp_descr_page(0, page_no)) {
04322     return;
04323   }
04324 
04325   if (UNIV_LIKELY(update_ibuf_bitmap)) {
04326     ut_a(ut_is_2pow(zip_size));
04327 
04328     if (ibuf_fixed_addr_page(space, zip_size, page_no)
04329         || fsp_descr_page(zip_size, page_no)) {
04330       return;
04331     }
04332 
04333     /* If the following returns FALSE, we get the counter
04334     incremented, and must decrement it when we leave this
04335     function. When the counter is > 0, that prevents tablespace
04336     from being dropped. */
04337 
04338     tablespace_being_deleted = fil_inc_pending_ibuf_merges(space);
04339 
04340     if (UNIV_UNLIKELY(tablespace_being_deleted)) {
04341       /* Do not try to read the bitmap page from space;
04342       just delete the ibuf records for the page */
04343 
04344       block = NULL;
04345       update_ibuf_bitmap = FALSE;
04346     } else {
04347       page_t* bitmap_page;
04348 
04349       mtr_start(&mtr);
04350 
04351       bitmap_page = ibuf_bitmap_get_map_page(
04352         space, page_no, zip_size, &mtr);
04353 
04354       if (!ibuf_bitmap_page_get_bits(bitmap_page, page_no,
04355                    zip_size,
04356                    IBUF_BITMAP_BUFFERED,
04357                    &mtr)) {
04358         /* No inserts buffered for this page */
04359         mtr_commit(&mtr);
04360 
04361         if (!tablespace_being_deleted) {
04362           fil_decr_pending_ibuf_merges(space);
04363         }
04364 
04365         return;
04366       }
04367       mtr_commit(&mtr);
04368     }
04369   } else if (block
04370        && (ibuf_fixed_addr_page(space, zip_size, page_no)
04371           || fsp_descr_page(zip_size, page_no))) {
04372 
04373     return;
04374   }
04375 
04376   ibuf_enter();
04377 
04378   heap = mem_heap_create(512);
04379 
04380   if (!trx_sys_multiple_tablespace_format) {
04381     ut_a(trx_doublewrite_must_reset_space_ids);
04382     search_tuple = ibuf_search_tuple_build(space, page_no, heap);
04383   } else {
04384     search_tuple = ibuf_new_search_tuple_build(space, page_no,
04385                  heap);
04386   }
04387 
04388   if (block) {
04389     /* Move the ownership of the x-latch on the page to this OS
04390     thread, so that we can acquire a second x-latch on it. This
04391     is needed for the insert operations to the index page to pass
04392     the debug checks. */
04393 
04394     rw_lock_x_lock_move_ownership(&(block->lock));
04395     page_zip = buf_block_get_page_zip(block);
04396 
04397     if (UNIV_UNLIKELY(fil_page_get_type(block->frame)
04398           != FIL_PAGE_INDEX)
04399         || UNIV_UNLIKELY(!page_is_leaf(block->frame))) {
04400 
04401       page_t* bitmap_page;
04402 
04403       corruption_noticed = TRUE;
04404 
04405       ut_print_timestamp(stderr);
04406 
04407       mtr_start(&mtr);
04408 
04409       fputs("  InnoDB: Dump of the ibuf bitmap page:\n",
04410             stderr);
04411 
04412       bitmap_page = ibuf_bitmap_get_map_page(space, page_no,
04413                      zip_size, &mtr);
04414       buf_page_print(bitmap_page, 0);
04415 
04416       mtr_commit(&mtr);
04417 
04418       fputs("\nInnoDB: Dump of the page:\n", stderr);
04419 
04420       buf_page_print(block->frame, 0);
04421 
04422       fprintf(stderr,
04423         "InnoDB: Error: corruption in the tablespace."
04424         " Bitmap shows insert\n"
04425         "InnoDB: buffer records to page n:o %lu"
04426         " though the page\n"
04427         "InnoDB: type is %lu, which is"
04428         " not an index leaf page!\n"
04429         "InnoDB: We try to resolve the problem"
04430         " by skipping the insert buffer\n"
04431         "InnoDB: merge for this page."
04432         " Please run CHECK TABLE on your tables\n"
04433         "InnoDB: to determine if they are corrupt"
04434         " after this.\n\n"
04435         "InnoDB: Please submit a detailed bug report"
04436         " to http://bugs.mysql.com\n\n",
04437         (ulong) page_no,
04438         (ulong)
04439         fil_page_get_type(block->frame));
04440     }
04441   }
04442 
04443   memset(mops, 0, sizeof(mops));
04444   memset(dops, 0, sizeof(dops));
04445 
04446 loop:
04447   mtr_start(&mtr);
04448 
04449   if (block) {
04450     ibool success;
04451 
04452     success = buf_page_get_known_nowait(
04453       RW_X_LATCH, block,
04454       BUF_KEEP_OLD, __FILE__, __LINE__, &mtr);
04455 
04456     ut_a(success);
04457 
04458     buf_block_dbg_add_level(block, SYNC_TREE_NODE);
04459   }
04460 
04461   /* Position pcur in the insert buffer at the first entry for this
04462   index page */
04463   btr_pcur_open_on_user_rec(
04464     ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF,
04465     &pcur, &mtr);
04466 
04467   if (!btr_pcur_is_on_user_rec(&pcur)) {
04468     ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
04469 
04470     goto reset_bit;
04471   }
04472 
04473   for (;;) {
04474     rec_t*  rec;
04475 
04476     ut_ad(btr_pcur_is_on_user_rec(&pcur));
04477 
04478     rec = btr_pcur_get_rec(&pcur);
04479 
04480     /* Check if the entry is for this index page */
04481     if (ibuf_rec_get_page_no(rec) != page_no
04482         || ibuf_rec_get_space(rec) != space) {
04483 
04484       if (block) {
04485         page_header_reset_last_insert(
04486           block->frame, page_zip, &mtr);
04487       }
04488 
04489       goto reset_bit;
04490     }
04491 
04492     if (UNIV_UNLIKELY(corruption_noticed)) {
04493       fputs("InnoDB: Discarding record\n ", stderr);
04494       rec_print_old(stderr, rec);
04495       fputs("\nInnoDB: from the insert buffer!\n\n", stderr);
04496     } else if (block) {
04497       /* Now we have at pcur a record which should be
04498       applied on the index page; NOTE that the call below
04499       copies pointers to fields in rec, and we must
04500       keep the latch to the rec page until the
04501       insertion is finished! */
04502       dtuple_t* entry;
04503       trx_id_t  max_trx_id;
04504       dict_index_t* dummy_index;
04505       ibuf_op_t op = ibuf_rec_get_op_type(rec);
04506 
04507       max_trx_id = page_get_max_trx_id(page_align(rec));
04508       page_update_max_trx_id(block, page_zip, max_trx_id,
04509                  &mtr);
04510 
04511       ut_ad(page_validate(page_align(rec), ibuf->index));
04512 
04513       entry = ibuf_build_entry_from_ibuf_rec(
04514         rec, heap, &dummy_index);
04515 
04516       ut_ad(page_validate(block->frame, dummy_index));
04517 
04518       switch (op) {
04519         ibool success;
04520       case IBUF_OP_INSERT:
04521 #ifdef UNIV_IBUF_DEBUG
04522         volume += rec_get_converted_size(
04523           dummy_index, entry, 0);
04524 
04525         volume += page_dir_calc_reserved_space(1);
04526 
04527         ut_a(volume <= 4 * UNIV_PAGE_SIZE
04528           / IBUF_PAGE_SIZE_PER_FREE_SPACE);
04529 #endif
04530         ibuf_insert_to_index_page(
04531           entry, block, dummy_index, &mtr);
04532         break;
04533 
04534       case IBUF_OP_DELETE_MARK:
04535         ibuf_set_del_mark(
04536           entry, block, dummy_index, &mtr);
04537         break;
04538 
04539       case IBUF_OP_DELETE:
04540         ibuf_delete(entry, block, dummy_index, &mtr);
04541         /* Because ibuf_delete() will latch an
04542         insert buffer bitmap page, commit mtr
04543         before latching any further pages.
04544         Store and restore the cursor position. */
04545         ut_ad(rec == btr_pcur_get_rec(&pcur));
04546         ut_ad(page_rec_is_user_rec(rec));
04547         ut_ad(ibuf_rec_get_page_no(rec) == page_no);
04548         ut_ad(ibuf_rec_get_space(rec) == space);
04549 
04550         btr_pcur_store_position(&pcur, &mtr);
04551         btr_pcur_commit_specify_mtr(&pcur, &mtr);
04552 
04553         mtr_start(&mtr);
04554 
04555         success = buf_page_get_known_nowait(
04556           RW_X_LATCH, block,
04557           BUF_KEEP_OLD,
04558           __FILE__, __LINE__, &mtr);
04559         ut_a(success);
04560 
04561         buf_block_dbg_add_level(block, SYNC_TREE_NODE);
04562 
04563         if (!ibuf_restore_pos(space, page_no,
04564                   search_tuple,
04565                   BTR_MODIFY_LEAF,
04566                   &pcur, &mtr)) {
04567 
04568           mtr_commit(&mtr);
04569           mops[op]++;
04570           ibuf_dummy_index_free(dummy_index);
04571           goto loop;
04572         }
04573 
04574         break;
04575       default:
04576         ut_error;
04577       }
04578 
04579       mops[op]++;
04580 
04581       ibuf_dummy_index_free(dummy_index);
04582     } else {
04583       dops[ibuf_rec_get_op_type(rec)]++;
04584     }
04585 
04586     /* Delete the record from ibuf */
04587     if (ibuf_delete_rec(space, page_no, &pcur, search_tuple,
04588             &mtr)) {
04589       /* Deletion was pessimistic and mtr was committed:
04590       we start from the beginning again */
04591 
04592       goto loop;
04593     } else if (btr_pcur_is_after_last_on_page(&pcur)) {
04594       mtr_commit(&mtr);
04595       btr_pcur_close(&pcur);
04596 
04597       goto loop;
04598     }
04599   }
04600 
04601 reset_bit:
04602   if (UNIV_LIKELY(update_ibuf_bitmap)) {
04603     page_t* bitmap_page;
04604 
04605     bitmap_page = ibuf_bitmap_get_map_page(
04606       space, page_no, zip_size, &mtr);
04607 
04608     ibuf_bitmap_page_set_bits(
04609       bitmap_page, page_no, zip_size,
04610       IBUF_BITMAP_BUFFERED, FALSE, &mtr);
04611 
04612     if (block) {
04613       ulint old_bits = ibuf_bitmap_page_get_bits(
04614         bitmap_page, page_no, zip_size,
04615         IBUF_BITMAP_FREE, &mtr);
04616 
04617       ulint new_bits = ibuf_index_page_calc_free(
04618         zip_size, block);
04619 
04620       if (old_bits != new_bits) {
04621         ibuf_bitmap_page_set_bits(
04622           bitmap_page, page_no, zip_size,
04623           IBUF_BITMAP_FREE, new_bits, &mtr);
04624       }
04625     }
04626   }
04627 
04628   mtr_commit(&mtr);
04629   btr_pcur_close(&pcur);
04630   mem_heap_free(heap);
04631 
04632 #ifdef HAVE_ATOMIC_BUILTINS
04633   os_atomic_increment_ulint(&ibuf->n_merges, 1);
04634   ibuf_add_ops(ibuf->n_merged_ops, mops);
04635   ibuf_add_ops(ibuf->n_discarded_ops, dops);
04636 #else /* HAVE_ATOMIC_BUILTINS */
04637   /* Protect our statistics keeping from race conditions */
04638   mutex_enter(&ibuf_mutex);
04639 
04640   ibuf->n_merges++;
04641   ibuf_add_ops(ibuf->n_merged_ops, mops);
04642   ibuf_add_ops(ibuf->n_discarded_ops, dops);
04643 
04644   mutex_exit(&ibuf_mutex);
04645 #endif /* HAVE_ATOMIC_BUILTINS */
04646 
04647   if (update_ibuf_bitmap && !tablespace_being_deleted) {
04648 
04649     fil_decr_pending_ibuf_merges(space);
04650   }
04651 
04652   ibuf_exit();
04653 
04654 #ifdef UNIV_IBUF_COUNT_DEBUG
04655   ut_a(ibuf_count_get(space, page_no) == 0);
04656 #endif
04657 }
04658 
04659 /*********************************************************************/
04664 UNIV_INTERN
04665 void
04666 ibuf_delete_for_discarded_space(
04667 /*============================*/
04668   ulint space)  
04669 {
04670   mem_heap_t* heap;
04671   btr_pcur_t  pcur;
04672   dtuple_t* search_tuple;
04673   rec_t*    ibuf_rec;
04674   ulint   page_no;
04675   ibool   closed;
04676   mtr_t   mtr;
04677 
04678   /* Counts for discarded operations. */
04679   ulint   dops[IBUF_OP_COUNT];
04680 
04681   heap = mem_heap_create(512);
04682 
04683   /* Use page number 0 to build the search tuple so that we get the
04684   cursor positioned at the first entry for this space id */
04685 
04686   search_tuple = ibuf_new_search_tuple_build(space, 0, heap);
04687 
04688   memset(dops, 0, sizeof(dops));
04689 loop:
04690   ibuf_enter();
04691 
04692   mtr_start(&mtr);
04693 
04694   /* Position pcur in the insert buffer at the first entry for the
04695   space */
04696   btr_pcur_open_on_user_rec(
04697     ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF,
04698     &pcur, &mtr);
04699 
04700   if (!btr_pcur_is_on_user_rec(&pcur)) {
04701     ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
04702 
04703     goto leave_loop;
04704   }
04705 
04706   for (;;) {
04707     ut_ad(btr_pcur_is_on_user_rec(&pcur));
04708 
04709     ibuf_rec = btr_pcur_get_rec(&pcur);
04710 
04711     /* Check if the entry is for this space */
04712     if (ibuf_rec_get_space(ibuf_rec) != space) {
04713 
04714       goto leave_loop;
04715     }
04716 
04717     page_no = ibuf_rec_get_page_no(ibuf_rec);
04718 
04719     dops[ibuf_rec_get_op_type(ibuf_rec)]++;
04720 
04721     /* Delete the record from ibuf */
04722     closed = ibuf_delete_rec(space, page_no, &pcur, search_tuple,
04723            &mtr);
04724     if (closed) {
04725       /* Deletion was pessimistic and mtr was committed:
04726       we start from the beginning again */
04727 
04728       ibuf_exit();
04729 
04730       goto loop;
04731     }
04732 
04733     if (btr_pcur_is_after_last_on_page(&pcur)) {
04734       mtr_commit(&mtr);
04735       btr_pcur_close(&pcur);
04736 
04737       ibuf_exit();
04738 
04739       goto loop;
04740     }
04741   }
04742 
04743 leave_loop:
04744   mtr_commit(&mtr);
04745   btr_pcur_close(&pcur);
04746 
04747 #ifdef HAVE_ATOMIC_BUILTINS
04748   ibuf_add_ops(ibuf->n_discarded_ops, dops);
04749 #else /* HAVE_ATOMIC_BUILTINS */
04750   /* Protect our statistics keeping from race conditions */
04751   mutex_enter(&ibuf_mutex);
04752   ibuf_add_ops(ibuf->n_discarded_ops, dops);
04753   mutex_exit(&ibuf_mutex);
04754 #endif /* HAVE_ATOMIC_BUILTINS */
04755 
04756   ibuf_exit();
04757 
04758   mem_heap_free(heap);
04759 }
04760 
04761 /******************************************************************/
04764 UNIV_INTERN
04765 ibool
04766 ibuf_is_empty(void)
04767 /*===============*/
04768 {
04769   ibool   is_empty;
04770   const page_t* root;
04771   mtr_t   mtr;
04772 
04773   ibuf_enter();
04774   mtr_start(&mtr);
04775 
04776   mutex_enter(&ibuf_mutex);
04777   root = ibuf_tree_root_get(&mtr);
04778   mutex_exit(&ibuf_mutex);
04779 
04780   is_empty = (page_get_n_recs(root) == 0);
04781   mtr_commit(&mtr);
04782   ibuf_exit();
04783 
04784   ut_a(is_empty == ibuf->empty);
04785 
04786   return(is_empty);
04787 }
04788 
04789 /******************************************************************/
04791 UNIV_INTERN
04792 void
04793 ibuf_print(
04794 /*=======*/
04795   FILE* file) 
04796 {
04797 #ifdef UNIV_IBUF_COUNT_DEBUG
04798   ulint   i;
04799   ulint   j;
04800 #endif
04801 
04802   mutex_enter(&ibuf_mutex);
04803 
04804   fprintf(file,
04805     "Ibuf: size %lu, free list len %lu,"
04806     " seg size %lu, %lu merges\n",
04807     (ulong) ibuf->size,
04808     (ulong) ibuf->free_list_len,
04809     (ulong) ibuf->seg_size,
04810     (ulong) ibuf->n_merges);
04811 
04812   fputs("merged operations:\n ", file);
04813   ibuf_print_ops(ibuf->n_merged_ops, file);
04814 
04815   fputs("discarded operations:\n ", file);
04816   ibuf_print_ops(ibuf->n_discarded_ops, file);
04817 
04818 #ifdef UNIV_IBUF_COUNT_DEBUG
04819   for (i = 0; i < IBUF_COUNT_N_SPACES; i++) {
04820     for (j = 0; j < IBUF_COUNT_N_PAGES; j++) {
04821       ulint count = ibuf_count_get(i, j);
04822 
04823       if (count > 0) {
04824         fprintf(stderr,
04825           "Ibuf count for space/page %lu/%lu"
04826           " is %lu\n",
04827           (ulong) i, (ulong) j, (ulong) count);
04828       }
04829     }
04830   }
04831 #endif /* UNIV_IBUF_COUNT_DEBUG */
04832 
04833   mutex_exit(&ibuf_mutex);
04834 }
04835 #endif /* !UNIV_HOTBACKUP */