Drizzled Public API Documentation

buf0buf.cc

00001 /*****************************************************************************
00002 
00003 Copyright (C) 1995, 2010, Innobase Oy. All Rights Reserved.
00004 Copyright (C) 2008, Google Inc.
00005 
00006 Portions of this file contain modifications contributed and copyrighted by
00007 Google, Inc. Those modifications are gratefully acknowledged and are described
00008 briefly in the InnoDB documentation. The contributions by Google are
00009 incorporated with their permission, and subject to the conditions contained in
00010 the file COPYING.Google.
00011 
00012 This program is free software; you can redistribute it and/or modify it under
00013 the terms of the GNU General Public License as published by the Free Software
00014 Foundation; version 2 of the License.
00015 
00016 This program is distributed in the hope that it will be useful, but WITHOUT
00017 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00018 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
00019 
00020 You should have received a copy of the GNU General Public License along with
00021 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
00022 St, Fifth Floor, Boston, MA 02110-1301 USA
00023 
00024 *****************************************************************************/
00025 
00026 /**************************************************/
00033 #include "buf0buf.h"
00034 
00035 #ifdef UNIV_NONINL
00036 #include "buf0buf.ic"
00037 #endif
00038 
00039 #include "mem0mem.h"
00040 #include "btr0btr.h"
00041 #include "fil0fil.h"
00042 #ifndef UNIV_HOTBACKUP
00043 #include "buf0buddy.h"
00044 #include "lock0lock.h"
00045 #include "btr0sea.h"
00046 #include "ibuf0ibuf.h"
00047 #include "trx0undo.h"
00048 #include "log0log.h"
00049 #endif /* !UNIV_HOTBACKUP */
00050 #include "srv0srv.h"
00051 #include "dict0dict.h"
00052 #include "log0recv.h"
00053 #include "page0zip.h"
00054 
00055 #include <drizzled/errmsg_print.h>
00056 
00057 /*
00058     IMPLEMENTATION OF THE BUFFER POOL
00059     =================================
00060 
00061 Performance improvement:
00062 ------------------------
00063 Thread scheduling in NT may be so slow that the OS wait mechanism should
00064 not be used even in waiting for disk reads to complete.
00065 Rather, we should put waiting query threads to the queue of
00066 waiting jobs, and let the OS thread do something useful while the i/o
00067 is processed. In this way we could remove most OS thread switches in
00068 an i/o-intensive benchmark like TPC-C.
00069 
00070 A possibility is to put a user space thread library between the database
00071 and NT. User space thread libraries might be very fast.
00072 
00073 SQL Server 7.0 can be configured to use 'fibers' which are lightweight
00074 threads in NT. These should be studied.
00075 
00076     Buffer frames and blocks
00077     ------------------------
00078 Following the terminology of Gray and Reuter, we call the memory
00079 blocks where file pages are loaded buffer frames. For each buffer
00080 frame there is a control block, or shortly, a block, in the buffer
00081 control array. The control info which does not need to be stored
00082 in the file along with the file page, resides in the control block.
00083 
00084     Buffer pool struct
00085     ------------------
00086 The buffer buf_pool contains a single mutex which protects all the
00087 control data structures of the buf_pool. The content of a buffer frame is
00088 protected by a separate read-write lock in its control block, though.
00089 These locks can be locked and unlocked without owning the buf_pool->mutex.
00090 The OS events in the buf_pool struct can be waited for without owning the
00091 buf_pool->mutex.
00092 
00093 The buf_pool->mutex is a hot-spot in main memory, causing a lot of
00094 memory bus traffic on multiprocessor systems when processors
00095 alternately access the mutex. On our Pentium, the mutex is accessed
00096 maybe every 10 microseconds. We gave up the solution to have mutexes
00097 for each control block, for instance, because it seemed to be
00098 complicated.
00099 
00100 A solution to reduce mutex contention of the buf_pool->mutex is to
00101 create a separate mutex for the page hash table. On Pentium,
00102 accessing the hash table takes 2 microseconds, about half
00103 of the total buf_pool->mutex hold time.
00104 
00105     Control blocks
00106     --------------
00107 
00108 The control block contains, for instance, the bufferfix count
00109 which is incremented when a thread wants a file page to be fixed
00110 in a buffer frame. The bufferfix operation does not lock the
00111 contents of the frame, however. For this purpose, the control
00112 block contains a read-write lock.
00113 
00114 The buffer frames have to be aligned so that the start memory
00115 address of a frame is divisible by the universal page size, which
00116 is a power of two.
00117 
00118 We intend to make the buffer buf_pool size on-line reconfigurable,
00119 that is, the buf_pool size can be changed without closing the database.
00120 Then the database administrator may adjust it to be bigger
00121 at night, for example. The control block array must
00122 contain enough control blocks for the maximum buffer buf_pool size
00123 which is used in the particular database.
00124 If the buf_pool size is cut, we exploit the virtual memory mechanism of
00125 the OS, and just refrain from using frames at high addresses. Then the OS
00126 can swap them to disk.
00127 
00128 The control blocks containing file pages are put to a hash table
00129 according to the file address of the page.
00130 We could speed up the access to an individual page by using
00131 "pointer swizzling": we could replace the page references on
00132 non-leaf index pages by direct pointers to the page, if it exists
00133 in the buf_pool. We could make a separate hash table where we could
00134 chain all the page references in non-leaf pages residing in the buf_pool,
00135 using the page reference as the hash key,
00136 and at the time of reading of a page update the pointers accordingly.
00137 Drawbacks of this solution are added complexity and,
00138 possibly, extra space required on non-leaf pages for memory pointers.
00139 A simpler solution is just to speed up the hash table mechanism
00140 in the database, using tables whose size is a power of 2.
00141 
00142     Lists of blocks
00143     ---------------
00144 
00145 There are several lists of control blocks.
00146 
00147 The free list (buf_pool->free) contains blocks which are currently not
00148 used.
00149 
00150 The common LRU list contains all the blocks holding a file page
00151 except those for which the bufferfix count is non-zero.
00152 The pages are in the LRU list roughly in the order of the last
00153 access to the page, so that the oldest pages are at the end of the
00154 list. We also keep a pointer to near the end of the LRU list,
00155 which we can use when we want to artificially age a page in the
00156 buf_pool. This is used if we know that some page is not needed
00157 again for some time: we insert the block right after the pointer,
00158 causing it to be replaced sooner than would normally be the case.
00159 Currently this aging mechanism is used for read-ahead mechanism
00160 of pages, and it can also be used when there is a scan of a full
00161 table which cannot fit in the memory. Putting the pages near the
00162 end of the LRU list, we make sure that most of the buf_pool stays
00163 in the main memory, undisturbed.
00164 
00165 The unzip_LRU list contains a subset of the common LRU list.  The
00166 blocks on the unzip_LRU list hold a compressed file page and the
00167 corresponding uncompressed page frame.  A block is in unzip_LRU if and
00168 only if the predicate buf_page_belongs_to_unzip_LRU(&block->page)
00169 holds.  The blocks in unzip_LRU will be in same order as they are in
00170 the common LRU list.  That is, each manipulation of the common LRU
00171 list will result in the same manipulation of the unzip_LRU list.
00172 
00173 The chain of modified blocks (buf_pool->flush_list) contains the blocks
00174 holding file pages that have been modified in the memory
00175 but not written to disk yet. The block with the oldest modification
00176 which has not yet been written to disk is at the end of the chain.
00177 The access to this list is protected by buf_pool->flush_list_mutex.
00178 
00179 The chain of unmodified compressed blocks (buf_pool->zip_clean)
00180 contains the control blocks (buf_page_t) of those compressed pages
00181 that are not in buf_pool->flush_list and for which no uncompressed
00182 page has been allocated in the buffer pool.  The control blocks for
00183 uncompressed pages are accessible via buf_block_t objects that are
00184 reachable via buf_pool->chunks[].
00185 
00186 The chains of free memory blocks (buf_pool->zip_free[]) are used by
00187 the buddy allocator (buf0buddy.c) to keep track of currently unused
00188 memory blocks of size sizeof(buf_page_t)..UNIV_PAGE_SIZE / 2.  These
00189 blocks are inside the UNIV_PAGE_SIZE-sized memory blocks of type
00190 BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer
00191 pool.  The buddy allocator is solely used for allocating control
00192 blocks for compressed pages (buf_page_t) and compressed page frames.
00193 
00194     Loading a file page
00195     -------------------
00196 
00197 First, a victim block for replacement has to be found in the
00198 buf_pool. It is taken from the free list or searched for from the
00199 end of the LRU-list. An exclusive lock is reserved for the frame,
00200 the io_fix field is set in the block fixing the block in buf_pool,
00201 and the io-operation for loading the page is queued. The io-handler thread
00202 releases the X-lock on the frame and resets the io_fix field
00203 when the io operation completes.
00204 
00205 A thread may request the above operation using the function
00206 buf_page_get(). It may then continue to request a lock on the frame.
00207 The lock is granted when the io-handler releases the x-lock.
00208 
00209     Read-ahead
00210     ----------
00211 
00212 The read-ahead mechanism is intended to be intelligent and
00213 isolated from the semantically higher levels of the database
00214 index management. From the higher level we only need the
00215 information if a file page has a natural successor or
00216 predecessor page. On the leaf level of a B-tree index,
00217 these are the next and previous pages in the natural
00218 order of the pages.
00219 
00220 Let us first explain the read-ahead mechanism when the leaf pages
00221 of a B-tree are scanned in an ascending or descending order.
00222 When a read page is the first time referenced in the buf_pool,
00223 the buffer manager checks if it is at the border of a so-called
00224 linear read-ahead area. The tablespace is divided into these
00225 areas of size 64 blocks, for example. So if the page is at the
00226 border of such an area, the read-ahead mechanism checks if
00227 all the other blocks in the area have been accessed in an
00228 ascending or descending order. If this is the case, the system
00229 looks at the natural successor or predecessor of the page,
00230 checks if that is at the border of another area, and in this case
00231 issues read-requests for all the pages in that area. Maybe
00232 we could relax the condition that all the pages in the area
00233 have to be accessed: if data is deleted from a table, there may
00234 appear holes of unused pages in the area.
00235 
00236 A different read-ahead mechanism is used when there appears
00237 to be a random access pattern to a file.
00238 If a new page is referenced in the buf_pool, and several pages
00239 of its random access area (for instance, 32 consecutive pages
00240 in a tablespace) have recently been referenced, we may predict
00241 that the whole area may be needed in the near future, and issue
00242 the read requests for the whole area.
00243 */
00244 
00245 #ifndef UNIV_HOTBACKUP
00246 
/* Time in microseconds to sleep between polls while waiting for a page
read to complete (NOTE(review): unit assumed from the usual os_thread_sleep
usage — confirm at the call site, which is outside this chunk). */
static const int WAIT_FOR_READ  = 5000;
/* Maximum number of times we retry reading a page before giving up and
treating the read as failed. */
static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;

/* The array of buffer pool instances of this server; sized elsewhere by
srv_buf_pool_instances and accessed through buf_pool_from_array(). */
UNIV_INTERN buf_pool_t* buf_pool_ptr;
00253 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/* Counter used by debug builds to decide when to run buffer pool
consistency checks (NOTE(review): usage is outside this chunk — confirm). */
static ulint  buf_dbg_counter = 0; 
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG

/* If TRUE, debug builds print extra buffer pool diagnostics. */
UNIV_INTERN ibool   buf_debug_prints = FALSE;
#endif /* UNIV_DEBUG */
00264 
00265 #ifdef UNIV_PFS_RWLOCK
00266 /* Keys to register buffer block related rwlocks and mutexes with
00267 performance schema */
00268 UNIV_INTERN mysql_pfs_key_t buf_block_lock_key;
00269 # ifdef UNIV_SYNC_DEBUG
00270 UNIV_INTERN mysql_pfs_key_t buf_block_debug_latch_key;
00271 # endif /* UNIV_SYNC_DEBUG */
00272 #endif /* UNIV_PFS_RWLOCK */
00273 
00274 #ifdef UNIV_PFS_MUTEX
00275 UNIV_INTERN mysql_pfs_key_t buffer_block_mutex_key;
00276 UNIV_INTERN mysql_pfs_key_t buf_pool_mutex_key;
00277 UNIV_INTERN mysql_pfs_key_t buf_pool_zip_mutex_key;
00278 UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key;
00279 #endif /* UNIV_PFS_MUTEX */
00280 
00281 #if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
00282 # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
00283 
00284 /* Buffer block mutexes and rwlocks can be registered
00285 in one group rather than individually. If PFS_GROUP_BUFFER_SYNC
00286 is defined, register buffer block mutex and rwlock
00287 in one group after their initialization. */
00288 #  define PFS_GROUP_BUFFER_SYNC
00289 
00290 /* This define caps the number of mutexes/rwlocks can
00291 be registered with performance schema. Developers can
00292 modify this define if necessary. Please note, this would
00293 be effective only if PFS_GROUP_BUFFER_SYNC is defined. */
00294 #  define PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER  ULINT_MAX
00295 
00296 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
00297 #endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */
00298 
/* A chunk of buffer frames: buffer pool memory is allocated in chunks,
each holding an array of page frames plus their control blocks. */
struct buf_chunk_struct{
  ulint   mem_size; /* allocated size of the chunk, in bytes */
  ulint   size;   /* number of buffer blocks (and frames) in
        this chunk */
  void*   mem;    /* pointer to the memory area allocated for
        the frames and blocks of this chunk */
  buf_block_t*  blocks;   /* array of the control blocks, one per
        frame */
};
00307 #endif /* !UNIV_HOTBACKUP */
00308 
00309 /********************************************************************/
00313 UNIV_INTERN
00314 ib_uint64_t
00315 buf_pool_get_oldest_modification(void)
00316 /*==================================*/
00317 {
00318   ulint   i;
00319   buf_page_t* bpage;
00320   ib_uint64_t lsn = 0;
00321   ib_uint64_t oldest_lsn = 0;
00322 
00323   /* When we traverse all the flush lists we don't want another
00324   thread to add a dirty page to any flush list. */
00325   log_flush_order_mutex_enter();
00326 
00327   for (i = 0; i < srv_buf_pool_instances; i++) {
00328     buf_pool_t* buf_pool;
00329 
00330     buf_pool = buf_pool_from_array(i);
00331 
00332     buf_flush_list_mutex_enter(buf_pool);
00333 
00334     bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
00335 
00336     if (bpage != NULL) {
00337       ut_ad(bpage->in_flush_list);
00338       lsn = bpage->oldest_modification;
00339     }
00340 
00341     buf_flush_list_mutex_exit(buf_pool);
00342 
00343     if (!oldest_lsn || oldest_lsn > lsn) {
00344       oldest_lsn = lsn;
00345     }
00346   }
00347 
00348   log_flush_order_mutex_exit();
00349 
00350   /* The returned answer may be out of date: the flush_list can
00351   change after the mutex has been released. */
00352 
00353   return(oldest_lsn);
00354 }
00355 
00356 /********************************************************************/
00358 UNIV_INTERN
00359 void
00360 buf_get_total_list_len(
00361 /*===================*/
00362   ulint*    LRU_len,  
00363   ulint*    free_len, 
00364   ulint*    flush_list_len) 
00365 {
00366   ulint   i;
00367 
00368   *LRU_len = 0;
00369   *free_len = 0;
00370   *flush_list_len = 0;
00371 
00372   for (i = 0; i < srv_buf_pool_instances; i++) {
00373     buf_pool_t* buf_pool;
00374 
00375     buf_pool = buf_pool_from_array(i);
00376     *LRU_len += UT_LIST_GET_LEN(buf_pool->LRU);
00377     *free_len += UT_LIST_GET_LEN(buf_pool->free);
00378     *flush_list_len += UT_LIST_GET_LEN(buf_pool->flush_list);
00379   }
00380 }
00381 
00382 /********************************************************************/
00384 UNIV_INTERN
00385 void
00386 buf_get_total_stat(
00387 /*===============*/
00388   buf_pool_stat_t*  tot_stat) 
00389 {
00390   ulint     i;
00391 
00392   memset(tot_stat, 0, sizeof(*tot_stat));
00393 
00394   for (i = 0; i < srv_buf_pool_instances; i++) {
00395     buf_pool_stat_t*buf_stat;
00396     buf_pool_t* buf_pool;
00397 
00398     buf_pool = buf_pool_from_array(i);
00399 
00400     buf_stat = &buf_pool->stat;
00401     tot_stat->n_page_gets += buf_stat->n_page_gets;
00402     tot_stat->n_pages_read += buf_stat->n_pages_read;
00403     tot_stat->n_pages_written += buf_stat->n_pages_written;
00404     tot_stat->n_pages_created += buf_stat->n_pages_created;
00405     tot_stat->n_ra_pages_read += buf_stat->n_ra_pages_read;
00406     tot_stat->n_ra_pages_evicted += buf_stat->n_ra_pages_evicted;
00407     tot_stat->n_pages_made_young += buf_stat->n_pages_made_young;
00408 
00409     tot_stat->n_pages_not_made_young +=
00410       buf_stat->n_pages_not_made_young;
00411   }
00412 }
00413 
00414 /********************************************************************/
00417 UNIV_INTERN
00418 buf_block_t*
00419 buf_block_alloc(
00420 /*============*/
00421   buf_pool_t* buf_pool, 
00422   ulint   zip_size) 
00424 {
00425   buf_block_t*  block;
00426   ulint   index;
00427   static ulint  buf_pool_index;
00428 
00429   if (buf_pool == NULL) {
00430     /* We are allocating memory from any buffer pool, ensure
00431     we spread the grace on all buffer pool instances. */
00432     index = buf_pool_index++ % srv_buf_pool_instances;
00433     buf_pool = buf_pool_from_array(index);
00434   }
00435 
00436   block = buf_LRU_get_free_block(buf_pool, zip_size);
00437 
00438   buf_block_set_state(block, BUF_BLOCK_MEMORY);
00439 
00440   return(block);
00441 }
00442 
00443 /********************************************************************/
00448 UNIV_INTERN
00449 ulint
00450 buf_calc_page_new_checksum(
00451 /*=======================*/
00452   const byte* page) 
00453 {
00454   ulint checksum;
00455 
00456   /* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x
00457   ..._ARCH_LOG_NO, are written outside the buffer pool to the first
00458   pages of data files, we have to skip them in the page checksum
00459   calculation.
00460   We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the
00461   checksum is stored, and also the last 8 bytes of page because
00462   there we store the old formula checksum. */
00463 
00464   checksum = ut_fold_binary(page + FIL_PAGE_OFFSET,
00465           FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
00466     + ut_fold_binary(page + FIL_PAGE_DATA,
00467          UNIV_PAGE_SIZE - FIL_PAGE_DATA
00468          - FIL_PAGE_END_LSN_OLD_CHKSUM);
00469   checksum = checksum & 0xFFFFFFFFUL;
00470 
00471   return(checksum);
00472 }
00473 
00474 /********************************************************************/
00482 UNIV_INTERN
00483 ulint
00484 buf_calc_page_old_checksum(
00485 /*=======================*/
00486   const byte* page) 
00487 {
00488   ulint checksum;
00489 
00490   checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);
00491 
00492   checksum = checksum & 0xFFFFFFFFUL;
00493 
00494   return(checksum);
00495 }
00496 
/********************************************************************/
/** Checks if a page is corrupt: the LSN stamped at the start of the
page must agree with the copy at its end, and the stored checksums
must match one of the accepted formulas.
@return TRUE if the page seems corrupt */
UNIV_INTERN
ibool
buf_page_is_corrupted(
/*==================*/
  const byte* read_buf, /*!< in: a database page */
  ulint   zip_size) /*!< in: compressed page size in bytes,
        or 0 for an uncompressed page */
{
  ulint   checksum_field;
  ulint   old_checksum_field;

  /* Uncompressed pages duplicate the low 4 bytes of the page LSN
  in the page trailer; a disagreement between the two copies means
  the page was not written out atomically (e.g. a torn write).
  Compressed pages lack this trailer copy, so skip the check. */
  if (UNIV_LIKELY(!zip_size)
      && memcmp(read_buf + FIL_PAGE_LSN + 4,
          read_buf + UNIV_PAGE_SIZE
          - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {

    /* Stored log sequence numbers at the start and the end
    of page do not match */

    return(TRUE);
  }

#ifndef UNIV_HOTBACKUP
  if (recv_lsn_checks_on) {
    ib_uint64_t current_lsn;

    /* A page LSN ahead of the current system LSN means the
    data files are newer than the redo log: report it, but do
    not declare the page corrupt here. */
    if (log_peek_lsn(&current_lsn)
        && UNIV_UNLIKELY
        (current_lsn
         < mach_read_from_8(read_buf + FIL_PAGE_LSN))) {
      ut_print_timestamp(stderr);

                        drizzled::errmsg_printf(drizzled::error::INFO,
                                                "InnoDB: Error: page %lu log sequence number %"PRIu64". "
                                                "InnoDB: is in the future! Current system log sequence number %"PRIu64". "
                                                "Your database may be corrupt or you may have copied the InnoDB tablespace but not the InnoDB log files. See "
                                                " " REFMAN "forcing-recovery.html for more information. ",
                                                (ulong) mach_read_from_4(read_buf
                                                                         + FIL_PAGE_OFFSET),
                                                mach_read_from_8(read_buf + FIL_PAGE_LSN),
                                                current_lsn);
    }
  }
#endif

  /* If we use checksums validation, make additional check before
  returning TRUE to ensure that the checksum is not equal to
  BUF_NO_CHECKSUM_MAGIC which might be stored by InnoDB with checksums
  disabled. Otherwise, skip checksum calculation and return FALSE */

  if (UNIV_LIKELY(srv_use_checksums)) {
    checksum_field = mach_read_from_4(read_buf
              + FIL_PAGE_SPACE_OR_CHKSUM);

    /* Compressed pages carry only the zip checksum in the
    FIL_PAGE_SPACE_OR_CHKSUM field; nothing else to verify. */
    if (UNIV_UNLIKELY(zip_size)) {
      return(checksum_field != BUF_NO_CHECKSUM_MAGIC
             && checksum_field
             != page_zip_calc_checksum(read_buf, zip_size));
    }

    old_checksum_field = mach_read_from_4(
      read_buf + UNIV_PAGE_SIZE
      - FIL_PAGE_END_LSN_OLD_CHKSUM);

    /* There are 2 valid formulas for old_checksum_field:

    1. Very old versions of InnoDB only stored 8 byte lsn to the
    start and the end of the page.

    2. Newer InnoDB versions store the old formula checksum
    there. */

    if (old_checksum_field != mach_read_from_4(read_buf
                 + FIL_PAGE_LSN)
        && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
        && old_checksum_field
        != buf_calc_page_old_checksum(read_buf)) {

      return(TRUE);
    }

    /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
    (always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */

    if (checksum_field != 0
        && checksum_field != BUF_NO_CHECKSUM_MAGIC
        && checksum_field
        != buf_calc_page_new_checksum(read_buf)) {

      return(TRUE);
    }
  }

  return(FALSE);
}
00595 
/********************************************************************/
/** Prints a page to stderr: a full hex/ascii dump, the checksums
computed with both formulas versus the stored values, key header
fields, and a best guess at the page type. */
UNIV_INTERN
void
buf_page_print(
/*===========*/
  const byte* read_buf, /*!< in: a database page */
  ulint   zip_size) /*!< in: compressed page size in bytes,
        or 0 for an uncompressed page */
{
#ifndef UNIV_HOTBACKUP
  dict_index_t* index;
#endif /* !UNIV_HOTBACKUP */
  ulint   checksum;
  ulint   old_checksum;
  ulint   size  = zip_size;

  if (!size) {
    size = UNIV_PAGE_SIZE;
  }

  /* Dump the raw page contents first, whatever the page type. */
  ut_print_timestamp(stderr);
  fprintf(stderr, "  InnoDB: Page dump in ascii and hex (%lu bytes):\n",
    (ulong) size);
  ut_print_buf(stderr, read_buf, size);
  fputs("\nInnoDB: End of page dump\n", stderr);

  if (zip_size) {
    /* Print compressed page. */

    switch (fil_page_get_type(read_buf)) {
    case FIL_PAGE_TYPE_ZBLOB:
    case FIL_PAGE_TYPE_ZBLOB2:
      checksum = srv_use_checksums
        ? page_zip_calc_checksum(read_buf, zip_size)
        : BUF_NO_CHECKSUM_MAGIC;
      ut_print_timestamp(stderr);
      fprintf(stderr,
        "  InnoDB: Compressed BLOB page"
        " checksum %lu, stored %lu\n"
        "InnoDB: Page lsn %lu %lu\n"
        "InnoDB: Page number (if stored"
        " to page already) %lu,\n"
        "InnoDB: space id (if stored"
        " to page already) %lu\n",
        (ulong) checksum,
        (ulong) mach_read_from_4(
          read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
        (ulong) mach_read_from_4(
          read_buf + FIL_PAGE_LSN),
        (ulong) mach_read_from_4(
          read_buf + (FIL_PAGE_LSN + 4)),
        (ulong) mach_read_from_4(
          read_buf + FIL_PAGE_OFFSET),
        (ulong) mach_read_from_4(
          read_buf
          + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
      return;
    default:
      /* Unknown compressed page type: warn, then fall through
      and report it as if it were an index page. */
      ut_print_timestamp(stderr);
      fprintf(stderr,
        "  InnoDB: unknown page type %lu,"
        " assuming FIL_PAGE_INDEX\n",
        fil_page_get_type(read_buf));
      /* fall through */
    case FIL_PAGE_INDEX:
      checksum = srv_use_checksums
        ? page_zip_calc_checksum(read_buf, zip_size)
        : BUF_NO_CHECKSUM_MAGIC;

      ut_print_timestamp(stderr);
      fprintf(stderr,
        "  InnoDB: Compressed page checksum %lu,"
        " stored %lu\n"
        "InnoDB: Page lsn %lu %lu\n"
        "InnoDB: Page number (if stored"
        " to page already) %lu,\n"
        "InnoDB: space id (if stored"
        " to page already) %lu\n",
        (ulong) checksum,
        (ulong) mach_read_from_4(
          read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
        (ulong) mach_read_from_4(
          read_buf + FIL_PAGE_LSN),
        (ulong) mach_read_from_4(
          read_buf + (FIL_PAGE_LSN + 4)),
        (ulong) mach_read_from_4(
          read_buf + FIL_PAGE_OFFSET),
        (ulong) mach_read_from_4(
          read_buf
          + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
      return;
    case FIL_PAGE_TYPE_XDES:
      /* This is an uncompressed page. */
      break;
    }
  }

  /* Uncompressed page: compute both checksum formulas so the
  stored values can be compared against either. */
  checksum = srv_use_checksums
    ? buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
  old_checksum = srv_use_checksums
    ? buf_calc_page_old_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;

  ut_print_timestamp(stderr);
  fprintf(stderr,
    "  InnoDB: Page checksum %lu, prior-to-4.0.14-form"
    " checksum %lu\n"
    "InnoDB: stored checksum %lu, prior-to-4.0.14-form"
    " stored checksum %lu\n"
    "InnoDB: Page lsn %lu %lu, low 4 bytes of lsn"
    " at page end %lu\n"
    "InnoDB: Page number (if stored to page already) %lu,\n"
    "InnoDB: space id (if created with >= MySQL-4.1.1"
    " and stored already) %lu\n",
    (ulong) checksum, (ulong) old_checksum,
    (ulong) mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
    (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
           - FIL_PAGE_END_LSN_OLD_CHKSUM),
    (ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN),
    (ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
    (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
           - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
    (ulong) mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
    (ulong) mach_read_from_4(read_buf
           + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));

#ifndef UNIV_HOTBACKUP
  /* Check the undo-page type field: it may identify the page as an
  undo log page even before the page type switch below. */
  if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
      == TRX_UNDO_INSERT) {
    fprintf(stderr,
      "InnoDB: Page may be an insert undo log page\n");
  } else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR
            + TRX_UNDO_PAGE_TYPE)
       == TRX_UNDO_UPDATE) {
    fprintf(stderr,
      "InnoDB: Page may be an update undo log page\n");
  }
#endif /* !UNIV_HOTBACKUP */

  /* Report a human-readable guess for the stored page type. */
  switch (fil_page_get_type(read_buf)) {
    index_id_t  index_id;
  case FIL_PAGE_INDEX:
    index_id = btr_page_get_index_id(read_buf);
    fprintf(stderr,
      "InnoDB: Page may be an index page where"
      " index id is %llu\n",
      (ullint) index_id);
#ifndef UNIV_HOTBACKUP
    /* If the index is still in the data dictionary cache,
    print its name as well. */
    index = dict_index_find_on_id_low(index_id);
    if (index) {
      fputs("InnoDB: (", stderr);
      dict_index_name_print(stderr, NULL, index);
      fputs(")\n", stderr);
    }
#endif /* !UNIV_HOTBACKUP */
    break;
  case FIL_PAGE_INODE:
    fputs("InnoDB: Page may be an 'inode' page\n", stderr);
    break;
  case FIL_PAGE_IBUF_FREE_LIST:
    fputs("InnoDB: Page may be an insert buffer free list page\n",
          stderr);
    break;
  case FIL_PAGE_TYPE_ALLOCATED:
    fputs("InnoDB: Page may be a freshly allocated page\n",
          stderr);
    break;
  case FIL_PAGE_IBUF_BITMAP:
    fputs("InnoDB: Page may be an insert buffer bitmap page\n",
          stderr);
    break;
  case FIL_PAGE_TYPE_SYS:
    fputs("InnoDB: Page may be a system page\n",
          stderr);
    break;
  case FIL_PAGE_TYPE_TRX_SYS:
    fputs("InnoDB: Page may be a transaction system page\n",
          stderr);
    break;
  case FIL_PAGE_TYPE_FSP_HDR:
    fputs("InnoDB: Page may be a file space header page\n",
          stderr);
    break;
  case FIL_PAGE_TYPE_XDES:
    fputs("InnoDB: Page may be an extent descriptor page\n",
          stderr);
    break;
  case FIL_PAGE_TYPE_BLOB:
    fputs("InnoDB: Page may be a BLOB page\n",
          stderr);
    break;
  case FIL_PAGE_TYPE_ZBLOB:
  case FIL_PAGE_TYPE_ZBLOB2:
    fputs("InnoDB: Page may be a compressed BLOB page\n",
          stderr);
    break;
  }
}
00794 
00795 #ifndef UNIV_HOTBACKUP
00796 
# ifdef PFS_GROUP_BUFFER_SYNC
/********************************************************************/
/** Registers the mutex and rw-lock of every buffer block in a chunk
with the performance schema in one batch, up to
PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER blocks (ULINT_MAX by default, so
normally the whole chunk). Each lock must not already be registered. */
static
void
pfs_register_buffer_block(
/*======================*/
  buf_chunk_t*  chunk)    /*!< in/out: chunk of buffer blocks */
{
  ulint   i;
  ulint   num_to_register;
  buf_block_t*    block;

  block = chunk->blocks;

  num_to_register = ut_min(chunk->size,
         PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER);

  for (i = 0; i < num_to_register; i++) {
    /* Declare each local inside its own #ifdef block: the
    original declared both unconditionally, causing unused
    variable warnings when only one of UNIV_PFS_MUTEX /
    UNIV_PFS_RWLOCK is defined. */
#  ifdef UNIV_PFS_MUTEX
    {
      mutex_t*  mutex = &block->mutex;

      ut_a(!mutex->pfs_psi);
      mutex->pfs_psi = (PSI_server)
        ? PSI_server->init_mutex(
          buffer_block_mutex_key, mutex)
        : NULL;
    }
#  endif /* UNIV_PFS_MUTEX */

#  ifdef UNIV_PFS_RWLOCK
    {
      rw_lock_t*  rwlock = &block->lock;

      ut_a(!rwlock->pfs_psi);
      rwlock->pfs_psi = (PSI_server)
        ? PSI_server->init_rwlock(
          buf_block_lock_key, rwlock)
        : NULL;
    }
#  endif /* UNIV_PFS_RWLOCK */
    block++;
  }
}
# endif /* PFS_GROUP_BUFFER_SYNC */
00842 
00843 /********************************************************************/
/* Initializes a buffer control block when the buffer pool is created:
sets the frame pointer, resets the page state and fix counts, clears
the adaptive-hash and debug flags, and creates the per-block mutex and
rw-lock. */
static
void
buf_block_init(
/*===========*/
  buf_pool_t* buf_pool, /*!< in: buffer pool instance owning the block */
  buf_block_t*  block,    /*!< in: pointer to control block */
  byte*   frame)    /*!< in: pointer to the page frame */
{
  UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE, block);

  block->frame = frame;

  /* Record which buffer pool instance this page belongs to. */
  block->page.buf_pool_index = buf_pool_index(buf_pool);
  block->page.state = BUF_BLOCK_NOT_USED;
  block->page.buf_fix_count = 0;
  block->page.io_fix = BUF_IO_NONE;

  block->modify_clock = 0;

#ifdef UNIV_DEBUG_FILE_ACCESSES
  block->page.file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES */

  block->check_index_page_at_flush = FALSE;
  block->index = NULL;

  /* No adaptive hash index entries point into this block yet. */
  block->is_hashed = FALSE;

#ifdef UNIV_DEBUG
  /* Debug-only list membership flags; the block starts on no list. */
  block->page.in_page_hash = FALSE;
  block->page.in_zip_hash = FALSE;
  block->page.in_flush_list = FALSE;
  block->page.in_free_list = FALSE;
  block->page.in_LRU_list = FALSE;
  block->in_unzip_LRU_list = FALSE;
#endif /* UNIV_DEBUG */
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
  block->n_pointers = 0;
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
  page_zip_des_init(&block->page.zip);

#if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
  /* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration
  of buffer block mutex/rwlock with performance schema. If
  PFS_GROUP_BUFFER_SYNC is defined, skip the registration
  since buffer block mutex/rwlock will be registered later in
  pfs_register_buffer_block() */

  mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK);
  rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING);
#else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
  mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK);
  rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING);
#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */

  ut_ad(rw_lock_validate(&(block->lock)));

#ifdef UNIV_SYNC_DEBUG
  rw_lock_create(buf_block_debug_latch_key,
           &block->debug_latch, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
}
00907 
00908 /********************************************************************/
/* Allocates one memory chunk for the buffer pool and carves it into
block descriptors (at the start) and aligned page frames (after them),
initializing each block and putting it on the free list.
@return chunk, or NULL if the large-memory allocation failed */
static
buf_chunk_t*
buf_chunk_init(
/*===========*/
  buf_pool_t* buf_pool, /*!< in: buffer pool instance */
  buf_chunk_t*  chunk,    /*!< out: chunk descriptor to fill in */
  ulint   mem_size) /*!< in: requested chunk size in bytes */
{
  buf_block_t*  block;
  byte*   frame;
  ulint   i;

  /* Round down to a multiple of page size,
  although it already should be. */
  mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
  /* Reserve space for the block descriptors. */
  mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
          + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);

  chunk->mem_size = mem_size;
  chunk->mem = os_mem_alloc_large(&chunk->mem_size);

  if (UNIV_UNLIKELY(chunk->mem == NULL)) {

    return(NULL);
  }

  /* Allocate the block descriptors from
  the start of the memory block. */
  chunk->blocks = static_cast<buf_block_struct *>(chunk->mem);

  /* Align a pointer to the first frame.  Note that when
  os_large_page_size is smaller than UNIV_PAGE_SIZE,
  we may allocate one fewer block than requested.  When
  it is bigger, we may allocate more blocks than requested. */

  frame = static_cast<unsigned char *>(ut_align(chunk->mem, UNIV_PAGE_SIZE));
  chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
    - (frame != chunk->mem);

  /* Subtract the space needed for block descriptors. */
  {
    ulint size = chunk->size;

    /* Advance the first frame past the descriptor array,
    giving up one page of capacity per skipped frame. */
    while (frame < (byte*) (chunk->blocks + size)) {
      frame += UNIV_PAGE_SIZE;
      size--;
    }

    chunk->size = size;
  }

  /* Init block structs and assign frames for them. Then we
  assign the frames to the first blocks (we already mapped the
  memory above). */

  block = chunk->blocks;

  for (i = chunk->size; i--; ) {

    buf_block_init(buf_pool, block, frame);

#ifdef HAVE_VALGRIND
    /* Wipe contents of frame to eliminate a Purify warning */
    memset(block->frame, '\0', UNIV_PAGE_SIZE);
#endif
    /* Add the block to the free list */
    UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));

    ut_d(block->page.in_free_list = TRUE);
    ut_ad(buf_pool_from_block(block) == buf_pool);

    block++;
    frame += UNIV_PAGE_SIZE;
  }

#ifdef PFS_GROUP_BUFFER_SYNC
  pfs_register_buffer_block(chunk);
#endif
  return(chunk);
}
00992 
00993 #ifdef UNIV_DEBUG
00994 /*********************************************************************/
00998 static
00999 buf_block_t*
01000 buf_chunk_contains_zip(
01001 /*===================*/
01002   buf_chunk_t*  chunk,  
01003   const void* data) 
01004 {
01005   buf_block_t*  block;
01006   ulint   i;
01007 
01008   block = chunk->blocks;
01009 
01010   for (i = chunk->size; i--; block++) {
01011     if (block->page.zip.data == data) {
01012 
01013       return(block);
01014     }
01015   }
01016 
01017   return(NULL);
01018 }
01019 
01020 /*********************************************************************/
01024 UNIV_INTERN
01025 buf_block_t*
01026 buf_pool_contains_zip(
01027 /*==================*/
01028   buf_pool_t* buf_pool, 
01029   const void* data)   
01030 {
01031   ulint   n;
01032   buf_chunk_t*  chunk = buf_pool->chunks;
01033 
01034   ut_ad(buf_pool);
01035   ut_ad(buf_pool_mutex_own(buf_pool));
01036   for (n = buf_pool->n_chunks; n--; chunk++) {
01037 
01038     buf_block_t* block = buf_chunk_contains_zip(chunk, data);
01039 
01040     if (block) {
01041       return(block);
01042     }
01043   }
01044 
01045   return(NULL);
01046 }
01047 #endif /* UNIV_DEBUG */
01048 
01049 /*********************************************************************/
/* Checks whether any block of the chunk is in a state that prevents
freeing the chunk: a file page that is not ready for replacement.
@return address of the first such block, or NULL if the chunk can go */
static
const buf_block_t*
buf_chunk_not_freed(
/*================*/
  buf_chunk_t*  chunk)  /*!< in: chunk being checked */
{
  buf_block_t*  block;
  ulint   i;

  block = chunk->blocks;

  for (i = chunk->size; i--; block++) {
    ibool ready;

    switch (buf_block_get_state(block)) {
    case BUF_BLOCK_ZIP_FREE:
    case BUF_BLOCK_ZIP_PAGE:
    case BUF_BLOCK_ZIP_DIRTY:
      /* The uncompressed buffer pool should never
      contain compressed block descriptors. */
      ut_error;
      break;
    case BUF_BLOCK_NOT_USED:
    case BUF_BLOCK_READY_FOR_USE:
    case BUF_BLOCK_MEMORY:
    case BUF_BLOCK_REMOVE_HASH:
      /* Skip blocks that are not being used for
      file pages. */
      break;
    case BUF_BLOCK_FILE_PAGE:
      /* Take the block mutex so the replaceability
      check is not racing a concurrent state change. */
      mutex_enter(&block->mutex);
      ready = buf_flush_ready_for_replace(&block->page);
      mutex_exit(&block->mutex);

      if (!ready) {

        return(block);
      }

      break;
    }
  }

  return(NULL);
}
01097 
01098 /*********************************************************************/
01101 static
01102 ibool
01103 buf_chunk_all_free(
01104 /*===============*/
01105   const buf_chunk_t*  chunk)  
01106 {
01107   const buf_block_t*  block;
01108   ulint     i;
01109 
01110   block = chunk->blocks;
01111 
01112   for (i = chunk->size; i--; block++) {
01113 
01114     if (buf_block_get_state(block) != BUF_BLOCK_NOT_USED) {
01115 
01116       return(FALSE);
01117     }
01118   }
01119 
01120   return(TRUE);
01121 }
01122 
01123 /********************************************************************/
01125 static
01126 void
01127 buf_chunk_free(
01128 /*===========*/
01129   buf_pool_t* buf_pool, 
01130   buf_chunk_t*  chunk)    
01131 {
01132   buf_block_t*    block;
01133   const buf_block_t*  block_end;
01134 
01135   ut_ad(buf_pool_mutex_own(buf_pool));
01136 
01137   block_end = chunk->blocks + chunk->size;
01138 
01139   for (block = chunk->blocks; block < block_end; block++) {
01140     ut_a(buf_block_get_state(block) == BUF_BLOCK_NOT_USED);
01141     ut_a(!block->page.zip.data);
01142 
01143     ut_ad(!block->page.in_LRU_list);
01144     ut_ad(!block->in_unzip_LRU_list);
01145     ut_ad(!block->page.in_flush_list);
01146     /* Remove the block from the free list. */
01147     ut_ad(block->page.in_free_list);
01148     UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
01149 
01150     /* Free the latches. */
01151     mutex_free(&block->mutex);
01152     rw_lock_free(&block->lock);
01153 #ifdef UNIV_SYNC_DEBUG
01154     rw_lock_free(&block->debug_latch);
01155 #endif /* UNIV_SYNC_DEBUG */
01156     UNIV_MEM_UNDESC(block);
01157   }
01158 
01159   os_mem_free_large(chunk->mem, chunk->mem_size);
01160 }
01161 
01162 /********************************************************************/
01164 static
01165 void
01166 buf_pool_set_sizes(void)
01167 /*====================*/
01168 {
01169   ulint i;
01170   ulint curr_size = 0;
01171 
01172   buf_pool_mutex_enter_all();
01173 
01174   for (i = 0; i < srv_buf_pool_instances; i++) {
01175     buf_pool_t* buf_pool;
01176 
01177     buf_pool = buf_pool_from_array(i);
01178     curr_size += buf_pool->curr_pool_size;
01179   }
01180 
01181   srv_buf_pool_curr_size = curr_size;
01182   srv_buf_pool_old_size = srv_buf_pool_size;
01183 
01184   buf_pool_mutex_exit_all();
01185 }
01186 
01187 /********************************************************************/
01190 static
01191 ulint
01192 buf_pool_init_instance(
01193 /*===================*/
01194   buf_pool_t* buf_pool, 
01195   ulint   buf_pool_size,  
01196   ulint   instance_no)  
01197 {
01198   ulint   i;
01199   buf_chunk_t*  chunk;
01200 
01201   /* 1. Initialize general fields
01202   ------------------------------- */
01203   mutex_create(buf_pool_mutex_key,
01204          &buf_pool->mutex, SYNC_BUF_POOL);
01205   mutex_create(buf_pool_zip_mutex_key,
01206          &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
01207 
01208   buf_pool_mutex_enter(buf_pool);
01209 
01210   if (buf_pool_size > 0) {
01211     buf_pool->n_chunks = 1;
01212                 void *chunk_ptr= mem_zalloc((sizeof *chunk));
01213     buf_pool->chunks = chunk = static_cast<buf_chunk_t *>(chunk_ptr);
01214 
01215     UT_LIST_INIT(buf_pool->free);
01216 
01217     if (!buf_chunk_init(buf_pool, chunk, buf_pool_size)) {
01218       mem_free(chunk);
01219       mem_free(buf_pool);
01220 
01221       buf_pool_mutex_exit(buf_pool);
01222 
01223       return(DB_ERROR);
01224     }
01225 
01226     buf_pool->instance_no = instance_no;
01227     buf_pool->old_pool_size = buf_pool_size;
01228     buf_pool->curr_size = chunk->size;
01229     buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
01230 
01231     buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
01232     buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
01233     
01234     buf_pool->last_printout_time = ut_time();
01235   }
01236   /* 2. Initialize flushing fields
01237   -------------------------------- */
01238 
01239   mutex_create(flush_list_mutex_key, &buf_pool->flush_list_mutex,
01240          SYNC_BUF_FLUSH_LIST);
01241 
01242   for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
01243     buf_pool->no_flush[i] = os_event_create(NULL);
01244   }
01245 
01246   /* 3. Initialize LRU fields
01247   --------------------------- */
01248 
01249   /* All fields are initialized by mem_zalloc(). */
01250 
01251   buf_pool_mutex_exit(buf_pool);
01252 
01253   return(DB_SUCCESS);
01254 }
01255 
01256 /********************************************************************/
01258 static
01259 void
01260 buf_pool_free_instance(
01261 /*===================*/
01262   buf_pool_t* buf_pool) /* in,own: buffer pool instance
01263           to free */
01264 {
01265   buf_chunk_t*  chunk;
01266   buf_chunk_t*  chunks;
01267 
01268   chunks = buf_pool->chunks;
01269   chunk = chunks + buf_pool->n_chunks;
01270 
01271   while (--chunk >= chunks) {
01272     /* Bypass the checks of buf_chunk_free(), since they
01273     would fail at shutdown. */
01274     os_mem_free_large(chunk->mem, chunk->mem_size);
01275   }
01276 
01277   mem_free(buf_pool->chunks);
01278   hash_table_free(buf_pool->page_hash);
01279   hash_table_free(buf_pool->zip_hash);
01280 }
01281 
01282 /********************************************************************/
01285 UNIV_INTERN
01286 ulint
01287 buf_pool_init(
01288 /*==========*/
01289   ulint total_size, 
01290   ulint n_instances)  
01291 {
01292   ulint   i;
01293   const ulint size  = total_size / n_instances;
01294 
01295   ut_ad(n_instances > 0);
01296   ut_ad(n_instances <= MAX_BUFFER_POOLS);
01297   ut_ad(n_instances == srv_buf_pool_instances);
01298 
01299   /* We create an extra buffer pool instance, this instance is used
01300   for flushing the flush lists, to keep track of n_flush for all
01301   the buffer pools and also used as a waiting object during flushing. */
01302         void *buf_pool_void_ptr= mem_zalloc(n_instances * sizeof *buf_pool_ptr);
01303   buf_pool_ptr = static_cast<buf_pool_struct *>(buf_pool_void_ptr);
01304 
01305   for (i = 0; i < n_instances; i++) {
01306     buf_pool_t* ptr = &buf_pool_ptr[i];
01307 
01308     if (buf_pool_init_instance(ptr, size, i) != DB_SUCCESS) {
01309 
01310       /* Free all the instances created so far. */
01311       buf_pool_free(i);
01312 
01313       return(DB_ERROR);
01314     }
01315   }
01316 
01317   buf_pool_set_sizes();
01318   buf_LRU_old_ratio_update(100 * 3/ 8, FALSE);
01319 
01320   btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64);
01321 
01322   return(DB_SUCCESS);
01323 }
01324 
01325 /********************************************************************/
01328 UNIV_INTERN
01329 void
01330 buf_pool_free(
01331 /*==========*/
01332   ulint n_instances)  
01333 {
01334   ulint i;
01335 
01336   for (i = 0; i < n_instances; i++) {
01337     buf_pool_free_instance(buf_pool_from_array(i));
01338   }
01339 
01340   mem_free(buf_pool_ptr);
01341   buf_pool_ptr = NULL;
01342 }
01343 
01344 /********************************************************************/
/* Drops all adaptive hash index entries that point into one buffer
pool instance.  Caller holds btr_search_latch in exclusive mode; the
latch may be released and re-acquired here, which is reported through
released_search_latch so the caller can rescan. */
static
void
buf_pool_drop_hash_index_instance(
/*==============================*/
  buf_pool_t* buf_pool,   /*!< in: buffer pool instance */
  ibool*    released_search_latch)  /*!< out: set to TRUE if
          btr_search_latch was
          temporarily released */
{
  buf_chunk_t*  chunks  = buf_pool->chunks;
  buf_chunk_t*  chunk = chunks + buf_pool->n_chunks;

  while (--chunk >= chunks) {
    ulint   i;
    buf_block_t*  block = chunk->blocks;

    for (i = chunk->size; i--; block++) {
      /* block->is_hashed cannot be modified
      when we have an x-latch on btr_search_latch;
      see the comment in buf0buf.h */
      
      if (!block->is_hashed) {
        continue;
      }
      
      /* To follow the latching order, we
      have to release btr_search_latch
      before acquiring block->latch. */
      rw_lock_x_unlock(&btr_search_latch);
      /* When we release the search latch,
      we must rescan all blocks, because
      some may become hashed again. */
      *released_search_latch = TRUE;
      
      rw_lock_x_lock(&block->lock);
      
      /* This should be guaranteed by the
      callers, which will be holding
      btr_search_enabled_mutex. */
      ut_ad(!btr_search_enabled);
      
      /* Because we did not buffer-fix the
      block by calling buf_block_get_gen(),
      it is possible that the block has been
      allocated for some other use after
      btr_search_latch was released above.
      We do not care which file page the
      block is mapped to.  All we want to do
      is to drop any hash entries referring
      to the page. */
      
      /* It is possible that
      block->page.state != BUF_FILE_PAGE.
      Even that does not matter, because
      btr_search_drop_page_hash_index() will
      check block->is_hashed before doing
      anything.  block->is_hashed can only
      be set on uncompressed file pages. */
      
      btr_search_drop_page_hash_index(block);
      
      rw_lock_x_unlock(&block->lock);
      
      rw_lock_x_lock(&btr_search_latch);
      
      ut_ad(!btr_search_enabled);
    }
  }
}
01415  
01416 /********************************************************************/
01420 UNIV_INTERN
01421 void
01422 buf_pool_drop_hash_index(void)
01423 /*==========================*/
01424 {
01425   ibool   released_search_latch;
01426 
01427 #ifdef UNIV_SYNC_DEBUG
01428   ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
01429 #endif /* UNIV_SYNC_DEBUG */
01430   ut_ad(!btr_search_enabled);
01431 
01432   do {
01433     ulint i;
01434 
01435     released_search_latch = FALSE;
01436 
01437     for (i = 0; i < srv_buf_pool_instances; i++) {
01438       buf_pool_t* buf_pool;
01439 
01440       buf_pool = buf_pool_from_array(i);
01441 
01442       buf_pool_drop_hash_index_instance(
01443         buf_pool, &released_search_latch);
01444     }
01445 
01446   } while (released_search_latch);
01447 }
01448 
01449 /********************************************************************/
/* Relocates a compressed-only buffer control block: copies *bpage
into *dpage and replaces bpage with dpage in the LRU list and the
page hash.  The caller must hold the buf_pool mutex and the page
mutex, and bpage must not be I/O-fixed or buffer-fixed. */
UNIV_INTERN
void
buf_relocate(
/*=========*/
  buf_page_t* bpage,  /*!< in/out: control block being relocated;
        state must be BUF_BLOCK_ZIP_DIRTY or
        BUF_BLOCK_ZIP_PAGE (debug-asserted below) */
  buf_page_t* dpage)  /*!< in/out: destination control block */
{
  buf_page_t* b;
  ulint   fold;
  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);

  ut_ad(buf_pool_mutex_own(buf_pool));
  ut_ad(mutex_own(buf_page_get_mutex(bpage)));
  ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
  ut_a(bpage->buf_fix_count == 0);
  ut_ad(bpage->in_LRU_list);
  ut_ad(!bpage->in_zip_hash);
  ut_ad(bpage->in_page_hash);
  ut_ad(bpage == buf_page_hash_get(buf_pool,
                 bpage->space, bpage->offset));
  ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
#ifdef UNIV_DEBUG
  /* Only compressed-only pages may be relocated. */
  switch (buf_page_get_state(bpage)) {
  case BUF_BLOCK_ZIP_FREE:
  case BUF_BLOCK_NOT_USED:
  case BUF_BLOCK_READY_FOR_USE:
  case BUF_BLOCK_FILE_PAGE:
  case BUF_BLOCK_MEMORY:
  case BUF_BLOCK_REMOVE_HASH:
    ut_error;
  case BUF_BLOCK_ZIP_DIRTY:
  case BUF_BLOCK_ZIP_PAGE:
    break;
  }
#endif /* UNIV_DEBUG */

  memcpy(dpage, bpage, sizeof *dpage);

  ut_d(bpage->in_LRU_list = FALSE);
  ut_d(bpage->in_page_hash = FALSE);

  /* relocate buf_pool->LRU: put dpage in bpage's old position */
  b = UT_LIST_GET_PREV(LRU, bpage);
  UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);

  if (b) {
    UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage);
  } else {
    UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage);
  }

  if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
    buf_pool->LRU_old = dpage;
#ifdef UNIV_LRU_DEBUG
    /* buf_pool->LRU_old must be the first item in the LRU list
    whose "old" flag is set. */
    ut_a(buf_pool->LRU_old->old);
    ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
         || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
    ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
         || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
  } else {
    /* Check that the "old" flag is consistent in
    the block and its neighbours. */
    buf_page_set_old(dpage, buf_page_is_old(dpage));
#endif /* UNIV_LRU_DEBUG */
  }

  ut_d(UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU,
            ut_ad(ut_list_node_313->in_LRU_list)));

  /* relocate buf_pool->page_hash */
  fold = buf_page_address_fold(bpage->space, bpage->offset);

  HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
  HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
}
01532 
01533 /********************************************************************/
01535 static
01536 void
01537 buf_pool_shrink_instance(
01538 /*=====================*/
01539   buf_pool_t* buf_pool, 
01540   ulint   chunk_size) 
01541 {
01542   buf_chunk_t*  chunks;
01543   buf_chunk_t*  chunk;
01544   ulint   max_size;
01545   ulint   max_free_size;
01546   buf_chunk_t*  max_chunk;
01547   buf_chunk_t*  max_free_chunk;
01548 
01549   ut_ad(!buf_pool_mutex_own(buf_pool));
01550 
01551 try_again:
01552   btr_search_disable(); /* Empty the adaptive hash index again */
01553   buf_pool_mutex_enter(buf_pool);
01554 
01555 shrink_again:
01556   if (buf_pool->n_chunks <= 1) {
01557 
01558     /* Cannot shrink if there is only one chunk */
01559     goto func_done;
01560   }
01561 
01562   /* Search for the largest free chunk
01563   not larger than the size difference */
01564   chunks = buf_pool->chunks;
01565   chunk = chunks + buf_pool->n_chunks;
01566   max_size = max_free_size = 0;
01567   max_chunk = max_free_chunk = NULL;
01568 
01569   while (--chunk >= chunks) {
01570     if (chunk->size <= chunk_size
01571         && chunk->size > max_free_size) {
01572       if (chunk->size > max_size) {
01573         max_size = chunk->size;
01574         max_chunk = chunk;
01575       }
01576 
01577       if (buf_chunk_all_free(chunk)) {
01578         max_free_size = chunk->size;
01579         max_free_chunk = chunk;
01580       }
01581     }
01582   }
01583 
01584   if (!max_free_size) {
01585 
01586     ulint   dirty = 0;
01587     ulint   nonfree = 0;
01588     buf_block_t*  block;
01589     buf_block_t*  bend;
01590 
01591     /* Cannot shrink: try again later
01592     (do not assign srv_buf_pool_old_size) */
01593     if (!max_chunk) {
01594 
01595       goto func_exit;
01596     }
01597 
01598     block = max_chunk->blocks;
01599     bend = block + max_chunk->size;
01600 
01601     /* Move the blocks of chunk to the end of the
01602     LRU list and try to flush them. */
01603     for (; block < bend; block++) {
01604       switch (buf_block_get_state(block)) {
01605       case BUF_BLOCK_NOT_USED:
01606         continue;
01607       case BUF_BLOCK_FILE_PAGE:
01608         break;
01609       default:
01610         nonfree++;
01611         continue;
01612       }
01613 
01614       mutex_enter(&block->mutex);
01615       /* The following calls will temporarily
01616       release block->mutex and buf_pool->mutex.
01617       Therefore, we have to always retry,
01618       even if !dirty && !nonfree. */
01619 
01620       if (!buf_flush_ready_for_replace(&block->page)) {
01621 
01622         buf_LRU_make_block_old(&block->page);
01623         dirty++;
01624       } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
01625            != BUF_LRU_FREED) {
01626         nonfree++;
01627       }
01628 
01629       mutex_exit(&block->mutex);
01630     }
01631 
01632     buf_pool_mutex_exit(buf_pool);
01633 
01634     /* Request for a flush of the chunk if it helps.
01635     Do not flush if there are non-free blocks, since
01636     flushing will not make the chunk freeable. */
01637     if (nonfree) {
01638       /* Avoid busy-waiting. */
01639       os_thread_sleep(100000);
01640     } else if (dirty
01641          && buf_flush_LRU(buf_pool, dirty)
01642             == ULINT_UNDEFINED) {
01643 
01644       buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
01645     }
01646 
01647     goto try_again;
01648   }
01649 
01650   max_size = max_free_size;
01651   max_chunk = max_free_chunk;
01652 
01653   buf_pool->old_pool_size = buf_pool->curr_pool_size;
01654 
01655   /* Rewrite buf_pool->chunks.  Copy everything but max_chunk. */
01656   chunks = static_cast<buf_chunk_t *>(mem_alloc((buf_pool->n_chunks - 1) * sizeof *chunks));
01657   memcpy(chunks, buf_pool->chunks,
01658          (max_chunk - buf_pool->chunks) * sizeof *chunks);
01659   memcpy(chunks + (max_chunk - buf_pool->chunks),
01660          max_chunk + 1,
01661          buf_pool->chunks + buf_pool->n_chunks
01662          - (max_chunk + 1));
01663   ut_a(buf_pool->curr_size > max_chunk->size);
01664   buf_pool->curr_size -= max_chunk->size;
01665   buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
01666   chunk_size -= max_chunk->size;
01667   buf_chunk_free(buf_pool, max_chunk);
01668   mem_free(buf_pool->chunks);
01669   buf_pool->chunks = chunks;
01670   buf_pool->n_chunks--;
01671 
01672   /* Allow a slack of one megabyte. */
01673   if (chunk_size > 1048576 / UNIV_PAGE_SIZE) {
01674 
01675     goto shrink_again;
01676   }
01677   goto func_exit;
01678 
01679 func_done:
01680   buf_pool->old_pool_size = buf_pool->curr_pool_size;
01681 func_exit:
01682   buf_pool_mutex_exit(buf_pool);
01683   btr_search_enable();
01684 }
01685 
01686 /********************************************************************/
01688 static
01689 void
01690 buf_pool_shrink(
01691 /*============*/
01692   ulint chunk_size) 
01693 {
01694   ulint i;
01695 
01696   for (i = 0; i < srv_buf_pool_instances; i++) {
01697     buf_pool_t* buf_pool;
01698     ulint   instance_chunk_size;
01699 
01700     instance_chunk_size = chunk_size / srv_buf_pool_instances;
01701     buf_pool = buf_pool_from_array(i);
01702     buf_pool_shrink_instance(buf_pool, instance_chunk_size);
01703   }
01704 
01705   buf_pool_set_sizes();
01706 }
01707 
01708 /********************************************************************/
/* Rebuilds the page hash and zip hash of one buffer pool instance
after its size changed: recreates both tables sized to the current
pool and re-inserts every file page (uncompressed blocks, clean
compressed-only pages, and dirty pages on the flush list). */
static
void
buf_pool_page_hash_rebuild_instance(
/*================================*/
  buf_pool_t* buf_pool)   /*!< in: buffer pool instance */
{
  ulint   i;
  buf_page_t* b;
  buf_chunk_t*  chunk;
  ulint   n_chunks;
  hash_table_t* zip_hash;
  hash_table_t* page_hash;

  buf_pool_mutex_enter(buf_pool);

  /* Free, create, and populate the hash table. */
  hash_table_free(buf_pool->page_hash);
  buf_pool->page_hash = page_hash = hash_create(2 * buf_pool->curr_size);
  zip_hash = hash_create(2 * buf_pool->curr_size);

  /* The zip hash entries can be moved wholesale to the new table. */
  HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
         BUF_POOL_ZIP_FOLD_BPAGE);

  hash_table_free(buf_pool->zip_hash);
  buf_pool->zip_hash = zip_hash;

  /* Insert the uncompressed file pages to buf_pool->page_hash. */

  chunk = buf_pool->chunks;
  n_chunks = buf_pool->n_chunks;

  for (i = 0; i < n_chunks; i++, chunk++) {
    ulint   j;
    buf_block_t*  block = chunk->blocks;

    for (j = 0; j < chunk->size; j++, block++) {
      if (buf_block_get_state(block)
          == BUF_BLOCK_FILE_PAGE) {
        ut_ad(!block->page.in_zip_hash);
        ut_ad(block->page.in_page_hash);

        HASH_INSERT(buf_page_t, hash, page_hash,
              buf_page_address_fold(
                block->page.space,
                block->page.offset),
              &block->page);
      }
    }
  }

  /* Insert the compressed-only pages to buf_pool->page_hash.
  All such blocks are either in buf_pool->zip_clean or
  in buf_pool->flush_list. */

  for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
       b = UT_LIST_GET_NEXT(list, b)) {
    ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
    ut_ad(!b->in_flush_list);
    ut_ad(b->in_LRU_list);
    ut_ad(b->in_page_hash);
    ut_ad(!b->in_zip_hash);

    HASH_INSERT(buf_page_t, hash, page_hash,
          buf_page_address_fold(b->space, b->offset), b);
  }

  /* Dirty pages: only the compressed-only ones need inserting;
  uncompressed file pages were handled in the chunk scan above. */
  buf_flush_list_mutex_enter(buf_pool);
  for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
       b = UT_LIST_GET_NEXT(list, b)) {
    ut_ad(b->in_flush_list);
    ut_ad(b->in_LRU_list);
    ut_ad(b->in_page_hash);
    ut_ad(!b->in_zip_hash);

    switch (buf_page_get_state(b)) {
    case BUF_BLOCK_ZIP_DIRTY:
      HASH_INSERT(buf_page_t, hash, page_hash,
            buf_page_address_fold(b->space,
                b->offset), b);
      break;
    case BUF_BLOCK_FILE_PAGE:
      /* uncompressed page */
      break;
    case BUF_BLOCK_ZIP_FREE:
    case BUF_BLOCK_ZIP_PAGE:
    case BUF_BLOCK_NOT_USED:
    case BUF_BLOCK_READY_FOR_USE:
    case BUF_BLOCK_MEMORY:
    case BUF_BLOCK_REMOVE_HASH:
      /* No other state may appear on the flush list. */
      ut_error;
      break;
    }
  }

  buf_flush_list_mutex_exit(buf_pool);
  buf_pool_mutex_exit(buf_pool);
}
01807 
01808 /********************************************************************
01809 Determine if a block is a sentinel for a buffer pool watch.
01810 @return TRUE if a sentinel for a buffer pool watch, FALSE if not */
01811 UNIV_INTERN
01812 ibool
01813 buf_pool_watch_is_sentinel(
01814 /*=======================*/
01815   buf_pool_t*   buf_pool, 
01816   const buf_page_t* bpage)    
01817 {
01818   ut_ad(buf_page_in_file(bpage));
01819 
01820   if (bpage < &buf_pool->watch[0]
01821       || bpage >= &buf_pool->watch[BUF_POOL_WATCH_SIZE]) {
01822 
01823     ut_ad(buf_page_get_state(bpage) != BUF_BLOCK_ZIP_PAGE
01824           || bpage->zip.data != NULL);
01825 
01826     return(FALSE);
01827   }
01828 
01829   ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
01830   ut_ad(!bpage->in_zip_hash);
01831   ut_ad(bpage->in_page_hash);
01832   ut_ad(bpage->zip.data == NULL);
01833   ut_ad(bpage->buf_fix_count > 0);
01834   return(TRUE);
01835 }
01836 
01837 /****************************************************************/
/* Registers a watch for a page so a later lookup can detect whether
the page was read into the pool meanwhile.  Caller holds the buf_pool
mutex.
@return NULL if a watch was set or an existing one was re-fixed;
the page itself if it was already loaded into the pool */
UNIV_INTERN
buf_page_t*
buf_pool_watch_set(
/*===============*/
  ulint space,  /*!< in: space id of the watched page */
  ulint offset, /*!< in: page number of the watched page */
  ulint fold) /*!< in: buf_page_address_fold(space, offset) */
{
  buf_page_t* bpage;
  ulint   i;
  buf_pool_t* buf_pool = buf_pool_get(space, offset);

  ut_ad(buf_pool_mutex_own(buf_pool));

  bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);

  if (UNIV_LIKELY_NULL(bpage)) {
    if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
      /* The page was loaded meanwhile. */
      return(bpage);
    }
    /* Add to an existing watch. */
    bpage->buf_fix_count++;
    return(NULL);
  }

  /* Claim a free slot in the watch array. */
  for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
    bpage = &buf_pool->watch[i];

    ut_ad(bpage->access_time == 0);
    ut_ad(bpage->newest_modification == 0);
    ut_ad(bpage->oldest_modification == 0);
    ut_ad(bpage->zip.data == NULL);
    ut_ad(!bpage->in_zip_hash);

    switch (bpage->state) {
    case BUF_BLOCK_POOL_WATCH:
      /* Free slot: turn it into a sentinel for this page. */
      ut_ad(!bpage->in_page_hash);
      ut_ad(bpage->buf_fix_count == 0);

      /* bpage is pointing to buf_pool->watch[],
      which is protected by buf_pool->mutex.
      Normally, buf_page_t objects are protected by
      buf_block_t::mutex or buf_pool->zip_mutex or both. */

      bpage->state = BUF_BLOCK_ZIP_PAGE;
      bpage->space = space;
      bpage->offset = offset;
      bpage->buf_fix_count = 1;

      ut_d(bpage->in_page_hash = TRUE);
      HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
            fold, bpage);
      return(NULL);
    case BUF_BLOCK_ZIP_PAGE:
      /* Slot already in use as a sentinel for some
      other page; keep looking. */
      ut_ad(bpage->in_page_hash);
      ut_ad(bpage->buf_fix_count > 0);
      break;
    default:
      ut_error;
    }
  }

  /* Allocation failed.  Either the maximum number of purge
  threads should never exceed BUF_POOL_WATCH_SIZE, or this code
  should be modified to return a special non-NULL value and the
  caller should purge the record directly. */
  ut_error;

  /* Fix compiler warning */
  return(NULL);
}
01913 
01914 /********************************************************************/
01916 static
01917 void
01918 buf_pool_page_hash_rebuild(void)
01919 /*============================*/
01920 {
01921   ulint   i;
01922 
01923   for (i = 0; i < srv_buf_pool_instances; i++) {
01924     buf_pool_page_hash_rebuild_instance(buf_pool_from_array(i));
01925   }
01926 }
01927 
/********************************************************************/
/** Append one new chunk of change_size pages to a buffer pool
instance, growing it in place. */
static
void
buf_pool_increase_instance(
/*=======================*/
	buf_pool_t*	buf_pool,	/*!< in/out: buffer pool instance */
	ulint		change_size)	/*!< in: number of pages to add */
{
	buf_chunk_t*	chunks;
	buf_chunk_t*	chunk;

	buf_pool_mutex_enter(buf_pool);
	/* Allocate a chunk array one slot longer than the current one.
	NOTE(review): the mem_alloc() result is used unchecked —
	presumably it aborts on OOM in this codebase; confirm. */
	chunks = static_cast<buf_chunk_t *>(mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks));

	/* Copy the existing chunk descriptors; the new chunk goes in
	the extra slot at the end. */
	memcpy(chunks, buf_pool->chunks, buf_pool->n_chunks * sizeof *chunks);

	chunk = &chunks[buf_pool->n_chunks];

	if (!buf_chunk_init(buf_pool, chunk, change_size)) {
		/* Could not allocate the new chunk's memory: discard
		the new array and leave the pool unchanged. */
		mem_free(chunks);
	} else {
		/* Publish the new array and update the size counters
		(curr_size is in pages, curr_pool_size in bytes). */
		buf_pool->old_pool_size = buf_pool->curr_pool_size;
		buf_pool->curr_size += chunk->size;
		buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
		mem_free(buf_pool->chunks);
		buf_pool->chunks = chunks;
		buf_pool->n_chunks++;
	}

	buf_pool_mutex_exit(buf_pool);
}
01960 
01961 /********************************************************************/
01963 static
01964 void
01965 buf_pool_increase(
01966 /*==============*/
01967   ulint   change_size)
01968 {
01969   ulint   i;
01970 
01971   for (i = 0; i < srv_buf_pool_instances; i++) {
01972     buf_pool_increase_instance(
01973       buf_pool_from_array(i),
01974       change_size / srv_buf_pool_instances);
01975   }
01976 
01977   buf_pool_set_sizes();
01978 }
01979 
01980 /********************************************************************/
01982 UNIV_INTERN
01983 void
01984 buf_pool_resize(void)
01985 /*=================*/
01986 {
01987   ulint change_size;
01988   ulint min_change_size = 1048576 * srv_buf_pool_instances;
01989 
01990   buf_pool_mutex_enter_all();
01991   
01992     if (srv_buf_pool_old_size == srv_buf_pool_size) {
01993   
01994     buf_pool_mutex_exit_all();
01995 
01996       return;
01997 
01998     } else if (srv_buf_pool_curr_size + min_change_size
01999        > srv_buf_pool_size) {
02000   
02001     change_size = (srv_buf_pool_curr_size - srv_buf_pool_size)
02002           / UNIV_PAGE_SIZE;
02003 
02004     buf_pool_mutex_exit_all();
02005   
02006       /* Disable adaptive hash indexes and empty the index
02007       in order to free up memory in the buffer pool chunks. */
02008     buf_pool_shrink(change_size);
02009 
02010   } else if (srv_buf_pool_curr_size + min_change_size
02011        < srv_buf_pool_size) {
02012  
02013       /* Enlarge the buffer pool by at least one megabyte */
02014   
02015     change_size = srv_buf_pool_size - srv_buf_pool_curr_size;
02016 
02017     buf_pool_mutex_exit_all();
02018 
02019     buf_pool_increase(change_size);
02020   } else {
02021     srv_buf_pool_size = srv_buf_pool_old_size;
02022 
02023     buf_pool_mutex_exit_all();
02024 
02025     return;
02026   }
02027   
02028     buf_pool_page_hash_rebuild();
02029 }
02030  
/****************************************************************/
/** Remove a page-watch sentinel from buf_pool->page_hash and return
its buf_pool->watch[] slot to the free state.  Caller must hold
buf_pool->mutex. */
static
void
buf_pool_watch_remove(
/*==================*/
	buf_pool_t*	buf_pool,	/*!< in/out: buffer pool instance */
	ulint		fold,		/*!< in: buf_page_address_fold(
					space, offset) of the watch */
	buf_page_t*	watch)		/*!< in/out: sentinel object in
					buf_pool->watch[] */
{
	ut_ad(buf_pool_mutex_own(buf_pool));

	HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
	ut_d(watch->in_page_hash = FALSE);
	/* Mark the slot free so buf_pool_watch_set() can reuse it. */
	watch->buf_fix_count = 0;
	watch->state = BUF_BLOCK_POOL_WATCH;
}
02052 
/****************************************************************/
/** Stop watching a page that was registered with
buf_pool_watch_set(): release one buf_fix_count reference, and remove
the sentinel when the last reference is released. */
UNIV_INTERN
void
buf_pool_watch_unset(
/*=================*/
	ulint	space,	/*!< in: space id */
	ulint	offset)	/*!< in: page number */
{
	buf_page_t*	bpage;
	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
	ulint		fold = buf_page_address_fold(space, offset);

	buf_pool_mutex_enter(buf_pool);
	bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
	/* The page must exist because buf_pool_watch_set()
	increments buf_fix_count. */
	ut_a(bpage);

	if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
		/* The page was read into the pool meanwhile; release
		the fix under the block's own mutex. */
		mutex_t* mutex = buf_page_get_mutex(bpage);

		mutex_enter(mutex);
		ut_a(bpage->buf_fix_count > 0);
		bpage->buf_fix_count--;
		mutex_exit(mutex);
	} else {
		ut_a(bpage->buf_fix_count > 0);

		/* Still a sentinel: drop our reference and free the
		watch slot when the last watcher leaves.  The sentinel
		is protected by buf_pool->mutex, which we hold. */
		if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
			buf_pool_watch_remove(buf_pool, fold, bpage);
		}
	}

	buf_pool_mutex_exit(buf_pool);
}
02090 
02091 /****************************************************************/
02096 UNIV_INTERN
02097 ibool
02098 buf_pool_watch_occurred(
02099 /*====================*/
02100   ulint space,  
02101   ulint offset) 
02102 {
02103   ibool   ret;
02104   buf_page_t* bpage;
02105   buf_pool_t* buf_pool = buf_pool_get(space, offset);
02106   ulint   fold  = buf_page_address_fold(space, offset);
02107 
02108   buf_pool_mutex_enter(buf_pool);
02109 
02110   bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
02111   /* The page must exist because buf_pool_watch_set()
02112   increments buf_fix_count. */
02113   ut_a(bpage);
02114   ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
02115   buf_pool_mutex_exit(buf_pool);
02116 
02117   return(ret);
02118 }
02119 
02120 /********************************************************************/
02124 UNIV_INTERN
02125 void
02126 buf_page_make_young(
02127 /*================*/
02128   buf_page_t* bpage)  
02129 {
02130   buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
02131 
02132   buf_pool_mutex_enter(buf_pool);
02133 
02134   ut_a(buf_page_in_file(bpage));
02135 
02136   buf_LRU_make_block_young(bpage);
02137 
02138   buf_pool_mutex_exit(buf_pool);
02139 }
02140 
02141 /********************************************************************/
02146 static
02147 void
02148 buf_page_set_accessed_make_young(
02149 /*=============================*/
02150   buf_page_t* bpage,    
02152   unsigned  access_time)  
02155 {
02156   buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
02157 
02158   ut_ad(!buf_pool_mutex_own(buf_pool));
02159   ut_a(buf_page_in_file(bpage));
02160 
02161   if (buf_page_peek_if_too_old(bpage)) {
02162     buf_pool_mutex_enter(buf_pool);
02163     buf_LRU_make_block_young(bpage);
02164     buf_pool_mutex_exit(buf_pool);
02165   } else if (!access_time) {
02166     ulint time_ms = ut_time_ms();
02167     buf_pool_mutex_enter(buf_pool);
02168     buf_page_set_accessed(bpage, time_ms);
02169     buf_pool_mutex_exit(buf_pool);
02170   }
02171 }
02172 
02173 /********************************************************************/
02176 UNIV_INTERN
02177 void
02178 buf_reset_check_index_page_at_flush(
02179 /*================================*/
02180   ulint space,  
02181   ulint offset) 
02182 {
02183   buf_block_t*  block;
02184   buf_pool_t* buf_pool = buf_pool_get(space, offset);
02185 
02186   buf_pool_mutex_enter(buf_pool);
02187 
02188   block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
02189 
02190   if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
02191     ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
02192     block->check_index_page_at_flush = FALSE;
02193   }
02194 
02195   buf_pool_mutex_exit(buf_pool);
02196 }
02197 
02198 /********************************************************************/
02203 UNIV_INTERN
02204 ibool
02205 buf_page_peek_if_search_hashed(
02206 /*===========================*/
02207   ulint space,  
02208   ulint offset) 
02209 {
02210   buf_block_t*  block;
02211   ibool   is_hashed;
02212   buf_pool_t* buf_pool = buf_pool_get(space, offset);
02213 
02214   buf_pool_mutex_enter(buf_pool);
02215 
02216   block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
02217 
02218   if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
02219     is_hashed = FALSE;
02220   } else {
02221     ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
02222     is_hashed = block->is_hashed;
02223   }
02224 
02225   buf_pool_mutex_exit(buf_pool);
02226 
02227   return(is_hashed);
02228 }
02229 
02230 #ifdef UNIV_DEBUG_FILE_ACCESSES
02231 /********************************************************************/
02237 UNIV_INTERN
02238 buf_page_t*
02239 buf_page_set_file_page_was_freed(
02240 /*=============================*/
02241   ulint space,  
02242   ulint offset) 
02243 {
02244   buf_page_t* bpage;
02245   buf_pool_t* buf_pool = buf_pool_get(space, offset);
02246 
02247   buf_pool_mutex_enter(buf_pool);
02248 
02249   bpage = buf_page_hash_get(buf_pool, space, offset);
02250 
02251   if (bpage) {
02252     ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
02253     bpage->file_page_was_freed = TRUE;
02254   }
02255 
02256   buf_pool_mutex_exit(buf_pool);
02257 
02258   return(bpage);
02259 }
02260 
02261 /********************************************************************/
02267 UNIV_INTERN
02268 buf_page_t*
02269 buf_page_reset_file_page_was_freed(
02270 /*===============================*/
02271   ulint space,  
02272   ulint offset) 
02273 {
02274   buf_page_t* bpage;
02275   buf_pool_t* buf_pool = buf_pool_get(space, offset);
02276 
02277   buf_pool_mutex_enter(buf_pool);
02278 
02279   bpage = buf_page_hash_get(buf_pool, space, offset);
02280 
02281   if (bpage) {
02282     ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
02283     bpage->file_page_was_freed = FALSE;
02284   }
02285 
02286   buf_pool_mutex_exit(buf_pool);
02287 
02288   return(bpage);
02289 }
02290 #endif /* UNIV_DEBUG_FILE_ACCESSES */
02291 
/********************************************************************/
/** Get read access to the compressed frame of a page, reading it from
file if necessary.  On success the page is buffer-fixed
(buf_fix_count incremented) and, if an I/O read was pending, the
function waits for it to complete before returning.
@return	the buffer-fixed page, or NULL if the block in the pool has no
compressed frame */
UNIV_INTERN
buf_page_t*
buf_page_get_zip(
/*=============*/
	ulint		space,	/*!< in: space id */
	ulint		zip_size,/*!< in: compressed page size */
	ulint		offset)	/*!< in: page number */
{
	buf_page_t*	bpage;
	mutex_t*	block_mutex;
	ibool		must_read;
	unsigned	access_time;
	buf_pool_t*	buf_pool = buf_pool_get(space, offset);

#ifndef UNIV_LOG_DEBUG
	ut_ad(!ibuf_inside());
#endif
	buf_pool->stat.n_page_gets++;

	/* Loop until the page is present in the pool. */
	for (;;) {
		buf_pool_mutex_enter(buf_pool);
lookup:
		bpage = buf_page_hash_get(buf_pool, space, offset);
		if (bpage) {
			ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
			break;
		}

		/* Page not in buf_pool: needs to be read from file */

		buf_pool_mutex_exit(buf_pool);

		buf_read_page(space, zip_size, offset);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
		ut_a(++buf_dbg_counter % 37 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
	}

	if (UNIV_UNLIKELY(!bpage->zip.data)) {
		/* There is no compressed page. */
err_exit:
		buf_pool_mutex_exit(buf_pool);
		return(NULL);
	}

	ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));

	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_READY_FOR_USE:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
	case BUF_BLOCK_ZIP_FREE:
		/* Impossible states for a page found in page_hash;
		falls through to the ut_error below. */
		break;
	case BUF_BLOCK_ZIP_PAGE:
	case BUF_BLOCK_ZIP_DIRTY:
		/* Compressed-only page: buffer-fix it under the
		shared zip_mutex. */
		block_mutex = &buf_pool->zip_mutex;
		mutex_enter(block_mutex);
		bpage->buf_fix_count++;
		goto got_block;
	case BUF_BLOCK_FILE_PAGE:
		block_mutex = &((buf_block_t*) bpage)->mutex;
		mutex_enter(block_mutex);

		/* Discard the uncompressed page frame if possible. */
		if (buf_LRU_free_block(bpage, FALSE, NULL)
		    == BUF_LRU_FREED) {

			/* The descriptor was freed along with the
			uncompressed frame: look the page up again. */
			mutex_exit(block_mutex);
			goto lookup;
		}

		buf_block_buf_fix_inc((buf_block_t*) bpage,
				      __FILE__, __LINE__);
		goto got_block;
	}

	ut_error;
	goto err_exit;

got_block:
	/* Sample the I/O-fix and access time while still holding the
	block mutex, before releasing the pool mutex. */
	must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
	access_time = buf_page_is_accessed(bpage);

	buf_pool_mutex_exit(buf_pool);

	mutex_exit(block_mutex);

	buf_page_set_accessed_make_young(bpage, access_time);

#ifdef UNIV_DEBUG_FILE_ACCESSES
	ut_a(!bpage->file_page_was_freed);
#endif

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(bpage->buf_fix_count > 0);
	ut_a(buf_page_in_file(bpage));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	if (must_read) {
		/* Let us wait until the read operation
		completes */

		for (;;) {
			enum buf_io_fix	io_fix;

			mutex_enter(block_mutex);
			io_fix = buf_page_get_io_fix(bpage);
			mutex_exit(block_mutex);

			if (io_fix == BUF_IO_READ) {

				os_thread_sleep(WAIT_FOR_READ);
			} else {
				break;
			}
		}
	}

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(buf_page_get_space(bpage),
			    buf_page_get_page_no(bpage)) == 0);
#endif
	return(bpage);
}
02428 
02429 /********************************************************************/
02431 UNIV_INLINE
02432 void
02433 buf_block_init_low(
02434 /*===============*/
02435   buf_block_t*  block)  
02436 {
02437   block->check_index_page_at_flush = FALSE;
02438   block->index    = NULL;
02439 
02440   block->n_hash_helps = 0;
02441   block->is_hashed  = FALSE;
02442   block->n_fields   = 1;
02443   block->n_bytes    = 0;
02444   block->left_side  = TRUE;
02445 }
02446 #endif /* !UNIV_HOTBACKUP */
02447 
02448 /********************************************************************/
02451 UNIV_INTERN
02452 ibool
02453 buf_zip_decompress(
02454 /*===============*/
02455   buf_block_t*  block,  
02456   ibool   check)  
02457 {
02458   const byte* frame   = block->page.zip.data;
02459   ulint   stamp_checksum  = mach_read_from_4(
02460     frame + FIL_PAGE_SPACE_OR_CHKSUM);
02461 
02462   ut_ad(buf_block_get_zip_size(block));
02463   ut_a(buf_block_get_space(block) != 0);
02464 
02465   if (UNIV_LIKELY(check && stamp_checksum != BUF_NO_CHECKSUM_MAGIC)) {
02466     ulint calc_checksum = page_zip_calc_checksum(
02467       frame, page_zip_get_size(&block->page.zip));
02468 
02469     if (UNIV_UNLIKELY(stamp_checksum != calc_checksum)) {
02470       ut_print_timestamp(stderr);
02471       fprintf(stderr,
02472         "  InnoDB: compressed page checksum mismatch"
02473         " (space %u page %u): %lu != %lu\n",
02474         block->page.space, block->page.offset,
02475         stamp_checksum, calc_checksum);
02476       return(FALSE);
02477     }
02478   }
02479 
02480   switch (fil_page_get_type(frame)) {
02481   case FIL_PAGE_INDEX:
02482     if (page_zip_decompress(&block->page.zip,
02483           block->frame, TRUE)) {
02484       return(TRUE);
02485     }
02486 
02487     fprintf(stderr,
02488       "InnoDB: unable to decompress space %lu page %lu\n",
02489       (ulong) block->page.space,
02490       (ulong) block->page.offset);
02491     return(FALSE);
02492 
02493   case FIL_PAGE_TYPE_ALLOCATED:
02494   case FIL_PAGE_INODE:
02495   case FIL_PAGE_IBUF_BITMAP:
02496   case FIL_PAGE_TYPE_FSP_HDR:
02497   case FIL_PAGE_TYPE_XDES:
02498   case FIL_PAGE_TYPE_ZBLOB:
02499   case FIL_PAGE_TYPE_ZBLOB2:
02500     /* Copy to uncompressed storage. */
02501     memcpy(block->frame, frame,
02502            buf_block_get_zip_size(block));
02503     return(TRUE);
02504   }
02505 
02506   ut_print_timestamp(stderr);
02507   fprintf(stderr,
02508     "  InnoDB: unknown compressed page"
02509     " type %lu\n",
02510     fil_page_get_type(frame));
02511   return(FALSE);
02512 }
02513 
02514 #ifndef UNIV_HOTBACKUP
/*******************************************************************/
/** Map a pointer within a buffer frame to the buf_block_t that owns
the frame, by scanning one buffer pool instance's chunk array.
@return	the owning block, or NULL if ptr is not inside this instance */
static
buf_block_t*
buf_block_align_instance(
/*=====================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	const byte*	ptr)		/*!< in: pointer within a buffer
					frame */
{
	buf_chunk_t*	chunk;
	ulint		i;

	/* TODO: protect buf_pool->chunks with a mutex (it will
	currently remain constant after buf_pool_init()) */
	for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) {
		/* Byte offset of ptr from the first frame of this
		chunk; negative means ptr lies below the chunk. */
		lint	offs = ptr - chunk->blocks->frame;

		if (UNIV_UNLIKELY(offs < 0)) {

			continue;
		}

		/* Convert the byte offset to a frame index. */
		offs >>= UNIV_PAGE_SIZE_SHIFT;

		if (UNIV_LIKELY((ulint) offs < chunk->size)) {
			buf_block_t*	block = &chunk->blocks[offs];

			/* The function buf_chunk_init() invokes
			buf_block_init() so that block[n].frame ==
			block->frame + n * UNIV_PAGE_SIZE.  Check it. */
			ut_ad(block->frame == page_align(ptr));
#ifdef UNIV_DEBUG
			/* A thread that updates these fields must
			hold buf_pool->mutex and block->mutex.  Acquire
			only the latter. */
			mutex_enter(&block->mutex);

			switch (buf_block_get_state(block)) {
			case BUF_BLOCK_ZIP_FREE:
			case BUF_BLOCK_ZIP_PAGE:
			case BUF_BLOCK_ZIP_DIRTY:
				/* These types should only be used in
				the compressed buffer pool, whose
				memory is allocated from
				buf_pool->chunks, in UNIV_PAGE_SIZE
				blocks flagged as BUF_BLOCK_MEMORY. */
				ut_error;
				break;
			case BUF_BLOCK_NOT_USED:
			case BUF_BLOCK_READY_FOR_USE:
			case BUF_BLOCK_MEMORY:
				/* Some data structures contain
				"guess" pointers to file pages.  The
				file pages may have been freed and
				reused.  Do not complain. */
				break;
			case BUF_BLOCK_REMOVE_HASH:
				/* buf_LRU_block_remove_hashed_page()
				will overwrite the FIL_PAGE_OFFSET and
				FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID with
				0xff and set the state to
				BUF_BLOCK_REMOVE_HASH. */
				ut_ad(page_get_space_id(page_align(ptr))
				      == 0xffffffff);
				ut_ad(page_get_page_no(page_align(ptr))
				      == 0xffffffff);
				break;
			case BUF_BLOCK_FILE_PAGE:
				ut_ad(block->page.space
				      == page_get_space_id(page_align(ptr)));
				ut_ad(block->page.offset
				      == page_get_page_no(page_align(ptr)));
				break;
			}

			mutex_exit(&block->mutex);
#endif /* UNIV_DEBUG */

			return(block);
		}
	}

	return(NULL);
}
02602 
02603 /*******************************************************************/
02606 UNIV_INTERN
02607 buf_block_t*
02608 buf_block_align(
02609 /*============*/
02610   const byte* ptr)  
02611 {
02612   ulint   i;
02613 
02614   for (i = 0; i < srv_buf_pool_instances; i++) {
02615     buf_block_t*  block;
02616 
02617     block = buf_block_align_instance(
02618       buf_pool_from_array(i), ptr);
02619     if (block) {
02620       return(block);
02621     }
02622   }
02623 
02624   /* The block should always be found. */
02625   ut_error;
02626   return(NULL);
02627 }
02628 
02629 /********************************************************************/
02634 static
02635 ibool
02636 buf_pointer_is_block_field_instance(
02637 /*================================*/
02638   buf_pool_t* buf_pool, 
02639   const void* ptr)    
02640 {
02641   const buf_chunk_t*    chunk = buf_pool->chunks;
02642   const buf_chunk_t* const  echunk  = chunk + buf_pool->n_chunks;
02643 
02644   /* TODO: protect buf_pool->chunks with a mutex (it will
02645   currently remain constant after buf_pool_init()) */
02646   while (chunk < echunk) {
02647     if (ptr >= (void *)chunk->blocks
02648         && ptr < (void *)(chunk->blocks + chunk->size)) {
02649 
02650       return(TRUE);
02651     }
02652 
02653     chunk++;
02654   }
02655 
02656   return(FALSE);
02657 }
02658 
02659 /********************************************************************/
02663 UNIV_INTERN
02664 ibool
02665 buf_pointer_is_block_field(
02666 /*=======================*/
02667   const void* ptr)  
02668 {
02669   ulint i;
02670 
02671   for (i = 0; i < srv_buf_pool_instances; i++) {
02672     ibool found;
02673 
02674     found = buf_pointer_is_block_field_instance(
02675       buf_pool_from_array(i), ptr);
02676     if (found) {
02677       return(TRUE);
02678     }
02679   }
02680 
02681   return(FALSE);
02682 }
02683 
02684 /********************************************************************/
02687 static
02688 ibool
02689 buf_block_is_uncompressed(
02690 /*======================*/
02691   buf_pool_t*   buf_pool, 
02692   const buf_block_t*  block)    
02694 {
02695   ut_ad(buf_pool_mutex_own(buf_pool));
02696 
02697   if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
02698     /* The pointer should be aligned. */
02699     return(FALSE);
02700   }
02701 
02702   return(buf_pointer_is_block_field_instance(buf_pool, (void *)block));
02703 }
02704 
02705 /********************************************************************/
02708 UNIV_INTERN
02709 buf_block_t*
02710 buf_page_get_gen(
02711 /*=============*/
02712   ulint   space,  
02713   ulint   zip_size,
02715   ulint   offset, 
02716   ulint   rw_latch,
02717   buf_block_t*  guess,  
02718   ulint   mode, 
02721   const char* file, 
02722   ulint   line, 
02723   mtr_t*    mtr)  
02724 {
02725   buf_block_t*  block;
02726   ulint   fold;
02727   unsigned  access_time;
02728   ulint   fix_type;
02729   ibool   must_read;
02730   ulint   retries = 0;
02731   buf_pool_t* buf_pool = buf_pool_get(space, offset);
02732 
02733   ut_ad(mtr);
02734   ut_ad(mtr->state == MTR_ACTIVE);
02735   ut_ad((rw_latch == RW_S_LATCH)
02736         || (rw_latch == RW_X_LATCH)
02737         || (rw_latch == RW_NO_LATCH));
02738   ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
02739   ut_ad(mode == BUF_GET
02740         || mode == BUF_GET_IF_IN_POOL
02741         || mode == BUF_GET_NO_LATCH
02742         || mode == BUF_GET_IF_IN_POOL_OR_WATCH);
02743   ut_ad(zip_size == fil_space_get_zip_size(space));
02744   ut_ad(ut_is_2pow(zip_size));
02745 #ifndef UNIV_LOG_DEBUG
02746   ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL));
02747 #endif
02748   buf_pool->stat.n_page_gets++;
02749   fold = buf_page_address_fold(space, offset);
02750 loop:
02751   block = guess;
02752   buf_pool_mutex_enter(buf_pool);
02753 
02754   if (block) {
02755     /* If the guess is a compressed page descriptor that
02756     has been allocated by buf_buddy_alloc(), it may have
02757     been invalidated by buf_buddy_relocate().  In that
02758     case, block could point to something that happens to
02759     contain the expected bits in block->page.  Similarly,
02760     the guess may be pointing to a buffer pool chunk that
02761     has been released when resizing the buffer pool. */
02762 
02763     if (!buf_block_is_uncompressed(buf_pool, block)
02764         || offset != block->page.offset
02765         || space != block->page.space
02766         || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
02767 
02768       block = guess = NULL;
02769     } else {
02770       ut_ad(!block->page.in_zip_hash);
02771       ut_ad(block->page.in_page_hash);
02772     }
02773   }
02774 
02775   if (block == NULL) {
02776     block = (buf_block_t*) buf_page_hash_get_low(
02777       buf_pool, space, offset, fold);
02778   }
02779 
02780 loop2:
02781   if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
02782     block = NULL;
02783   }
02784 
02785   if (block == NULL) {
02786     /* Page not in buf_pool: needs to be read from file */
02787 
02788     if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
02789       block = (buf_block_t*) buf_pool_watch_set(
02790         space, offset, fold);
02791 
02792       if (UNIV_LIKELY_NULL(block)) {
02793 
02794         goto got_block;
02795       }
02796     }
02797 
02798     buf_pool_mutex_exit(buf_pool);
02799 
02800     if (mode == BUF_GET_IF_IN_POOL
02801         || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
02802 
02803       return(NULL);
02804     }
02805 
02806     if (buf_read_page(space, zip_size, offset)) {
02807       retries = 0;
02808     } else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
02809       ++retries;
02810     } else {
02811       fprintf(stderr, "InnoDB: Error: Unable"
02812         " to read tablespace %lu page no"
02813         " %lu into the buffer pool after"
02814         " %lu attempts\n"
02815         "InnoDB: The most probable cause"
02816         " of this error may be that the"
02817         " table has been corrupted.\n"
02818         "InnoDB: You can try to fix this"
02819         " problem by using"
02820         " innodb_force_recovery.\n"
02821         "InnoDB: Please see reference manual"
02822         " for more details.\n"
02823         "InnoDB: Aborting...\n",
02824         space, offset,
02825         BUF_PAGE_READ_MAX_RETRIES);
02826 
02827       ut_error;
02828     }
02829 
02830 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
02831     ut_a(++buf_dbg_counter % 37 || buf_validate());
02832 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
02833     goto loop;
02834   }
02835 
02836 got_block:
02837   ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
02838 
02839   must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
02840 
02841   if (must_read && mode == BUF_GET_IF_IN_POOL) {
02842 
02843     /* The page is being read to buffer pool,
02844     but we cannot wait around for the read to
02845     complete. */
02846     buf_pool_mutex_exit(buf_pool);
02847 
02848     return(NULL);
02849   }
02850 
02851   switch (buf_block_get_state(block)) {
02852     buf_page_t* bpage;
02853     ibool   success;
02854 
02855   case BUF_BLOCK_FILE_PAGE:
02856     break;
02857 
02858   case BUF_BLOCK_ZIP_PAGE:
02859   case BUF_BLOCK_ZIP_DIRTY:
02860     bpage = &block->page;
02861     /* Protect bpage->buf_fix_count. */
02862     mutex_enter(&buf_pool->zip_mutex);
02863 
02864     if (bpage->buf_fix_count
02865         || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
02866       /* This condition often occurs when the buffer
02867       is not buffer-fixed, but I/O-fixed by
02868       buf_page_init_for_read(). */
02869       mutex_exit(&buf_pool->zip_mutex);
02870 wait_until_unfixed:
02871       /* The block is buffer-fixed or I/O-fixed.
02872       Try again later. */
02873       buf_pool_mutex_exit(buf_pool);
02874       os_thread_sleep(WAIT_FOR_READ);
02875   
02876       goto loop;
02877     }
02878 
02879     /* Allocate an uncompressed page. */
02880     buf_pool_mutex_exit(buf_pool);
02881     mutex_exit(&buf_pool->zip_mutex);
02882 
02883     block = buf_LRU_get_free_block(buf_pool, 0);
02884     ut_a(block);
02885 
02886     buf_pool_mutex_enter(buf_pool);
02887     mutex_enter(&block->mutex);
02888 
02889     {
02890       buf_page_t* hash_bpage;
02891 
02892       hash_bpage = buf_page_hash_get_low(
02893         buf_pool, space, offset, fold);
02894 
02895       if (UNIV_UNLIKELY(bpage != hash_bpage)) {
02896         /* The buf_pool->page_hash was modified
02897         while buf_pool->mutex was released.
02898         Free the block that was allocated. */
02899 
02900         buf_LRU_block_free_non_file_page(block);
02901         mutex_exit(&block->mutex);
02902 
02903         block = (buf_block_t*) hash_bpage;
02904         goto loop2;
02905       }
02906     }
02907 
02908     if (UNIV_UNLIKELY
02909         (bpage->buf_fix_count
02910          || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
02911 
02912       /* The block was buffer-fixed or I/O-fixed
02913       while buf_pool->mutex was not held by this thread.
02914       Free the block that was allocated and try again.
02915       This should be extremely unlikely. */
02916 
02917       buf_LRU_block_free_non_file_page(block);
02918       mutex_exit(&block->mutex);
02919 
02920       goto wait_until_unfixed;
02921     }
02922 
02923     /* Move the compressed page from bpage to block,
02924     and uncompress it. */
02925 
02926     mutex_enter(&buf_pool->zip_mutex);
02927 
02928     buf_relocate(bpage, &block->page);
02929     buf_block_init_low(block);
02930     block->lock_hash_val = lock_rec_hash(space, offset);
02931 
02932     UNIV_MEM_DESC(&block->page.zip.data,
02933             page_zip_get_size(&block->page.zip), block);
02934 
02935     if (buf_page_get_state(&block->page)
02936         == BUF_BLOCK_ZIP_PAGE) {
02937       UT_LIST_REMOVE(list, buf_pool->zip_clean,
02938                &block->page);
02939       ut_ad(!block->page.in_flush_list);
02940     } else {
02941       /* Relocate buf_pool->flush_list. */
02942       buf_flush_relocate_on_flush_list(bpage,
02943                &block->page);
02944     }
02945 
02946     /* Buffer-fix, I/O-fix, and X-latch the block
02947     for the duration of the decompression.
02948     Also add the block to the unzip_LRU list. */
02949     block->page.state = BUF_BLOCK_FILE_PAGE;
02950 
02951     /* Insert at the front of unzip_LRU list */
02952     buf_unzip_LRU_add_block(block, FALSE);
02953 
02954     block->page.buf_fix_count = 1;
02955     buf_block_set_io_fix(block, BUF_IO_READ);
02956     rw_lock_x_lock_func(&block->lock, 0, file, line);
02957 
02958     UNIV_MEM_INVALID(bpage, sizeof *bpage);
02959 
02960     mutex_exit(&block->mutex);
02961     mutex_exit(&buf_pool->zip_mutex);
02962     buf_pool->n_pend_unzip++;
02963 
02964     buf_buddy_free(buf_pool, bpage, sizeof *bpage);
02965 
02966     buf_pool_mutex_exit(buf_pool);
02967 
02968     /* Decompress the page and apply buffered operations
02969     while not holding buf_pool->mutex or block->mutex. */
02970     success = buf_zip_decompress(block, srv_use_checksums);
02971     ut_a(success);
02972 
02973     if (UNIV_LIKELY(!recv_no_ibuf_operations)) {
02974       ibuf_merge_or_delete_for_page(block, space, offset,
02975                   zip_size, TRUE);
02976     }
02977 
02978     /* Unfix and unlatch the block. */
02979     buf_pool_mutex_enter(buf_pool);
02980     mutex_enter(&block->mutex);
02981     block->page.buf_fix_count--;
02982     buf_block_set_io_fix(block, BUF_IO_NONE);
02983     mutex_exit(&block->mutex);
02984     buf_pool->n_pend_unzip--;
02985     rw_lock_x_unlock(&block->lock);
02986 
02987     break;
02988 
02989   case BUF_BLOCK_ZIP_FREE:
02990   case BUF_BLOCK_NOT_USED:
02991   case BUF_BLOCK_READY_FOR_USE:
02992   case BUF_BLOCK_MEMORY:
02993   case BUF_BLOCK_REMOVE_HASH:
02994     ut_error;
02995     break;
02996   }
02997 
02998   ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
02999 
03000   mutex_enter(&block->mutex);
03001 #if UNIV_WORD_SIZE == 4
03002   /* On 32-bit systems, there is no padding in buf_page_t.  On
03003   other systems, Valgrind could complain about uninitialized pad
03004   bytes. */
03005   UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
03006 #endif
03007 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
03008   if ((mode == BUF_GET_IF_IN_POOL || mode == BUF_GET_IF_IN_POOL_OR_WATCH)
03009       && ibuf_debug) {
03010     /* Try to evict the block from the buffer pool, to use the
03011     insert buffer (change buffer) as much as possible. */
03012 
03013     if (buf_LRU_free_block(&block->page, TRUE, NULL)
03014         == BUF_LRU_FREED) {
03015       mutex_exit(&block->mutex);
03016       if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
03017         /* Set the watch, as it would have
03018         been set if the page were not in the
03019         buffer pool in the first place. */
03020         block = (buf_block_t*) buf_pool_watch_set(
03021           space, offset, fold);
03022 
03023         if (UNIV_LIKELY_NULL(block)) {
03024 
03025           /* The page entered the buffer
03026           pool for some reason. Try to
03027           evict it again. */
03028           goto got_block;
03029         }
03030       }
03031       buf_pool_mutex_exit(buf_pool);
03032       fprintf(stderr,
03033         "innodb_change_buffering_debug evict %u %u\n",
03034         (unsigned) space, (unsigned) offset);
03035       return(NULL);
03036     } else if (buf_flush_page_try(buf_pool, block)) {
03037       fprintf(stderr,
03038         "innodb_change_buffering_debug flush %u %u\n",
03039         (unsigned) space, (unsigned) offset);
03040       guess = block;
03041       goto loop;
03042     }
03043 
03044     /* Failed to evict the page; change it directly */
03045   }
03046 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
03047 
03048   buf_block_buf_fix_inc(block, file, line);
03049 
03050   mutex_exit(&block->mutex);
03051 
03052   /* Check if this is the first access to the page */
03053 
03054   access_time = buf_page_is_accessed(&block->page);
03055 
03056   buf_pool_mutex_exit(buf_pool);
03057 
03058   buf_page_set_accessed_make_young(&block->page, access_time);
03059 
03060 #ifdef UNIV_DEBUG_FILE_ACCESSES
03061   ut_a(!block->page.file_page_was_freed);
03062 #endif
03063 
03064 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
03065   ut_a(++buf_dbg_counter % 5771 || buf_validate());
03066   ut_a(block->page.buf_fix_count > 0);
03067   ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
03068 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
03069 
03070   switch (rw_latch) {
03071   case RW_NO_LATCH:
03072     if (must_read) {
03073       /* Let us wait until the read operation
03074       completes */
03075 
03076       for (;;) {
03077         enum buf_io_fix io_fix;
03078 
03079         mutex_enter(&block->mutex);
03080         io_fix = buf_block_get_io_fix(block);
03081         mutex_exit(&block->mutex);
03082 
03083         if (io_fix == BUF_IO_READ) {
03084 
03085           os_thread_sleep(WAIT_FOR_READ);
03086         } else {
03087           break;
03088         }
03089       }
03090     }
03091 
03092     fix_type = MTR_MEMO_BUF_FIX;
03093     break;
03094 
03095   case RW_S_LATCH:
03096     rw_lock_s_lock_func(&(block->lock), 0, file, line);
03097 
03098     fix_type = MTR_MEMO_PAGE_S_FIX;
03099     break;
03100 
03101   default:
03102     ut_ad(rw_latch == RW_X_LATCH);
03103     rw_lock_x_lock_func(&(block->lock), 0, file, line);
03104 
03105     fix_type = MTR_MEMO_PAGE_X_FIX;
03106     break;
03107   }
03108 
03109   mtr_memo_push(mtr, block, fix_type);
03110 
03111   if (!access_time) {
03112     /* In the case of a first access, try to apply linear
03113     read-ahead */
03114 
03115     buf_read_ahead_linear(space, zip_size, offset);
03116   }
03117 
03118 #ifdef UNIV_IBUF_COUNT_DEBUG
03119   ut_a(ibuf_count_get(buf_block_get_space(block),
03120           buf_block_get_page_no(block)) == 0);
03121 #endif
03122   return(block);
03123 }
03124 
/********************************************************************/
/** Tries to get access to a database page through a guessed block
pointer, without waiting.  The access succeeds only if the block is
still a resident file page, the requested latch can be acquired
immediately, and the block's modify clock still equals the value the
caller saw earlier (i.e. the page has not changed in between).
@return TRUE if success, FALSE on any of the failure conditions above */
UNIV_INTERN
ibool
buf_page_optimistic_get(
/*====================*/
  ulint   rw_latch, /*!< in: RW_S_LATCH or RW_X_LATCH */
  buf_block_t*  block,  /*!< in: guessed block pointer */
  ib_uint64_t modify_clock, /*!< in: modify clock value the caller
        read when it last accessed the block */
  const char* file, /*!< in: file name, for latch diagnostics */
  ulint   line, /*!< in: line where called */
  mtr_t*    mtr)  /*!< in: mini-transaction */
{
  buf_pool_t* buf_pool;
  unsigned  access_time;
  ibool   success;
  ulint   fix_type;

  ut_ad(block);
  ut_ad(mtr);
  ut_ad(mtr->state == MTR_ACTIVE);
  ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));

  mutex_enter(&block->mutex);

  /* The guessed pointer may be stale: the block may have been
  evicted and reused for something else meanwhile.  Only a
  resident file page can be accessed optimistically. */
  if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {

    mutex_exit(&block->mutex);

    return(FALSE);
  }

  /* Buffer-fix the block so it cannot be evicted while we try
  to latch and validate it. */
  buf_block_buf_fix_inc(block, file, line);

  mutex_exit(&block->mutex);

  /* Check if this is the first access to the page.
  We do a dirty read on purpose, to avoid mutex contention.
  This field is only used for heuristic purposes; it does not
  affect correctness. */

  access_time = buf_page_is_accessed(&block->page);
  buf_page_set_accessed_make_young(&block->page, access_time);

  ut_ad(!ibuf_inside()
        || ibuf_page(buf_block_get_space(block),
         buf_block_get_zip_size(block),
         buf_block_get_page_no(block), NULL));

  /* Try to acquire the requested latch without waiting. */
  if (rw_latch == RW_S_LATCH) {
    success = rw_lock_s_lock_nowait(&(block->lock),
            file, line);
    fix_type = MTR_MEMO_PAGE_S_FIX;
  } else {
    success = rw_lock_x_lock_func_nowait(&(block->lock),
                 file, line);
    fix_type = MTR_MEMO_PAGE_X_FIX;
  }

  if (UNIV_UNLIKELY(!success)) {
    /* Could not latch without waiting: undo the buffer-fix
    and report failure. */
    mutex_enter(&block->mutex);
    buf_block_buf_fix_dec(block);
    mutex_exit(&block->mutex);

    return(FALSE);
  }

  if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) {
    /* The modify clock no longer matches the caller's value:
    the guess is stale.  Release the latch and the buffer-fix
    in the reverse order of acquisition. */
    buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);

    if (rw_latch == RW_S_LATCH) {
      rw_lock_s_unlock(&(block->lock));
    } else {
      rw_lock_x_unlock(&(block->lock));
    }

    mutex_enter(&block->mutex);
    buf_block_buf_fix_dec(block);
    mutex_exit(&block->mutex);

    return(FALSE);
  }

  mtr_memo_push(mtr, block, fix_type);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
  ut_a(++buf_dbg_counter % 5771 || buf_validate());
  ut_a(block->page.buf_fix_count > 0);
  ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

#ifdef UNIV_DEBUG_FILE_ACCESSES
  ut_a(block->page.file_page_was_freed == FALSE);
#endif
  if (UNIV_UNLIKELY(!access_time)) {
    /* In the case of a first access, try to apply linear
    read-ahead */

    buf_read_ahead_linear(buf_block_get_space(block),
              buf_block_get_zip_size(block),
              buf_block_get_page_no(block));
  }

#ifdef UNIV_IBUF_COUNT_DEBUG
  ut_a(ibuf_count_get(buf_block_get_space(block),
          buf_block_get_page_no(block)) == 0);
#endif
  buf_pool = buf_pool_from_block(block);
  buf_pool->stat.n_page_gets++;

  return(TRUE);
}
03240 
/********************************************************************/
/** Gets access to a page that is known to reside in the buffer pool,
without waiting for a latch.  Used when the caller already holds a
pointer to the block (e.g. obtained earlier) and cannot afford to
block.  Fails if the block is being removed from the page hash or if
the requested latch cannot be acquired immediately.
@return TRUE if success */
UNIV_INTERN
ibool
buf_page_get_known_nowait(
/*======================*/
  ulint   rw_latch, /*!< in: RW_S_LATCH or RW_X_LATCH */
  buf_block_t*  block,  /*!< in: the known block */
  ulint   mode, /*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
  const char* file, /*!< in: file name, for latch diagnostics */
  ulint   line, /*!< in: line where called */
  mtr_t*    mtr)  /*!< in: mini-transaction */
{
  buf_pool_t* buf_pool;
  ibool   success;
  ulint   fix_type;

  ut_ad(mtr);
  ut_ad(mtr->state == MTR_ACTIVE);
  ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));

  mutex_enter(&block->mutex);

  if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) {
    /* Another thread is just freeing the block from the LRU list
    of the buffer pool: do not try to access this page; this
    attempt to access the page can only come through the hash
    index because when the buffer block state is ..._REMOVE_HASH,
    we have already removed it from the page address hash table
    of the buffer pool. */

    mutex_exit(&block->mutex);

    return(FALSE);
  }

  ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);

  /* Buffer-fix the block so it stays resident while we latch it. */
  buf_block_buf_fix_inc(block, file, line);

  mutex_exit(&block->mutex);

  buf_pool = buf_pool_from_block(block);

  if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
    /* Move the block toward the head of the LRU list. */
    buf_pool_mutex_enter(buf_pool);
    buf_LRU_make_block_young(&block->page);
    buf_pool_mutex_exit(buf_pool);
  } else if (!buf_page_is_accessed(&block->page)) {
    /* Above, we do a dirty read on purpose, to avoid
    mutex contention.  The field buf_page_t::access_time
    is only used for heuristic purposes.  Writes to the
    field must be protected by mutex, however. */
    ulint time_ms = ut_time_ms();

    buf_pool_mutex_enter(buf_pool);
    buf_page_set_accessed(&block->page, time_ms);
    buf_pool_mutex_exit(buf_pool);
  }

  ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));

  /* Try to acquire the requested latch without waiting. */
  if (rw_latch == RW_S_LATCH) {
    success = rw_lock_s_lock_nowait(&(block->lock),
            file, line);
    fix_type = MTR_MEMO_PAGE_S_FIX;
  } else {
    success = rw_lock_x_lock_func_nowait(&(block->lock),
                 file, line);
    fix_type = MTR_MEMO_PAGE_X_FIX;
  }

  if (!success) {
    /* Latch not available: undo the buffer-fix. */
    mutex_enter(&block->mutex);
    buf_block_buf_fix_dec(block);
    mutex_exit(&block->mutex);

    return(FALSE);
  }

  mtr_memo_push(mtr, block, fix_type);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
  ut_a(++buf_dbg_counter % 5771 || buf_validate());
  ut_a(block->page.buf_fix_count > 0);
  ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG_FILE_ACCESSES
  ut_a(block->page.file_page_was_freed == FALSE);
#endif

#ifdef UNIV_IBUF_COUNT_DEBUG
  ut_a((mode == BUF_KEEP_OLD)
       || (ibuf_count_get(buf_block_get_space(block),
        buf_block_get_page_no(block)) == 0));
#endif
  buf_pool->stat.n_page_gets++;

  return(TRUE);
}
03344 
/*******************************************************************/
/** Given a tablespace id and page number, tries to get that page.
If the page is not in the buffer pool it is not loaded and NULL is
returned.  Tries an S-latch first; if that fails (e.g. because the
current thread already holds an X-latch on the page), falls back to
trying an X-latch.  Suitable only for very low-level operations.
@return pointer to the block, or NULL if not found or not latchable */
UNIV_INTERN
const buf_block_t*
buf_page_try_get_func(
/*==================*/
  ulint   space_id, /*!< in: tablespace id */
  ulint   page_no,  /*!< in: page number */
  const char* file, /*!< in: file name, for latch diagnostics */
  ulint   line, /*!< in: line where called */
  mtr_t*    mtr)  /*!< in: mini-transaction */
{
  buf_block_t*  block;
  ibool   success;
  ulint   fix_type;
  buf_pool_t* buf_pool = buf_pool_get(space_id, page_no);

  ut_ad(mtr);
  ut_ad(mtr->state == MTR_ACTIVE);

  buf_pool_mutex_enter(buf_pool);
  block = buf_block_hash_get(buf_pool, space_id, page_no);

  if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
    /* Not in the pool (or only present as a compressed page
    or other non-file-page state): give up without I/O. */
    buf_pool_mutex_exit(buf_pool);
    return(NULL);
  }

  ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));

  /* Acquire the block mutex before releasing the pool mutex, so
  the block cannot change state in between. */
  mutex_enter(&block->mutex);
  buf_pool_mutex_exit(buf_pool);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
  ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
  ut_a(buf_block_get_space(block) == space_id);
  ut_a(buf_block_get_page_no(block) == page_no);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

  buf_block_buf_fix_inc(block, file, line);
  mutex_exit(&block->mutex);

  fix_type = MTR_MEMO_PAGE_S_FIX;
  success = rw_lock_s_lock_nowait(&block->lock, file, line);

  if (!success) {
    /* Let us try to get an X-latch. If the current thread
    is holding an X-latch on the page, we cannot get an
    S-latch. */

    fix_type = MTR_MEMO_PAGE_X_FIX;
    success = rw_lock_x_lock_func_nowait(&block->lock,
                 file, line);
  }

  if (!success) {
    /* Neither latch available: undo the buffer-fix. */
    mutex_enter(&block->mutex);
    buf_block_buf_fix_dec(block);
    mutex_exit(&block->mutex);

    return(NULL);
  }

  mtr_memo_push(mtr, block, fix_type);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
  ut_a(++buf_dbg_counter % 5771 || buf_validate());
  ut_a(block->page.buf_fix_count > 0);
  ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG_FILE_ACCESSES
  ut_a(block->page.file_page_was_freed == FALSE);
#endif /* UNIV_DEBUG_FILE_ACCESSES */
  buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);

  buf_pool->stat.n_page_gets++;

#ifdef UNIV_IBUF_COUNT_DEBUG
  ut_a(ibuf_count_get(buf_block_get_space(block),
          buf_block_get_page_no(block)) == 0);
#endif

  return(block);
}
03431 
03432 /********************************************************************/
03434 UNIV_INLINE
03435 void
03436 buf_page_init_low(
03437 /*==============*/
03438   buf_page_t* bpage)  
03439 {
03440   bpage->flush_type = BUF_FLUSH_LRU;
03441   bpage->io_fix = BUF_IO_NONE;
03442   bpage->buf_fix_count = 0;
03443   bpage->freed_page_clock = 0;
03444   bpage->access_time = 0;
03445   bpage->newest_modification = 0;
03446   bpage->oldest_modification = 0;
03447   HASH_INVALIDATE(bpage, hash);
03448 #ifdef UNIV_DEBUG_FILE_ACCESSES
03449   bpage->file_page_was_freed = FALSE;
03450 #endif /* UNIV_DEBUG_FILE_ACCESSES */
03451 }
03452 
/********************************************************************/
/** Initializes a block so that it represents the given file page, and
inserts it into the buffer pool's page hash table.  If a watch
sentinel occupies the hash slot, its buffer-fix count is transferred
to the block and the sentinel is removed; a genuine duplicate entry is
a fatal error.  Caller must hold buf_pool->mutex and block->mutex. */
static
void
buf_page_init(
/*==========*/
  ulint   space,  /*!< in: space id */
  ulint   offset, /*!< in: offset of the page within space, in units
        of a page */
  ulint   fold, /*!< in: buf_page_address_fold(space, offset) */
  buf_block_t*  block)  /*!< in/out: block to init */
{
  buf_page_t* hash_page;
  buf_pool_t* buf_pool = buf_pool_get(space, offset);

  ut_ad(buf_pool_mutex_own(buf_pool));
  ut_ad(mutex_own(&(block->mutex)));
  ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);

  /* Set the state of the block */
  buf_block_set_file_page(block, space, offset);

#ifdef UNIV_DEBUG_VALGRIND
  if (!space) {
    /* Silence valid Valgrind warnings about uninitialized
    data being written to data files.  There are some unused
    bytes on some pages that InnoDB does not initialize. */
    UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
  }
#endif /* UNIV_DEBUG_VALGRIND */

  buf_block_init_low(block);

  block->lock_hash_val = lock_rec_hash(space, offset);

  buf_page_init_low(&block->page);

  /* Insert into the hash table of file pages */

  hash_page = buf_page_hash_get_low(buf_pool, space, offset, fold);

  if (UNIV_LIKELY(!hash_page)) {
    /* Common case: the hash slot is free; fall through to
    the HASH_INSERT below. */
  } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
    /* Preserve the reference count. */
    ulint buf_fix_count = hash_page->buf_fix_count;

    ut_a(buf_fix_count > 0);
    block->page.buf_fix_count += buf_fix_count;
    buf_pool_watch_remove(buf_pool, fold, hash_page);
  } else {
    /* A real page descriptor for this (space, offset) is
    already in the hash table: the buffer pool is corrupt. */
    fprintf(stderr,
      "InnoDB: Error: page %lu %lu already found"
      " in the hash table: %p, %p\n",
      (ulong) space,
      (ulong) offset,
      (const void*) hash_page, (const void*) block);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
    mutex_exit(&block->mutex);
    buf_pool_mutex_exit(buf_pool);
    buf_print();
    buf_LRU_print();
    buf_validate();
    buf_LRU_validate();
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
    ut_error;
  }

  ut_ad(!block->page.in_zip_hash);
  ut_ad(!block->page.in_page_hash);
  ut_d(block->page.in_page_hash = TRUE);
  HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
        fold, &block->page);
}
03526 
/********************************************************************/
/** Inits a page for reading into the buffer pool.  Depending on
zip_size/unzip, allocates either a full block (uncompressed frame) or
only a compressed-page descriptor plus buddy-allocated zip storage;
inserts the page into the page hash and the LRU list, and I/O-fixes
it (with an X-latch on the frame, if any) for the pending read.
@return pointer to the page descriptor, or NULL if the page was
already in the pool or the tablespace was deleted (then *err is
DB_TABLESPACE_DELETED) */
UNIV_INTERN
buf_page_t*
buf_page_init_for_read(
/*===================*/
  ulint*    err,  /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
  ulint   mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY or
        BUF_READ_ANY_PAGE */
  ulint   space,  /*!< in: space id */
  ulint   zip_size,/*!< in: compressed page size, or 0 */
  ibool   unzip,  /*!< in: TRUE=an uncompressed frame is wanted too */
  ib_int64_t  tablespace_version,
        /*!< in: used to detect that the tablespace was
        dropped or truncated while this read was pending */
  ulint   offset) /*!< in: page number */
{
  buf_block_t*  block;
  buf_page_t* bpage = NULL;
  buf_page_t* watch_page;
  mtr_t   mtr;
  ulint   fold;
  ibool   lru = FALSE;
  void*   data;
  buf_pool_t* buf_pool = buf_pool_get(space, offset);

  ut_ad(buf_pool);

  *err = DB_SUCCESS;

  if (mode == BUF_READ_IBUF_PAGES_ONLY) {
    /* It is a read-ahead within an ibuf routine */

    ut_ad(!ibuf_bitmap_page(zip_size, offset));
    ut_ad(ibuf_inside());

    mtr_start(&mtr);

    if (!recv_no_ibuf_operations
        && !ibuf_page(space, zip_size, offset, &mtr)) {

      mtr_commit(&mtr);

      return(NULL);
    }
  } else {
    ut_ad(mode == BUF_READ_ANY_PAGE);
  }

  /* A full block (uncompressed frame) is needed unless the page
  is compressed and will not be decompressed now.  During recovery
  a frame is always allocated. */
  if (zip_size && UNIV_LIKELY(!unzip)
      && UNIV_LIKELY(!recv_recovery_is_on())) {
    block = NULL;
  } else {
    block = buf_LRU_get_free_block(buf_pool, 0);
    ut_ad(block);
    ut_ad(buf_pool_from_block(block) == buf_pool);
  }

  fold = buf_page_address_fold(space, offset);

  buf_pool_mutex_enter(buf_pool);

  watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
  if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
    /* The page is already in the buffer pool. */
    watch_page = NULL;
err_exit:
    /* Common error path: return the pre-allocated block (if
    any) to the free list and report "no read started". */
    if (block) {
      mutex_enter(&block->mutex);
      buf_LRU_block_free_non_file_page(block);
      mutex_exit(&block->mutex);
    }

    bpage = NULL;
    goto func_exit;
  }

  if (fil_tablespace_deleted_or_being_deleted_in_mem(
        space, tablespace_version)) {
    /* The page belongs to a space which has been
    deleted or is being deleted. */
    *err = DB_TABLESPACE_DELETED;

    goto err_exit;
  }

  if (block) {
    bpage = &block->page;
    mutex_enter(&block->mutex);

    ut_ad(buf_pool_from_bpage(bpage) == buf_pool);

    buf_page_init(space, offset, fold, block);

    /* The block must be put to the LRU list, to the old blocks */
    buf_LRU_add_block(bpage, TRUE/* to old blocks */);

    /* We set a pass-type x-lock on the frame because then
    the same thread which called for the read operation
    (and is running now at this point of code) can wait
    for the read to complete by waiting for the x-lock on
    the frame; if the x-lock were recursive, the same
    thread would illegally get the x-lock before the page
    read is completed.  The x-lock is cleared by the
    io-handler thread. */

    rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
    buf_page_set_io_fix(bpage, BUF_IO_READ);

    if (UNIV_UNLIKELY(zip_size)) {
      page_zip_set_size(&block->page.zip, zip_size);

      /* buf_pool->mutex may be released and
      reacquired by buf_buddy_alloc().  Thus, we
      must release block->mutex in order not to
      break the latching order in the reacquisition
      of buf_pool->mutex.  We also must defer this
      operation until after the block descriptor has
      been added to buf_pool->LRU and
      buf_pool->page_hash. */
      mutex_exit(&block->mutex);
      data = buf_buddy_alloc(buf_pool, zip_size, &lru);
      mutex_enter(&block->mutex);
      block->page.zip.data = static_cast<unsigned char *>(data);

      /* To maintain the invariant
      block->in_unzip_LRU_list
      == buf_page_belongs_to_unzip_LRU(&block->page)
      we have to add this block to unzip_LRU
      after block->page.zip.data is set. */
      ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
      buf_unzip_LRU_add_block(block, TRUE);
    }

    mutex_exit(&block->mutex);
  } else {
    /* Defer buf_buddy_alloc() until after the block has
    been found not to exist.  The buf_buddy_alloc() and
    buf_buddy_free() calls may be expensive because of
    buf_buddy_relocate(). */

    /* The compressed page must be allocated before the
    control block (bpage), in order to avoid the
    invocation of buf_buddy_relocate_block() on
    uninitialized data. */
    data = buf_buddy_alloc(buf_pool, zip_size, &lru);
    bpage = static_cast<buf_page_struct *>(buf_buddy_alloc(buf_pool, sizeof *bpage, &lru));

    /* Initialize the buf_pool pointer. */
    bpage->buf_pool_index = buf_pool_index(buf_pool);

    /* If buf_buddy_alloc() allocated storage from the LRU list,
    it released and reacquired buf_pool->mutex.  Thus, we must
    check the page_hash again, as it may have been modified. */
    if (UNIV_UNLIKELY(lru)) {

      watch_page = buf_page_hash_get_low(
        buf_pool, space, offset, fold);

      if (watch_page
          && !buf_pool_watch_is_sentinel(buf_pool,
                   watch_page)) {

        /* The block was added by some other thread. */
        watch_page = NULL;
        buf_buddy_free(buf_pool, bpage, sizeof *bpage);
        buf_buddy_free(buf_pool, data, zip_size);

        bpage = NULL;
        goto func_exit;
      }
    }

    page_zip_des_init(&bpage->zip);
    page_zip_set_size(&bpage->zip, zip_size);
    bpage->zip.data = static_cast<unsigned char *>(data);

    mutex_enter(&buf_pool->zip_mutex);
    UNIV_MEM_DESC(bpage->zip.data,
            page_zip_get_size(&bpage->zip), bpage);

    buf_page_init_low(bpage);

    bpage->state  = BUF_BLOCK_ZIP_PAGE;
    bpage->space  = space;
    bpage->offset = offset;


#ifdef UNIV_DEBUG
    bpage->in_page_hash = FALSE;
    bpage->in_zip_hash = FALSE;
    bpage->in_flush_list = FALSE;
    bpage->in_free_list = FALSE;
    bpage->in_LRU_list = FALSE;
#endif /* UNIV_DEBUG */

    ut_d(bpage->in_page_hash = TRUE);

    if (UNIV_LIKELY_NULL(watch_page)) {
      /* Preserve the reference count. */
      ulint buf_fix_count = watch_page->buf_fix_count;
      ut_a(buf_fix_count > 0);
      bpage->buf_fix_count += buf_fix_count;
      ut_ad(buf_pool_watch_is_sentinel(buf_pool, watch_page));
      buf_pool_watch_remove(buf_pool, fold, watch_page);
    }

    HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
          bpage);

    /* The block must be put to the LRU list, to the old blocks */
    buf_LRU_add_block(bpage, TRUE/* to old blocks */);
    buf_LRU_insert_zip_clean(bpage);

    buf_page_set_io_fix(bpage, BUF_IO_READ);

    mutex_exit(&buf_pool->zip_mutex);
  }

  buf_pool->n_pend_reads++;
func_exit:
  buf_pool_mutex_exit(buf_pool);

  if (mode == BUF_READ_IBUF_PAGES_ONLY) {

    mtr_commit(&mtr);
  }

  ut_ad(!bpage || buf_page_in_file(bpage));
  return(bpage);
}
03766 
/********************************************************************/
/** Initializes a page to the buffer pool without reading it from disk.
Used when a brand-new page is created (e.g. by a file segment
allocation).  If the page already resides in the pool, the function
falls back to buf_page_get_with_no_latch().  The frame's prev/next
pointers, page type, and file-flush-lsn field are reset.
@return pointer to the block, page buffer-fixed */
UNIV_INTERN
buf_block_t*
buf_page_create(
/*============*/
  ulint space,  /*!< in: space id */
  ulint offset, /*!< in: offset of the page within space, in units
        of a page */
  ulint zip_size,/*!< in: compressed page size, or 0 */
  mtr_t*  mtr)  /*!< in: mini-transaction */
{
  buf_frame_t*  frame;
  buf_block_t*  block;
  ulint   fold;
  buf_block_t*  free_block  = NULL;
  ulint   time_ms   = ut_time_ms();
  buf_pool_t* buf_pool  = buf_pool_get(space, offset);

  ut_ad(mtr);
  ut_ad(mtr->state == MTR_ACTIVE);
  ut_ad(space || !zip_size);

  /* Grab a free block before taking the pool mutex; it is
  returned below if the page turns out to exist already. */
  free_block = buf_LRU_get_free_block(buf_pool, 0);

  fold = buf_page_address_fold(space, offset);

  buf_pool_mutex_enter(buf_pool);

  block = (buf_block_t*) buf_page_hash_get_low(
    buf_pool, space, offset, fold);

  if (block
      && buf_page_in_file(&block->page)
      && !buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
#ifdef UNIV_IBUF_COUNT_DEBUG
    ut_a(ibuf_count_get(space, offset) == 0);
#endif
#ifdef UNIV_DEBUG_FILE_ACCESSES
    block->page.file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES */

    /* Page can be found in buf_pool */
    buf_pool_mutex_exit(buf_pool);

    buf_block_free(free_block);

    return(buf_page_get_with_no_latch(space, zip_size,
              offset, mtr));
  }

  /* If we get here, the page was not in buf_pool: init it there */

#ifdef UNIV_DEBUG
  if (buf_debug_prints) {
    fprintf(stderr, "Creating space %lu page %lu to buffer\n",
      (ulong) space, (ulong) offset);
  }
#endif /* UNIV_DEBUG */

  block = free_block;

  mutex_enter(&block->mutex);

  buf_page_init(space, offset, fold, block);

  /* The block must be put to the LRU list */
  buf_LRU_add_block(&block->page, FALSE);

  buf_block_buf_fix_inc(block, __FILE__, __LINE__);
  buf_pool->stat.n_pages_created++;

  if (zip_size) {
    void* data;
    ibool lru;

    /* Prevent race conditions during buf_buddy_alloc(),
    which may release and reacquire buf_pool->mutex,
    by IO-fixing and X-latching the block. */

    buf_page_set_io_fix(&block->page, BUF_IO_READ);
    rw_lock_x_lock(&block->lock);

    page_zip_set_size(&block->page.zip, zip_size);
    mutex_exit(&block->mutex);
    /* buf_pool->mutex may be released and reacquired by
    buf_buddy_alloc().  Thus, we must release block->mutex
    in order not to break the latching order in
    the reacquisition of buf_pool->mutex.  We also must
    defer this operation until after the block descriptor
    has been added to buf_pool->LRU and buf_pool->page_hash. */
    data = buf_buddy_alloc(buf_pool, zip_size, &lru);
    mutex_enter(&block->mutex);
    block->page.zip.data = static_cast<unsigned char *>(data);

    /* To maintain the invariant
    block->in_unzip_LRU_list
    == buf_page_belongs_to_unzip_LRU(&block->page)
    we have to add this block to unzip_LRU after
    block->page.zip.data is set. */
    ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
    buf_unzip_LRU_add_block(block, FALSE);

    buf_page_set_io_fix(&block->page, BUF_IO_NONE);
    rw_lock_x_unlock(&block->lock);
  }

  buf_page_set_accessed(&block->page, time_ms);

  buf_pool_mutex_exit(buf_pool);

  mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);

  mutex_exit(&block->mutex);

  /* Delete possible entries for the page from the insert buffer:
  such can exist if the page belonged to an index which was dropped */

  ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);

  /* Flush pages from the end of the LRU list if necessary */
  buf_flush_free_margin(buf_pool);

  frame = block->frame;

  /* Mark the page freshly allocated: no prev/next page links. */
  memset(frame + FIL_PAGE_PREV, 0xff, 4);
  memset(frame + FIL_PAGE_NEXT, 0xff, 4);
  mach_write_to_2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED);

  /* Reset to zero the file flush lsn field in the page; if the first
  page of an ibdata file is 'created' in this function into the buffer
  pool then we lose the original contents of the file flush lsn stamp.
  Then InnoDB could in a crash recovery print a big, false, corruption
  warning if the stamp contains an lsn bigger than the ib_logfile lsn. */

  memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
  ut_a(++buf_dbg_counter % 357 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_IBUF_COUNT_DEBUG
  ut_a(ibuf_count_get(buf_block_get_space(block),
          buf_block_get_page_no(block)) == 0);
#endif
  return(block);
}
03917 
03918 /********************************************************************/
/** Completes an asynchronous read or write request on a file page.
Called by the i/o-handler thread when the i/o on a block finishes.
On read completion this decompresses the page if needed, sanity-checks
the page number/space id stored on the page, detects corruption,
applies recovery log records when crash recovery is in progress and
merges buffered insert buffer operations.  Finally it clears the i/o
fix, releases the block latch and updates the buffer pool counters.
@param bpage  the block whose i/o has completed; the calling thread
must be the one that owns the pending i/o on this block */
UNIV_INTERN
void
buf_page_io_complete(
/*=================*/
  buf_page_t* bpage)  /*!< in: pointer to the block in question */
{
  enum buf_io_fix io_type;
  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
  /* TRUE when the block carries an uncompressed page frame */
  const ibool uncompressed = (buf_page_get_state(bpage)
          == BUF_BLOCK_FILE_PAGE);

  ut_a(buf_page_in_file(bpage));

  /* We do not need protect io_fix here by mutex to read
  it because this is the only function where we can change the value
  from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code
  ensures that this is the only thread that handles the i/o for this
  block. */

  io_type = buf_page_get_io_fix(bpage);
  ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);

  if (io_type == BUF_IO_READ) {
    ulint read_page_no;
    ulint read_space_id;
    byte* frame;

    if (buf_page_get_zip_size(bpage)) {
      /* Compressed page: validate the compressed frame,
      and decompress into the uncompressed frame if the
      block has one. */
      frame = bpage->zip.data;
      buf_pool->n_pend_unzip++;
      if (uncompressed
          && !buf_zip_decompress((buf_block_t*) bpage,
               FALSE)) {

        buf_pool->n_pend_unzip--;
        goto corrupt;
      }
      buf_pool->n_pend_unzip--;
    } else {
      ut_a(uncompressed);
      frame = ((buf_block_t*) bpage)->frame;
    }

    /* If this page is not uninitialized and not in the
    doublewrite buffer, then the page number and space id
    should be the same as in block. */
    read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
    read_space_id = mach_read_from_4(
      frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);

    if (bpage->space == TRX_SYS_SPACE
        && trx_doublewrite_page_inside(bpage->offset)) {

      ut_print_timestamp(stderr);
      fprintf(stderr,
        "  InnoDB: Error: reading page %lu\n"
        "InnoDB: which is in the"
        " doublewrite buffer!\n",
        (ulong) bpage->offset);
    } else if (!read_space_id && !read_page_no) {
      /* This is likely an uninitialized page. */
    } else if ((bpage->space
          && bpage->space != read_space_id)
         || bpage->offset != read_page_no) {
      /* We did not compare space_id to read_space_id
      if bpage->space == 0, because the field on the
      page may contain garbage in MySQL < 4.1.1,
      which only supported bpage->space == 0. */

      ut_print_timestamp(stderr);
      fprintf(stderr,
        "  InnoDB: Error: space id and page n:o"
        " stored in the page\n"
        "InnoDB: read in are %lu:%lu,"
        " should be %lu:%lu!\n",
        (ulong) read_space_id, (ulong) read_page_no,
        (ulong) bpage->space,
        (ulong) bpage->offset);
    }

    /* From version 3.23.38 up we store the page checksum
    to the 4 first bytes of the page end lsn field */

    if (buf_page_is_corrupted(frame,
            buf_page_get_zip_size(bpage))) {
corrupt:
      /* The error message is deliberately printed both
      before and after the page dump so that it is visible
      no matter which end of the (possibly long) dump the
      reader is looking at. */
      fprintf(stderr,
        "InnoDB: Database page corruption on disk"
        " or a failed\n"
        "InnoDB: file read of page %lu.\n"
        "InnoDB: You may have to recover"
        " from a backup.\n",
        (ulong) bpage->offset);
      buf_page_print(frame, buf_page_get_zip_size(bpage));
      fprintf(stderr,
        "InnoDB: Database page corruption on disk"
        " or a failed\n"
        "InnoDB: file read of page %lu.\n"
        "InnoDB: You may have to recover"
        " from a backup.\n",
        (ulong) bpage->offset);
      fputs("InnoDB: It is also possible that"
            " your operating\n"
            "InnoDB: system has corrupted its"
            " own file cache\n"
            "InnoDB: and rebooting your computer"
            " removes the\n"
            "InnoDB: error.\n"
            "InnoDB: If the corrupt page is an index page\n"
            "InnoDB: you can also try to"
            " fix the corruption\n"
            "InnoDB: by dumping, dropping,"
            " and reimporting\n"
            "InnoDB: the corrupt table."
            " You can use CHECK\n"
            "InnoDB: TABLE to scan your"
            " table for corruption.\n"
            "InnoDB: See also "
            REFMAN "forcing-recovery.html\n"
            "InnoDB: about forcing recovery.\n", stderr);

      if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
        /* A corrupt page is fatal unless the operator
        has explicitly asked to ignore corruption. */
        fputs("InnoDB: Ending processing because of"
              " a corrupt database page.\n",
              stderr);
        exit(1);
      }
    }

    if (recv_recovery_is_on()) {
      /* Pages must be uncompressed for crash recovery. */
      ut_a(uncompressed);
      recv_recover_page(TRUE, (buf_block_t*) bpage);
    }

    if (uncompressed && !recv_no_ibuf_operations) {
      /* Merge any operations buffered in the insert
      buffer for this page. */
      ibuf_merge_or_delete_for_page(
        (buf_block_t*) bpage, bpage->space,
        bpage->offset, buf_page_get_zip_size(bpage),
        TRUE);
    }
  }

  buf_pool_mutex_enter(buf_pool);
  mutex_enter(buf_page_get_mutex(bpage));

#ifdef UNIV_IBUF_COUNT_DEBUG
  if (io_type == BUF_IO_WRITE || uncompressed) {
    /* For BUF_IO_READ of compressed-only blocks, the
    buffered operations will be merged by buf_page_get_gen()
    after the block has been uncompressed. */
    ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
  }
#endif
  /* Because this thread which does the unlocking is not the same that
  did the locking, we use a pass value != 0 in unlock, which simply
  removes the newest lock debug record, without checking the thread
  id. */

  buf_page_set_io_fix(bpage, BUF_IO_NONE);

  switch (io_type) {
  case BUF_IO_READ:
    /* NOTE that the call to ibuf may have moved the ownership of
    the x-latch to this OS thread: do not let this confuse you in
    debugging! */

    ut_ad(buf_pool->n_pend_reads > 0);
    buf_pool->n_pend_reads--;
    buf_pool->stat.n_pages_read++;

    if (uncompressed) {
      rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
               BUF_IO_READ);
    }

    break;

  case BUF_IO_WRITE:
    /* Write means a flush operation: call the completion
    routine in the flush system */

    buf_flush_write_complete(bpage);

    if (uncompressed) {
      rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
               BUF_IO_WRITE);
    }

    buf_pool->stat.n_pages_written++;

    break;

  default:
    ut_error;
  }

#ifdef UNIV_DEBUG
  if (buf_debug_prints) {
    fprintf(stderr, "Has %s page space %lu page no %lu\n",
      io_type == BUF_IO_READ ? "read" : "written",
      (ulong) buf_page_get_space(bpage),
      (ulong) buf_page_get_page_no(bpage));
  }
#endif /* UNIV_DEBUG */

  mutex_exit(buf_page_get_mutex(bpage));
  buf_pool_mutex_exit(buf_pool);
}
04130 
04131 /*********************************************************************/
04134 static
04135 ibool
04136 buf_all_freed_instance(
04137 /*===================*/
04138   buf_pool_t* buf_pool) 
04139 {
04140   ulint   i;
04141   buf_chunk_t*  chunk;
04142 
04143   ut_ad(buf_pool);
04144 
04145   buf_pool_mutex_enter(buf_pool);
04146 
04147   chunk = buf_pool->chunks;
04148 
04149   for (i = buf_pool->n_chunks; i--; chunk++) {
04150 
04151     const buf_block_t* block = buf_chunk_not_freed(chunk);
04152 
04153     if (UNIV_LIKELY_NULL(block)) {
04154       fprintf(stderr,
04155         "Page %lu %lu still fixed or dirty\n",
04156         (ulong) block->page.space,
04157         (ulong) block->page.offset);
04158       ut_error;
04159     }
04160   }
04161 
04162   buf_pool_mutex_exit(buf_pool);
04163 
04164   return(TRUE);
04165 }
04166 
04167 /*********************************************************************/
04169 static
04170 void
04171 buf_pool_invalidate_instance(
04172 /*=========================*/
04173   buf_pool_t* buf_pool) 
04174 {
04175   ibool   freed;
04176   int i;
04177 
04178   buf_pool_mutex_enter(buf_pool);
04179 
04180   for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
04181 
04182     /* As this function is called during startup and
04183     during redo application phase during recovery, InnoDB
04184     is single threaded (apart from IO helper threads) at
04185     this stage. No new write batch can be in intialization
04186     stage at this point. */
04187     ut_ad(buf_pool->init_flush[i] == FALSE);
04188 
04189     /* However, it is possible that a write batch that has
04190     been posted earlier is still not complete. For buffer
04191     pool invalidation to proceed we must ensure there is NO
04192     write activity happening. */
04193     if (buf_pool->n_flush[i] > 0) {
04194       buf_pool_mutex_exit(buf_pool);
04195       buf_flush_wait_batch_end(buf_pool, static_cast<buf_flush>(i));
04196       buf_pool_mutex_enter(buf_pool);
04197     }
04198   }
04199 
04200   buf_pool_mutex_exit(buf_pool);
04201 
04202   ut_ad(buf_all_freed_instance(buf_pool));
04203 
04204   freed = TRUE;
04205 
04206   while (freed) {
04207     freed = buf_LRU_search_and_free_block(buf_pool, 100);
04208   }
04209 
04210   buf_pool_mutex_enter(buf_pool);
04211 
04212   ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
04213   ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
04214 
04215   buf_pool->freed_page_clock = 0;
04216   buf_pool->LRU_old = NULL;
04217   buf_pool->LRU_old_len = 0;
04218   buf_pool->LRU_flush_ended = 0;
04219 
04220   memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
04221   buf_refresh_io_stats(buf_pool);
04222 
04223   buf_pool_mutex_exit(buf_pool);
04224 }
04225 
04226 /*********************************************************************/
04230 UNIV_INTERN
04231 void
04232 buf_pool_invalidate(void)
04233 /*=====================*/
04234 {
04235   ulint   i;
04236 
04237   for (i = 0; i < srv_buf_pool_instances; i++) {
04238     buf_pool_invalidate_instance(buf_pool_from_array(i));
04239   }
04240 }
04241 
04242 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
04243 /*********************************************************************/
/** Validates the data structures of one buffer pool instance:
walks all chunks, the zip_clean list and the flush list, checking
state/io-fix/latch invariants for every block and cross-checking the
counted totals against the list lengths and flush counters recorded
in the buffer pool.  Any inconsistency aborts via ut_error/ut_a.
@param buf_pool  buffer pool instance to validate
@return TRUE (only returns when validation succeeds) */
static
ibool
buf_pool_validate_instance(
/*=======================*/
  buf_pool_t* buf_pool) /*!< in: buffer pool instance */
{
  buf_page_t* b;
  buf_chunk_t*  chunk;
  ulint   i;
  /* Counters accumulated during the walk; compared against
  the buffer pool's own bookkeeping at the end. */
  ulint   n_single_flush  = 0;
  ulint   n_lru_flush = 0;
  ulint   n_list_flush  = 0;
  ulint   n_lru   = 0;
  ulint   n_flush   = 0;
  ulint   n_free    = 0;
  ulint   n_zip   = 0;

  ut_ad(buf_pool);

  buf_pool_mutex_enter(buf_pool);

  chunk = buf_pool->chunks;

  /* Check the uncompressed blocks. */

  for (i = buf_pool->n_chunks; i--; chunk++) {

    ulint   j;
    buf_block_t*  block = chunk->blocks;

    for (j = chunk->size; j--; block++) {

      mutex_enter(&block->mutex);

      switch (buf_block_get_state(block)) {
      case BUF_BLOCK_ZIP_FREE:
      case BUF_BLOCK_ZIP_PAGE:
      case BUF_BLOCK_ZIP_DIRTY:
        /* These should only occur on
        zip_clean, zip_free[], or flush_list. */
        ut_error;
        break;

      case BUF_BLOCK_FILE_PAGE:
        /* The block must be findable in the page
        hash under its (space, page_no) key. */
        ut_a(buf_page_hash_get(buf_pool,
                   buf_block_get_space(
                     block),
                   buf_block_get_page_no(
                     block))
             == &block->page);

#ifdef UNIV_IBUF_COUNT_DEBUG
        ut_a(buf_page_get_io_fix(&block->page)
             == BUF_IO_READ
             || !ibuf_count_get(buf_block_get_space(
                block),
              buf_block_get_page_no(
                block)));
#endif
        switch (buf_page_get_io_fix(&block->page)) {
        case BUF_IO_NONE:
          break;

        case BUF_IO_WRITE:
          /* Count the pending write by its
          flush type. */
          switch (buf_page_get_flush_type(
              &block->page)) {
          case BUF_FLUSH_LRU:
            n_lru_flush++;
            ut_a(rw_lock_is_locked(
                   &block->lock,
                   RW_LOCK_SHARED));
            break;
          case BUF_FLUSH_LIST:
            n_list_flush++;
            break;
          case BUF_FLUSH_SINGLE_PAGE:
            n_single_flush++;
            break;
          default:
            ut_error;
          }

          break;

        case BUF_IO_READ:
          /* A pending read holds the block
          x-latched. */
          ut_a(rw_lock_is_locked(&block->lock,
                     RW_LOCK_EX));
          break;
        }

        n_lru++;
        break;

      case BUF_BLOCK_NOT_USED:
        n_free++;
        break;

      case BUF_BLOCK_READY_FOR_USE:
      case BUF_BLOCK_MEMORY:
      case BUF_BLOCK_REMOVE_HASH:
        /* do nothing */
        break;
      }

      mutex_exit(&block->mutex);
    }
  }

  mutex_enter(&buf_pool->zip_mutex);

  /* Check clean compressed-only blocks. */

  for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
       b = UT_LIST_GET_NEXT(list, b)) {
    ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
    switch (buf_page_get_io_fix(b)) {
    case BUF_IO_NONE:
      /* All clean blocks should be I/O-unfixed. */
      break;
    case BUF_IO_READ:
      /* In buf_LRU_free_block(), we temporarily set
      b->io_fix = BUF_IO_READ for a newly allocated
      control block in order to prevent
      buf_page_get_gen() from decompressing the block. */
      break;
    default:
      ut_error;
      break;
    }

    /* It is OK to read oldest_modification here because
    we have acquired buf_pool->zip_mutex above which acts
    as the 'block->mutex' for these bpages. */
    ut_a(!b->oldest_modification);
    ut_a(buf_page_hash_get(buf_pool, b->space, b->offset) == b);

    n_lru++;
    n_zip++;
  }

  /* Check dirty blocks. */

  buf_flush_list_mutex_enter(buf_pool);
  for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
       b = UT_LIST_GET_NEXT(list, b)) {
    ut_ad(b->in_flush_list);
    /* Every block on the flush list must be modified. */
    ut_a(b->oldest_modification);
    n_flush++;

    switch (buf_page_get_state(b)) {
    case BUF_BLOCK_ZIP_DIRTY:
      n_lru++;
      n_zip++;
      switch (buf_page_get_io_fix(b)) {
      case BUF_IO_NONE:
      case BUF_IO_READ:
        break;
      case BUF_IO_WRITE:
        switch (buf_page_get_flush_type(b)) {
        case BUF_FLUSH_LRU:
          n_lru_flush++;
          break;
        case BUF_FLUSH_LIST:
          n_list_flush++;
          break;
        case BUF_FLUSH_SINGLE_PAGE:
          n_single_flush++;
          break;
        default:
          ut_error;
        }
        break;
      }
      break;
    case BUF_BLOCK_FILE_PAGE:
      /* uncompressed page */
      break;
    case BUF_BLOCK_ZIP_FREE:
    case BUF_BLOCK_ZIP_PAGE:
    case BUF_BLOCK_NOT_USED:
    case BUF_BLOCK_READY_FOR_USE:
    case BUF_BLOCK_MEMORY:
    case BUF_BLOCK_REMOVE_HASH:
      /* These states never belong on the flush list. */
      ut_error;
      break;
    }
    ut_a(buf_page_hash_get(buf_pool, b->space, b->offset) == b);
  }

  ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);

  buf_flush_list_mutex_exit(buf_pool);

  mutex_exit(&buf_pool->zip_mutex);

  /* Cross-check the accumulated counts against the buffer
  pool's recorded list lengths and flush counters. */
  if (n_lru + n_free > buf_pool->curr_size + n_zip) {
    fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n",
      (ulong) n_lru, (ulong) n_free,
      (ulong) buf_pool->curr_size, (ulong) n_zip);
    ut_error;
  }

  ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
  if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
    fprintf(stderr, "Free list len %lu, free blocks %lu\n",
      (ulong) UT_LIST_GET_LEN(buf_pool->free),
      (ulong) n_free);
    ut_error;
  }

  ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
  ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
  ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);

  buf_pool_mutex_exit(buf_pool);

  ut_a(buf_LRU_validate());
  ut_a(buf_flush_validate(buf_pool));

  return(TRUE);
}
04468 
04469 /*********************************************************************/
04472 UNIV_INTERN
04473 ibool
04474 buf_validate(void)
04475 /*==============*/
04476 {
04477   ulint i;
04478 
04479   for (i = 0; i < srv_buf_pool_instances; i++) {
04480     buf_pool_t* buf_pool;
04481 
04482     buf_pool = buf_pool_from_array(i);
04483 
04484     buf_pool_validate_instance(buf_pool);
04485   }
04486   return(TRUE);
04487 }
04488 
04489 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
04490 
04491 #if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
04492 /*********************************************************************/
/** Prints information about one buffer pool instance to stderr:
list lengths, pending i/o counts, page statistics, and an
approximate per-index count of index pages currently in the pool.
Finally validates the instance.
@param buf_pool  buffer pool instance to print */
static
void
buf_print_instance(
/*===============*/
  buf_pool_t* buf_pool) /*!< in: buffer pool instance */
{
  index_id_t* index_ids;  /* distinct index ids found so far */
  ulint*    counts;       /* page count per entry of index_ids */
  ulint   size;
  ulint   i;
  ulint   j;
  index_id_t  id;
  ulint   n_found;
  buf_chunk_t*  chunk;
  dict_index_t* index;

  ut_ad(buf_pool);

  size = buf_pool->curr_size;

  /* One slot per page is enough: there cannot be more distinct
  index ids in the pool than there are pages. */
  index_ids = mem_alloc(size * sizeof *index_ids);
  counts = mem_alloc(sizeof(ulint) * size);

  buf_pool_mutex_enter(buf_pool);
  buf_flush_list_mutex_enter(buf_pool);

  fprintf(stderr,
    "buf_pool size %lu\n"
    "database pages %lu\n"
    "free pages %lu\n"
    "modified database pages %lu\n"
    "n pending decompressions %lu\n"
    "n pending reads %lu\n"
    "n pending flush LRU %lu list %lu single page %lu\n"
    "pages made young %lu, not young %lu\n"
    "pages read %lu, created %lu, written %lu\n",
    (ulong) size,
    (ulong) UT_LIST_GET_LEN(buf_pool->LRU),
    (ulong) UT_LIST_GET_LEN(buf_pool->free),
    (ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
    (ulong) buf_pool->n_pend_unzip,
    (ulong) buf_pool->n_pend_reads,
    (ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
    (ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
    (ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
    (ulong) buf_pool->stat.n_pages_made_young,
    (ulong) buf_pool->stat.n_pages_not_made_young,
    (ulong) buf_pool->stat.n_pages_read,
    (ulong) buf_pool->stat.n_pages_created,
    (ulong) buf_pool->stat.n_pages_written);

  buf_flush_list_mutex_exit(buf_pool);

  /* Count the number of blocks belonging to each index in the buffer */

  n_found = 0;

  chunk = buf_pool->chunks;

  for (i = buf_pool->n_chunks; i--; chunk++) {
    buf_block_t*  block   = chunk->blocks;
    ulint   n_blocks  = chunk->size;

    for (; n_blocks--; block++) {
      const buf_frame_t* frame = block->frame;

      if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {

        id = btr_page_get_index_id(frame);

        /* Look for the id in the index_ids array */
        j = 0;

        while (j < n_found) {

          if (index_ids[j] == id) {
            counts[j]++;

            break;
          }
          j++;
        }

        /* Not seen before: start a new entry. */
        if (j == n_found) {
          n_found++;
          index_ids[j] = id;
          counts[j] = 1;
        }
      }
    }
  }

  buf_pool_mutex_exit(buf_pool);

  /* Report the per-index counts; dict_index_get_if_in_cache
  may not find the index, in which case only the id is printed. */
  for (i = 0; i < n_found; i++) {
    index = dict_index_get_if_in_cache(index_ids[i]);

    fprintf(stderr,
      "Block count for index %llu in buffer is about %lu",
      (ullint) index_ids[i],
      (ulong) counts[i]);

    if (index) {
      putc(' ', stderr);
      dict_index_name_print(stderr, NULL, index);
    }

    putc('\n', stderr);
  }

  mem_free(index_ids);
  mem_free(counts);

  ut_a(buf_pool_validate_instance(buf_pool));
}
04609 
04610 /*********************************************************************/
04612 UNIV_INTERN
04613 void
04614 buf_print(void)
04615 /*===========*/
04616 {
04617   ulint   i;
04618 
04619   for (i = 0; i < srv_buf_pool_instances; i++) {
04620     buf_pool_t* buf_pool;
04621 
04622     buf_pool = buf_pool_from_array(i);
04623     buf_print_instance(buf_pool);
04624   }
04625 }
04626 #endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
04627 
04628 #ifdef UNIV_DEBUG
04629 /*********************************************************************/
/** Returns the number of latched pages in one buffer pool instance:
pages that are buffer-fixed or have a pending i/o.  Scans the
uncompressed blocks in all chunks, the clean compressed-only blocks
(zip_clean) and the dirty compressed-only blocks on the flush list.
@param buf_pool  buffer pool instance to scan
@return number of latched pages in the instance */
UNIV_INTERN
ulint
buf_get_latched_pages_number_instance(
/*==================================*/
  buf_pool_t* buf_pool) /*!< in: buffer pool instance */
{
  buf_page_t* b;
  ulint   i;
  buf_chunk_t*  chunk;
  ulint   fixed_pages_number = 0;

  buf_pool_mutex_enter(buf_pool);

  chunk = buf_pool->chunks;

  /* Scan the uncompressed blocks chunk by chunk. */
  for (i = buf_pool->n_chunks; i--; chunk++) {
    buf_block_t*  block;
    ulint   j;

    block = chunk->blocks;

    for (j = chunk->size; j--; block++) {
      if (buf_block_get_state(block)
          != BUF_BLOCK_FILE_PAGE) {

        continue;
      }

      mutex_enter(&block->mutex);

      if (block->page.buf_fix_count != 0
          || buf_page_get_io_fix(&block->page)
          != BUF_IO_NONE) {
        fixed_pages_number++;
      }

      mutex_exit(&block->mutex);
    }
  }

  mutex_enter(&buf_pool->zip_mutex);

  /* Traverse the lists of clean and dirty compressed-only blocks. */

  for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
       b = UT_LIST_GET_NEXT(list, b)) {
    ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
    /* Clean blocks cannot have a pending write. */
    ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);

    if (b->buf_fix_count != 0
        || buf_page_get_io_fix(b) != BUF_IO_NONE) {
      fixed_pages_number++;
    }
  }

  buf_flush_list_mutex_enter(buf_pool);
  for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
       b = UT_LIST_GET_NEXT(list, b)) {
    ut_ad(b->in_flush_list);

    switch (buf_page_get_state(b)) {
    case BUF_BLOCK_ZIP_DIRTY:
      if (b->buf_fix_count != 0
          || buf_page_get_io_fix(b) != BUF_IO_NONE) {
        fixed_pages_number++;
      }
      break;
    case BUF_BLOCK_FILE_PAGE:
      /* uncompressed page; already counted in the
      chunk scan above */
      break;
    case BUF_BLOCK_ZIP_FREE:
    case BUF_BLOCK_ZIP_PAGE:
    case BUF_BLOCK_NOT_USED:
    case BUF_BLOCK_READY_FOR_USE:
    case BUF_BLOCK_MEMORY:
    case BUF_BLOCK_REMOVE_HASH:
      /* These states never belong on the flush list. */
      ut_error;
      break;
    }
  }

  buf_flush_list_mutex_exit(buf_pool);
  mutex_exit(&buf_pool->zip_mutex);
  buf_pool_mutex_exit(buf_pool);

  return(fixed_pages_number);
}
04719 
04720 /*********************************************************************/
04723 UNIV_INTERN
04724 ulint
04725 buf_get_latched_pages_number(void)
04726 /*==============================*/
04727 {
04728   ulint i;
04729   ulint total_latched_pages = 0;
04730 
04731   for (i = 0; i < srv_buf_pool_instances; i++) {
04732     buf_pool_t* buf_pool;
04733 
04734     buf_pool = buf_pool_from_array(i);
04735 
04736     total_latched_pages += buf_get_latched_pages_number_instance(
04737       buf_pool);
04738   }
04739 
04740   return(total_latched_pages);
04741 }
04742 
04743 #endif /* UNIV_DEBUG */
04744 
04745 /*********************************************************************/
04748 UNIV_INTERN
04749 ulint
04750 buf_get_n_pending_ios(void)
04751 /*=======================*/
04752 {
04753   ulint i;
04754   ulint pend_ios = 0;
04755 
04756   for (i = 0; i < srv_buf_pool_instances; i++) {
04757     buf_pool_t* buf_pool;
04758 
04759     buf_pool = buf_pool_from_array(i);
04760 
04761     pend_ios +=
04762       buf_pool->n_pend_reads
04763       + buf_pool->n_flush[BUF_FLUSH_LRU]
04764       + buf_pool->n_flush[BUF_FLUSH_LIST]
04765       + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE];
04766   }
04767 
04768   return(pend_ios);
04769 }
04770 
04771 /*********************************************************************/
04775 UNIV_INTERN
04776 ulint
04777 buf_get_modified_ratio_pct(void)
04778 /*============================*/
04779 {
04780   ulint   ratio;
04781   ulint   lru_len = 0;
04782   ulint   free_len = 0;
04783   ulint   flush_list_len = 0;
04784 
04785   buf_get_total_list_len(&lru_len, &free_len, &flush_list_len);
04786 
04787   ratio = (100 * flush_list_len) / (1 + lru_len + free_len);
04788   
04789   /* 1 + is there to avoid division by zero */
04790 
04791   return(ratio);
04792 }
04793 
04794 /*********************************************************************/
/** Prints i/o statistics of one buffer pool instance to a file:
current list lengths and pending i/o, per-second page rates since
the last printout, the buffer pool hit rate, read-ahead statistics,
and LRU eviction counters.  Resets the printout baseline at the end.
@param buf_pool  buffer pool instance whose statistics to print
@param file      output stream */
static
void
buf_print_io_instance(
/*==================*/
  buf_pool_t* buf_pool, /*!< in: buffer pool instance */
  FILE*   file)   /*!< in/out: where to print */
{
  time_t  current_time;
  double  time_elapsed;
  ulint n_gets_diff;  /* page gets since the last printout */

  ut_ad(buf_pool);

  buf_pool_mutex_enter(buf_pool);
  buf_flush_list_mutex_enter(buf_pool);

  fprintf(file,
    "Buffer pool size   %lu\n"
    "Free buffers       %lu\n"
    "Database pages     %lu\n"
    "Old database pages %lu\n"
    "Modified db pages  %lu\n"
    "Pending reads %lu\n"
    "Pending writes: LRU %lu, flush list %lu, single page %lu\n",
    (ulong) buf_pool->curr_size,
    (ulong) UT_LIST_GET_LEN(buf_pool->free),
    (ulong) UT_LIST_GET_LEN(buf_pool->LRU),
    (ulong) buf_pool->LRU_old_len,
    (ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
    (ulong) buf_pool->n_pend_reads,
    (ulong) buf_pool->n_flush[BUF_FLUSH_LRU]
    + buf_pool->init_flush[BUF_FLUSH_LRU],
    (ulong) buf_pool->n_flush[BUF_FLUSH_LIST]
    + buf_pool->init_flush[BUF_FLUSH_LIST],
    (ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);

  buf_flush_list_mutex_exit(buf_pool);

  /* The 0.001 guards against division by zero when less than a
  second has elapsed since the last printout. */
  current_time = time(NULL);
  time_elapsed = 0.001 + difftime(current_time,
          buf_pool->last_printout_time);

  /* Rates are computed from the deltas between the current
  counters and the snapshot taken at the last printout. */
  fprintf(file,
    "Pages made young %lu, not young %lu\n"
    "%.2f youngs/s, %.2f non-youngs/s\n"
    "Pages read %lu, created %lu, written %lu\n"
    "%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
    (ulong) buf_pool->stat.n_pages_made_young,
    (ulong) buf_pool->stat.n_pages_not_made_young,
    (buf_pool->stat.n_pages_made_young
     - buf_pool->old_stat.n_pages_made_young)
    / time_elapsed,
    (buf_pool->stat.n_pages_not_made_young
     - buf_pool->old_stat.n_pages_not_made_young)
    / time_elapsed,
    (ulong) buf_pool->stat.n_pages_read,
    (ulong) buf_pool->stat.n_pages_created,
    (ulong) buf_pool->stat.n_pages_written,
    (buf_pool->stat.n_pages_read
     - buf_pool->old_stat.n_pages_read)
    / time_elapsed,
    (buf_pool->stat.n_pages_created
     - buf_pool->old_stat.n_pages_created)
    / time_elapsed,
    (buf_pool->stat.n_pages_written
     - buf_pool->old_stat.n_pages_written)
    / time_elapsed);

  n_gets_diff = buf_pool->stat.n_page_gets
        - buf_pool->old_stat.n_page_gets;

  if (n_gets_diff) {
    /* Hit rate: fraction of page gets that did not require
    a physical read, scaled to parts per thousand. */
    fprintf(file,
      "Buffer pool hit rate %lu / 1000,"
      " young-making rate %lu / 1000 not %lu / 1000\n",
      (ulong)
      (1000 - ((1000 * (buf_pool->stat.n_pages_read
            - buf_pool->old_stat.n_pages_read))
         / (buf_pool->stat.n_page_gets
            - buf_pool->old_stat.n_page_gets))),
      (ulong)
      (1000 * (buf_pool->stat.n_pages_made_young
         - buf_pool->old_stat.n_pages_made_young)
       / n_gets_diff),
      (ulong)
      (1000 * (buf_pool->stat.n_pages_not_made_young
         - buf_pool->old_stat.n_pages_not_made_young)
       / n_gets_diff));
  } else {
    fputs("No buffer pool page gets since the last printout\n",
          file);
  }

  /* Statistics about read ahead algorithm */
  fprintf(file, "Pages read ahead %.2f/s,"
    " evicted without access %.2f/s\n",
    (buf_pool->stat.n_ra_pages_read
    - buf_pool->old_stat.n_ra_pages_read)
    / time_elapsed,
    (buf_pool->stat.n_ra_pages_evicted
    - buf_pool->old_stat.n_ra_pages_evicted)
    / time_elapsed);

  /* Print some values to help us with visualizing what is
  happening with LRU eviction. */
  fprintf(file,
    "LRU len: %lu, unzip_LRU len: %lu\n"
    "I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
    static_cast<ulint>(UT_LIST_GET_LEN(buf_pool->LRU)),
    static_cast<ulint>(UT_LIST_GET_LEN(buf_pool->unzip_LRU)),
    buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
    buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);

  /* Take a fresh snapshot so the next printout reports rates
  relative to now. */
  buf_refresh_io_stats(buf_pool);
  buf_pool_mutex_exit(buf_pool);
}
04912 
04913 /*********************************************************************/
04915 UNIV_INTERN
04916 void
04917 buf_print_io(
04918 /*=========*/
04919   FILE* file) 
04920 {
04921   ulint   i;
04922 
04923   for (i = 0; i < srv_buf_pool_instances; i++) {
04924     buf_pool_t* buf_pool;
04925 
04926     buf_pool = buf_pool_from_array(i);
04927     buf_print_io_instance(buf_pool, file);
04928   }
04929 }
04930 
04931 /**********************************************************************/
04933 UNIV_INTERN
04934 void
04935 buf_refresh_io_stats(
04936 /*=================*/
04937   buf_pool_t* buf_pool) 
04938 {
04939   buf_pool->last_printout_time = ut_time();
04940   buf_pool->old_stat = buf_pool->stat;
04941 }
04942 
04943 /**********************************************************************/
04945 UNIV_INTERN
04946 void
04947 buf_refresh_io_stats_all(void)
04948 /*==========================*/
04949 {
04950   ulint   i;
04951 
04952   for (i = 0; i < srv_buf_pool_instances; i++) {
04953     buf_pool_t* buf_pool;
04954 
04955     buf_pool = buf_pool_from_array(i);
04956 
04957     buf_refresh_io_stats(buf_pool);
04958   }
04959 }
04960 
04961 /**********************************************************************/
04964 UNIV_INTERN
04965 ibool
04966 buf_all_freed(void)
04967 /*===============*/
04968 {
04969   ulint i;
04970 
04971   for (i = 0; i < srv_buf_pool_instances; i++) {
04972     buf_pool_t* buf_pool;
04973 
04974     buf_pool = buf_pool_from_array(i);
04975 
04976     if (!buf_all_freed_instance(buf_pool)) {
04977       return(FALSE);
04978     }
04979   }
04980 
04981   return(TRUE);
04982 }
04983   
04984 /*********************************************************************/
04988 UNIV_INTERN
04989 ibool
04990 buf_pool_check_no_pending_io(void)
04991 /*==============================*/
04992 {
04993   ulint   i;
04994   ibool   ret = TRUE;
04995 
04996   buf_pool_mutex_enter_all();
04997 
04998   for (i = 0; i < srv_buf_pool_instances && ret; i++) {
04999     const buf_pool_t* buf_pool;
05000 
05001     buf_pool = buf_pool_from_array(i);
05002 
05003     if (buf_pool->n_pend_reads
05004         + buf_pool->n_flush[BUF_FLUSH_LRU]
05005         + buf_pool->n_flush[BUF_FLUSH_LIST]
05006         + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]) {
05007 
05008       ret = FALSE;
05009     }
05010   }
05011 
05012   buf_pool_mutex_exit_all();
05013 
05014   return(ret);
05015 }
05016 
#if 0
Code currently not used
/*********************************************************************/
/** Returns the length of the free list of the buffer pool.
NOTE(review): disabled; it references an unqualified `buf_pool`,
while the live code in this file always goes through
buf_pool_from_array() — presumably it predates multiple buffer pool
instances.  Confirm before re-enabling.
@return length of the free list */
UNIV_INTERN
ulint
buf_get_free_list_len(void)
/*=======================*/
{
  ulint len;

  buf_pool_mutex_enter(buf_pool);

  len = UT_LIST_GET_LEN(buf_pool->free);

  buf_pool_mutex_exit(buf_pool);

  return(len);
}
#endif
05038 
05039 #else /* !UNIV_HOTBACKUP */
05040 /********************************************************************/
05042 UNIV_INTERN
05043 void
05044 buf_page_init_for_backup_restore(
05045 /*=============================*/
05046   ulint   space,  
05047   ulint   offset, 
05049   ulint   zip_size,
05051   buf_block_t*  block)  
05052 {
05053   block->page.state = BUF_BLOCK_FILE_PAGE;
05054   block->page.space = space;
05055   block->page.offset  = offset;
05056 
05057   page_zip_des_init(&block->page.zip);
05058 
05059   /* We assume that block->page.data has been allocated
05060   with zip_size == UNIV_PAGE_SIZE. */
05061   ut_ad(zip_size <= UNIV_PAGE_SIZE);
05062   ut_ad(ut_is_2pow(zip_size));
05063   page_zip_set_size(&block->page.zip, zip_size);
05064   if (zip_size) {
05065     block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
05066   }
05067 }
05068 #endif /* !UNIV_HOTBACKUP */