00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00026 #include "buf0rea.h"
00027
00028 #include "fil0fil.h"
00029 #include "mtr0mtr.h"
00030
00031 #include "buf0buf.h"
00032 #include "buf0flu.h"
00033 #include "buf0lru.h"
00034 #include "ibuf0ibuf.h"
00035 #include "log0recv.h"
00036 #include "trx0sys.h"
00037 #include "os0file.h"
00038 #include "srv0start.h"
00039 #include "srv0srv.h"
00040
/** Size of the page area examined by linear read-ahead; simply an alias
for BUF_READ_AHEAD_AREA, and like it invoked with a buffer pool argument. */
00042 #define BUF_READ_AHEAD_LINEAR_AREA BUF_READ_AHEAD_AREA
00043
/** Divisor applied to buf_pool->curr_size to obtain the pending-read limit:
while buf_pool->n_pend_reads exceeds curr_size / BUF_READ_AHEAD_PEND_LIMIT,
linear read-ahead returns without reading and the insert buffer merge
read waits before posting further requests. */
00047 #define BUF_READ_AHEAD_PEND_LIMIT 2
00048
00049
00059 static
00060 ulint
00061 buf_read_page_low(
00062
00063 ulint* err,
00066 ibool sync,
00067 ulint mode,
00070 ulint space,
00071 ulint zip_size,
00072 ibool unzip,
00073 ib_int64_t tablespace_version,
00078 ulint offset)
00079 {
00080 buf_page_t* bpage;
00081 ulint wake_later;
00082
00083 *err = DB_SUCCESS;
00084
00085 wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
00086 mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER;
00087
00088 if (trx_doublewrite && space == TRX_SYS_SPACE
00089 && ( (offset >= trx_doublewrite->block1
00090 && offset < trx_doublewrite->block1
00091 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
00092 || (offset >= trx_doublewrite->block2
00093 && offset < trx_doublewrite->block2
00094 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE))) {
00095 ut_print_timestamp(stderr);
00096 fprintf(stderr,
00097 " InnoDB: Warning: trying to read"
00098 " doublewrite buffer page %lu\n",
00099 (ulong) offset);
00100
00101 return(0);
00102 }
00103
00104 if (ibuf_bitmap_page(zip_size, offset)
00105 || trx_sys_hdr_page(space, offset)) {
00106
00107
00108
00109
00110
00111
00112
00113 sync = TRUE;
00114 }
00115
00116
00117
00118
00119
00120 bpage = buf_page_init_for_read(err, mode, space, zip_size, unzip,
00121 tablespace_version, offset);
00122 if (bpage == NULL) {
00123
00124 return(0);
00125 }
00126
00127 #ifdef UNIV_DEBUG
00128 if (buf_debug_prints) {
00129 fprintf(stderr,
00130 "Posting read request for page %lu, sync %lu\n",
00131 (ulong) offset,
00132 (ulong) sync);
00133 }
00134 #endif
00135
00136 ut_ad(buf_page_in_file(bpage));
00137
00138 if (zip_size) {
00139 *err = fil_io(OS_FILE_READ | wake_later,
00140 sync, space, zip_size, offset, 0, zip_size,
00141 bpage->zip.data, bpage);
00142 } else {
00143 ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
00144
00145 *err = fil_io(OS_FILE_READ | wake_later,
00146 sync, space, 0, offset, 0, UNIV_PAGE_SIZE,
00147 ((buf_block_t*) bpage)->frame, bpage);
00148 }
00149 ut_a(*err == DB_SUCCESS);
00150
00151 if (sync) {
00152
00153
00154 buf_page_io_complete(bpage);
00155 }
00156
00157 return(1);
00158 }
00159
00160
00166 UNIV_INTERN
00167 ibool
00168 buf_read_page(
00169
00170 ulint space,
00171 ulint zip_size,
00172 ulint offset)
00173 {
00174 buf_pool_t* buf_pool = buf_pool_get(space, offset);
00175 ib_int64_t tablespace_version;
00176 ulint count;
00177 ulint err;
00178
00179 tablespace_version = fil_space_get_version(space);
00180
00181
00182
00183
00184 count = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
00185 zip_size, FALSE,
00186 tablespace_version, offset);
00187 srv_buf_pool_reads += count;
00188 if (err == DB_TABLESPACE_DELETED) {
00189 ut_print_timestamp(stderr);
00190 fprintf(stderr,
00191 " InnoDB: Error: trying to access"
00192 " tablespace %lu page no. %lu,\n"
00193 "InnoDB: but the tablespace does not exist"
00194 " or is just being dropped.\n",
00195 (ulong) space, (ulong) offset);
00196 }
00197
00198
00199 buf_flush_free_margin(buf_pool);
00200
00201
00202 buf_LRU_stat_inc_io();
00203
00204 return(count);
00205 }
00206
00207
00231 UNIV_INTERN
00232 ulint
00233 buf_read_ahead_linear(
00234
00235 ulint space,
00236 ulint zip_size,
00237 ulint offset)
00239 {
00240 buf_pool_t* buf_pool = buf_pool_get(space, offset);
00241 ib_int64_t tablespace_version;
00242 buf_page_t* bpage;
00243 buf_frame_t* frame;
00244 buf_page_t* pred_bpage = NULL;
00245 ulint pred_offset;
00246 ulint succ_offset;
00247 ulint count;
00248 int asc_or_desc;
00249 ulint new_offset;
00250 ulint fail_count;
00251 ulint ibuf_mode;
00252 ulint low, high;
00253 ulint err;
00254 ulint i;
00255 const ulint buf_read_ahead_linear_area
00256 = BUF_READ_AHEAD_LINEAR_AREA(buf_pool);
00257 ulint threshold;
00258
00259 if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
00260
00261 return(0);
00262 }
00263
00264 low = (offset / buf_read_ahead_linear_area)
00265 * buf_read_ahead_linear_area;
00266 high = (offset / buf_read_ahead_linear_area + 1)
00267 * buf_read_ahead_linear_area;
00268
00269 if ((offset != low) && (offset != high - 1)) {
00270
00271
00272 return(0);
00273 }
00274
00275 if (ibuf_bitmap_page(zip_size, offset)
00276 || trx_sys_hdr_page(space, offset)) {
00277
00278
00279
00280
00281
00282 return(0);
00283 }
00284
00285
00286
00287
00288
00289 tablespace_version = fil_space_get_version(space);
00290
00291 buf_pool_mutex_enter(buf_pool);
00292
00293 if (high > fil_space_get_size(space)) {
00294 buf_pool_mutex_exit(buf_pool);
00295
00296
00297 return(0);
00298 }
00299
00300 if (buf_pool->n_pend_reads
00301 > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
00302 buf_pool_mutex_exit(buf_pool);
00303
00304 return(0);
00305 }
00306
00307
00308
00309
00310
00311 asc_or_desc = 1;
00312
00313 if (offset == low) {
00314 asc_or_desc = -1;
00315 }
00316
00317
00318
00319 threshold = ut_min((64 - srv_read_ahead_threshold),
00320 BUF_READ_AHEAD_AREA(buf_pool));
00321
00322 fail_count = 0;
00323
00324 for (i = low; i < high; i++) {
00325 bpage = buf_page_hash_get(buf_pool, space, i);
00326
00327 if (bpage == NULL || !buf_page_is_accessed(bpage)) {
00328
00329 fail_count++;
00330
00331 } else if (pred_bpage) {
00332
00333
00334
00335
00336
00337
00338
00339
00340 int res = ut_ulint_cmp(
00341 buf_page_is_accessed(bpage),
00342 buf_page_is_accessed(pred_bpage));
00343
00344 if (res != 0 && res != asc_or_desc) {
00345 fail_count++;
00346 }
00347 }
00348
00349 if (fail_count > threshold) {
00350
00351 buf_pool_mutex_exit(buf_pool);
00352 return(0);
00353 }
00354
00355 if (bpage && buf_page_is_accessed(bpage)) {
00356 pred_bpage = bpage;
00357 }
00358 }
00359
00360
00361
00362
00363 bpage = buf_page_hash_get(buf_pool, space, offset);
00364
00365 if (bpage == NULL) {
00366 buf_pool_mutex_exit(buf_pool);
00367
00368 return(0);
00369 }
00370
00371 switch (buf_page_get_state(bpage)) {
00372 case BUF_BLOCK_ZIP_PAGE:
00373 frame = bpage->zip.data;
00374 break;
00375 case BUF_BLOCK_FILE_PAGE:
00376 frame = ((buf_block_t*) bpage)->frame;
00377 break;
00378 default:
00379 ut_error;
00380 break;
00381 }
00382
00383
00384
00385
00386
00387
00388
00389 pred_offset = fil_page_get_prev(frame);
00390 succ_offset = fil_page_get_next(frame);
00391
00392 buf_pool_mutex_exit(buf_pool);
00393
00394 if ((offset == low) && (succ_offset == offset + 1)) {
00395
00396
00397 new_offset = pred_offset;
00398
00399 } else if ((offset == high - 1) && (pred_offset == offset - 1)) {
00400
00401
00402 new_offset = succ_offset;
00403 } else {
00404
00405
00406 return(0);
00407 }
00408
00409 low = (new_offset / buf_read_ahead_linear_area)
00410 * buf_read_ahead_linear_area;
00411 high = (new_offset / buf_read_ahead_linear_area + 1)
00412 * buf_read_ahead_linear_area;
00413
00414 if ((new_offset != low) && (new_offset != high - 1)) {
00415
00416
00417 return(0);
00418 }
00419
00420 if (high > fil_space_get_size(space)) {
00421
00422
00423 return(0);
00424 }
00425
00426
00427
00428 if (ibuf_inside()) {
00429 ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
00430 } else {
00431 ibuf_mode = BUF_READ_ANY_PAGE;
00432 }
00433
00434 count = 0;
00435
00436
00437
00438
00439
00440 os_aio_simulated_put_read_threads_to_sleep();
00441
00442 for (i = low; i < high; i++) {
00443
00444
00445
00446 if (!ibuf_bitmap_page(zip_size, i)) {
00447 count += buf_read_page_low(
00448 &err, FALSE,
00449 ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
00450 space, zip_size, FALSE, tablespace_version, i);
00451 if (err == DB_TABLESPACE_DELETED) {
00452 ut_print_timestamp(stderr);
00453 fprintf(stderr,
00454 " InnoDB: Warning: in"
00455 " linear readahead trying to access\n"
00456 "InnoDB: tablespace %lu page %lu,\n"
00457 "InnoDB: but the tablespace does not"
00458 " exist or is just being dropped.\n",
00459 (ulong) space, (ulong) i);
00460 }
00461 }
00462 }
00463
00464
00465
00466
00467
00468 os_aio_simulated_wake_handler_threads();
00469
00470
00471 buf_flush_free_margin(buf_pool);
00472
00473 #ifdef UNIV_DEBUG
00474 if (buf_debug_prints && (count > 0)) {
00475 fprintf(stderr,
00476 "LINEAR read-ahead space %lu offset %lu pages %lu\n",
00477 (ulong) space, (ulong) offset, (ulong) count);
00478 }
00479 #endif
00480
00481
00482
00483 buf_LRU_stat_inc_io();
00484
00485 buf_pool->stat.n_ra_pages_read += count;
00486 return(count);
00487 }
00488
00489
00493 UNIV_INTERN
00494 void
00495 buf_read_ibuf_merge_pages(
00496
00497 ibool sync,
00502 const ulint* space_ids,
00503 const ib_int64_t* space_versions,
00510 const ulint* page_nos,
00514 ulint n_stored)
00516 {
00517 ulint i;
00518
00519 ut_ad(!ibuf_inside());
00520 #ifdef UNIV_IBUF_DEBUG
00521 ut_a(n_stored < UNIV_PAGE_SIZE);
00522 #endif
00523
00524 for (i = 0; i < n_stored; i++) {
00525 ulint err;
00526 buf_pool_t* buf_pool;
00527 ulint zip_size = fil_space_get_zip_size(space_ids[i]);
00528
00529 buf_pool = buf_pool_get(space_ids[i], space_versions[i]);
00530
00531 while (buf_pool->n_pend_reads
00532 > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
00533 os_thread_sleep(500000);
00534 }
00535
00536 if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
00537
00538 goto tablespace_deleted;
00539 }
00540
00541 buf_read_page_low(&err, sync && (i + 1 == n_stored),
00542 BUF_READ_ANY_PAGE, space_ids[i],
00543 zip_size, TRUE, space_versions[i],
00544 page_nos[i]);
00545
00546 if (UNIV_UNLIKELY(err == DB_TABLESPACE_DELETED)) {
00547 tablespace_deleted:
00548
00549
00550
00551 ibuf_merge_or_delete_for_page(NULL, space_ids[i],
00552 page_nos[i],
00553 zip_size, FALSE);
00554 }
00555 }
00556
00557 os_aio_simulated_wake_handler_threads();
00558
00559
00560 buf_flush_free_margins();
00561
00562 #ifdef UNIV_DEBUG
00563 if (buf_debug_prints) {
00564 fprintf(stderr,
00565 "Ibuf merge read-ahead space %lu pages %lu\n",
00566 (ulong) space_ids[0], (ulong) n_stored);
00567 }
00568 #endif
00569 }
00570
00571
00573 UNIV_INTERN
00574 void
00575 buf_read_recv_pages(
00576
00577 ibool sync,
00582 ulint space,
00583 ulint zip_size,
00585 const ulint* page_nos,
00589 ulint n_stored)
00591 {
00592 ib_int64_t tablespace_version;
00593 ulint count;
00594 ulint err;
00595 ulint i;
00596
00597 zip_size = fil_space_get_zip_size(space);
00598
00599 if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
00600
00601
00602
00603 return;
00604 }
00605
00606 tablespace_version = fil_space_get_version(space);
00607
00608 for (i = 0; i < n_stored; i++) {
00609 buf_pool_t* buf_pool;
00610
00611 count = 0;
00612
00613 os_aio_print_debug = FALSE;
00614 buf_pool = buf_pool_get(space, page_nos[i]);
00615 while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) {
00616
00617 os_aio_simulated_wake_handler_threads();
00618 os_thread_sleep(10000);
00619
00620 count++;
00621
00622 if (count > 1000) {
00623 fprintf(stderr,
00624 "InnoDB: Error: InnoDB has waited for"
00625 " 10 seconds for pending\n"
00626 "InnoDB: reads to the buffer pool to"
00627 " be finished.\n"
00628 "InnoDB: Number of pending reads %lu,"
00629 " pending pread calls %lu\n",
00630 (ulong) buf_pool->n_pend_reads,
00631 (ulong)os_file_n_pending_preads);
00632
00633 os_aio_print_debug = TRUE;
00634 }
00635 }
00636
00637 os_aio_print_debug = FALSE;
00638
00639 if ((i + 1 == n_stored) && sync) {
00640 buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
00641 zip_size, TRUE, tablespace_version,
00642 page_nos[i]);
00643 } else {
00644 buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
00645 | OS_AIO_SIMULATED_WAKE_LATER,
00646 space, zip_size, TRUE,
00647 tablespace_version, page_nos[i]);
00648 }
00649 }
00650
00651 os_aio_simulated_wake_handler_threads();
00652
00653
00654 buf_flush_free_margins();
00655
00656 #ifdef UNIV_DEBUG
00657 if (buf_debug_prints) {
00658 fprintf(stderr,
00659 "Recovery applies read-ahead pages %lu\n",
00660 (ulong) n_stored);
00661 }
00662 #endif
00663 }