00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00034 #include "os0file.h"
00035
00036 #ifdef UNIV_NONINL
00037 #include "os0file.ic"
00038 #endif
00039
00040 #include "ut0mem.h"
00041 #include "srv0srv.h"
00042 #include "srv0start.h"
00043 #include "fil0fil.h"
00044 #include "buf0buf.h"
00045 #include <errno.h>
00046 #include <fcntl.h>
00047 #include <limits.h>
00048 #include <unistd.h>
00049 #ifndef UNIV_HOTBACKUP
00050 # include "os0sync.h"
00051 # include "os0thread.h"
00052 #else
00053 # ifdef __WIN__
00054
00055 # include <sys/types.h>
00056 # include <sys/stat.h>
00057 # endif
00058 #endif
00059
00060 #if defined(LINUX_NATIVE_AIO)
00061 #include <libaio.h>
00062 #endif
00063
00064
00065
00066
00067
00068 #ifndef __WIN__
00069
00070 UNIV_INTERN ulint os_innodb_umask
00071 = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
00072 #else
00073
00074 UNIV_INTERN ulint os_innodb_umask = 0;
00075 #endif
00076
#ifdef UNIV_DO_FLUSH
/** Flag that only exists in UNIV_DO_FLUSH builds; starts out FALSE.
NOTE(review): the name suggests that setting it suppresses a flush after
each write - the code reading it is outside this view, confirm there. */
UNIV_INTERN ibool	os_do_not_call_flush_at_each_write	= FALSE;
#endif /* UNIV_DO_FLUSH */
00084
00085 #ifndef UNIV_HOTBACKUP
00086
00087
00088 #define OS_FILE_N_SEEK_MUTEXES 16
00089 UNIV_INTERN os_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
00090
00091
00092 #define OS_AIO_MERGE_N_CONSECUTIVE 64
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00149 UNIV_INTERN ibool os_aio_print_debug = FALSE;
00150
00151 #ifdef UNIV_PFS_IO
00152
00153 UNIV_INTERN mysql_pfs_key_t innodb_file_data_key;
00154 UNIV_INTERN mysql_pfs_key_t innodb_file_log_key;
00155 UNIV_INTERN mysql_pfs_key_t innodb_file_temp_key;
00156 #endif
00157
00159 typedef struct os_aio_slot_struct os_aio_slot_t;
00160
00162 struct os_aio_slot_struct{
00163 ibool is_read;
00164 ulint pos;
00166 ibool reserved;
00167 time_t reservation_time;
00168 ulint len;
00170 byte* buf;
00171 ulint type;
00172 ulint offset;
00174 ulint offset_high;
00175 os_file_t file;
00176 const char* name;
00177 ibool io_already_done;
00182 fil_node_t* message1;
00183 void* message2;
00187 #ifdef WIN_ASYNC_IO
00188 HANDLE handle;
00190 OVERLAPPED control;
00192 #elif defined(LINUX_NATIVE_AIO)
00193 struct iocb control;
00194 int n_bytes;
00195 int ret;
00196 #endif
00197 };
00198
00200 typedef struct os_aio_array_struct os_aio_array_t;
00201
00203 struct os_aio_array_struct{
00204 os_mutex_t mutex;
00205 os_event_t not_full;
00209 os_event_t is_empty;
00213 ulint n_slots;
00216 ulint n_segments;
00221 ulint cur_seg;
00225 ulint n_reserved;
00228 os_aio_slot_t* slots;
00229 #ifdef __WIN__
00230 HANDLE* handles;
00237 #endif
00238
00239 #if defined(LINUX_NATIVE_AIO)
00240 io_context_t* aio_ctx;
00241
00242
00243
00244 struct io_event* aio_events;
00245
00246
00247
00248
00249 #endif
00250 };
00251
#ifdef LINUX_NATIVE_AIO
/** Tuning constants for Linux native aio.
NOTE(review): the units are not visible in this part of the file; the
magnitudes suggest nanoseconds for the reap timeout and microseconds for
the retry sleep - confirm at the call sites. */

/** Timeout used when reaping completed i/o */
#define OS_AIO_REAP_TIMEOUT		(500000000UL)

/** Sleep between attempts to set up an aio context */
#define OS_AIO_IO_SETUP_RETRY_SLEEP	(500000UL)

/** Number of attempts to set up an aio context */
#define OS_AIO_IO_SETUP_RETRY_ATTEMPTS	5
#endif /* LINUX_NATIVE_AIO */
00262
00264 static os_event_t* os_aio_segment_wait_events = NULL;
00265
00268 static os_aio_array_t* os_aio_read_array = NULL;
00269 static os_aio_array_t* os_aio_write_array = NULL;
00270 static os_aio_array_t* os_aio_ibuf_array = NULL;
00271 static os_aio_array_t* os_aio_log_array = NULL;
00272 static os_aio_array_t* os_aio_sync_array = NULL;
00273
00274
00276 static ulint os_aio_n_segments = ULINT_UNDEFINED;
00277
00280 static ibool os_aio_recommend_sleep_for_read_threads = FALSE;
00281 #endif
00282
00283 UNIV_INTERN ulint os_n_file_reads = 0;
00284 UNIV_INTERN ulint os_bytes_read_since_printout = 0;
00285 UNIV_INTERN ulint os_n_file_writes = 0;
00286 UNIV_INTERN ulint os_n_fsyncs = 0;
00287 UNIV_INTERN ulint os_n_file_reads_old = 0;
00288 UNIV_INTERN ulint os_n_file_writes_old = 0;
00289 UNIV_INTERN ulint os_n_fsyncs_old = 0;
00290 UNIV_INTERN time_t os_last_printout;
00291
00292 UNIV_INTERN ibool os_has_said_disk_full = FALSE;
00293
00294 #ifndef UNIV_HOTBACKUP
00295
00296 static os_mutex_t os_file_count_mutex;
00297 #endif
00298
00299 UNIV_INTERN ulint os_file_n_pending_preads = 0;
00301 UNIV_INTERN ulint os_file_n_pending_pwrites = 0;
00303 UNIV_INTERN ulint os_n_pending_writes = 0;
00305 UNIV_INTERN ulint os_n_pending_reads = 0;
00306
00307
00311 UNIV_INTERN
00312 ulint
00313 os_get_os_version(void)
00314
00315 {
00316 #ifdef __WIN__
00317 OSVERSIONINFO os_info;
00318
00319 os_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
00320
00321 ut_a(GetVersionEx(&os_info));
00322
00323 if (os_info.dwPlatformId == VER_PLATFORM_WIN32s) {
00324 return(OS_WIN31);
00325 } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
00326 return(OS_WIN95);
00327 } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
00328 switch (os_info.dwMajorVersion) {
00329 case 3:
00330 case 4:
00331 return OS_WINNT;
00332 case 5:
00333 return (os_info.dwMinorVersion == 0) ? OS_WIN2000
00334 : OS_WINXP;
00335 case 6:
00336 return (os_info.dwMinorVersion == 0) ? OS_WINVISTA
00337 : OS_WIN7;
00338 default:
00339 return OS_WIN7;
00340 }
00341 } else {
00342 ut_error;
00343 return(0);
00344 }
00345 #else
00346 ut_error;
00347
00348 return(0);
00349 #endif
00350 }
00351
00352
00358 UNIV_INTERN
00359 ulint
00360 os_file_get_last_error(
00361
00362 ibool report_all_errors)
00364 {
00365 ulint err;
00366
00367 #ifdef __WIN__
00368
00369 err = (ulint) GetLastError();
00370
00371 if (report_all_errors
00372 || (err != ERROR_DISK_FULL && err != ERROR_FILE_EXISTS)) {
00373
00374 ut_print_timestamp(stderr);
00375 fprintf(stderr,
00376 " InnoDB: Operating system error number %lu"
00377 " in a file operation.\n", (ulong) err);
00378
00379 if (err == ERROR_PATH_NOT_FOUND) {
00380 fprintf(stderr,
00381 "InnoDB: The error means the system"
00382 " cannot find the path specified.\n");
00383
00384 if (srv_is_being_started) {
00385 fprintf(stderr,
00386 "InnoDB: If you are installing InnoDB,"
00387 " remember that you must create\n"
00388 "InnoDB: directories yourself, InnoDB"
00389 " does not create them.\n");
00390 }
00391 } else if (err == ERROR_ACCESS_DENIED) {
00392 fprintf(stderr,
00393 "InnoDB: The error means mysqld does not have"
00394 " the access rights to\n"
00395 "InnoDB: the directory. It may also be"
00396 " you have created a subdirectory\n"
00397 "InnoDB: of the same name as a data file.\n");
00398 } else if (err == ERROR_SHARING_VIOLATION
00399 || err == ERROR_LOCK_VIOLATION) {
00400 fprintf(stderr,
00401 "InnoDB: The error means that another program"
00402 " is using InnoDB's files.\n"
00403 "InnoDB: This might be a backup or antivirus"
00404 " software or another instance\n"
00405 "InnoDB: of MySQL."
00406 " Please close it to get rid of this error.\n");
00407 } else if (err == ERROR_WORKING_SET_QUOTA
00408 || err == ERROR_NO_SYSTEM_RESOURCES) {
00409 fprintf(stderr,
00410 "InnoDB: The error means that there are no"
00411 " sufficient system resources or quota to"
00412 " complete the operation.\n");
00413 } else if (err == ERROR_OPERATION_ABORTED) {
00414 fprintf(stderr,
00415 "InnoDB: The error means that the I/O"
00416 " operation has been aborted\n"
00417 "InnoDB: because of either a thread exit"
00418 " or an application request.\n"
00419 "InnoDB: Retry attempt is made.\n");
00420 } else {
00421 fprintf(stderr,
00422 "InnoDB: Some operating system error numbers"
00423 " are described at\n"
00424 "InnoDB: "
00425 REFMAN
00426 "operating-system-error-codes.html\n");
00427 }
00428 }
00429
00430 fflush(stderr);
00431
00432 if (err == ERROR_FILE_NOT_FOUND) {
00433 return(OS_FILE_NOT_FOUND);
00434 } else if (err == ERROR_DISK_FULL) {
00435 return(OS_FILE_DISK_FULL);
00436 } else if (err == ERROR_FILE_EXISTS) {
00437 return(OS_FILE_ALREADY_EXISTS);
00438 } else if (err == ERROR_SHARING_VIOLATION
00439 || err == ERROR_LOCK_VIOLATION) {
00440 return(OS_FILE_SHARING_VIOLATION);
00441 } else if (err == ERROR_WORKING_SET_QUOTA
00442 || err == ERROR_NO_SYSTEM_RESOURCES) {
00443 return(OS_FILE_INSUFFICIENT_RESOURCE);
00444 } else if (err == ERROR_OPERATION_ABORTED) {
00445 return(OS_FILE_OPERATION_ABORTED);
00446 } else {
00447 return(100 + err);
00448 }
00449 #else
00450 err = (ulint) errno;
00451
00452 if (report_all_errors
00453 || (err != ENOSPC && err != EEXIST)) {
00454
00455 ut_print_timestamp(stderr);
00456 fprintf(stderr,
00457 " InnoDB: Operating system error number %lu"
00458 " in a file operation.\n", (ulong) err);
00459
00460 if (err == ENOENT) {
00461 fprintf(stderr,
00462 "InnoDB: The error means the system"
00463 " cannot find the path specified.\n");
00464
00465 if (srv_is_being_started) {
00466 fprintf(stderr,
00467 "InnoDB: If you are installing InnoDB,"
00468 " remember that you must create\n"
00469 "InnoDB: directories yourself, InnoDB"
00470 " does not create them.\n");
00471 }
00472 } else if (err == EACCES) {
00473 fprintf(stderr,
00474 "InnoDB: The error means mysqld does not have"
00475 " the access rights to\n"
00476 "InnoDB: the directory.\n");
00477 } else {
00478 if (strerror((int)err) != NULL) {
00479 fprintf(stderr,
00480 "InnoDB: Error number %lu"
00481 " means '%s'.\n",
00482 err, strerror((int)err));
00483 }
00484
00485 fprintf(stderr,
00486 "InnoDB: Some operating system"
00487 " error numbers are described at\n"
00488 "InnoDB: "
00489 REFMAN
00490 "operating-system-error-codes.html\n");
00491 }
00492 }
00493
00494 fflush(stderr);
00495
00496 switch (err) {
00497 case ENOSPC:
00498 return(OS_FILE_DISK_FULL);
00499 case ENOENT:
00500 return(OS_FILE_NOT_FOUND);
00501 case EEXIST:
00502 return(OS_FILE_ALREADY_EXISTS);
00503 case EXDEV:
00504 case ENOTDIR:
00505 case EISDIR:
00506 return(OS_FILE_PATH_ERROR);
00507 case EAGAIN:
00508 if (srv_use_native_aio) {
00509 return(OS_FILE_AIO_RESOURCES_RESERVED);
00510 }
00511 break;
00512 case EINTR:
00513 if (srv_use_native_aio) {
00514 return(OS_FILE_AIO_INTERRUPTED);
00515 }
00516 break;
00517 }
00518 return(100 + err);
00519 #endif
00520 }
00521
00522
00527 static
00528 ibool
00529 os_file_handle_error_cond_exit(
00530
00531 const char* name,
00532 const char* operation,
00533 ibool should_exit)
00535 {
00536 ulint err;
00537
00538 err = os_file_get_last_error(FALSE);
00539
00540 if (err == OS_FILE_DISK_FULL) {
00541
00542
00543 if (os_has_said_disk_full) {
00544
00545 return(FALSE);
00546 }
00547
00548 if (name) {
00549 ut_print_timestamp(stderr);
00550 fprintf(stderr,
00551 " InnoDB: Encountered a problem with"
00552 " file %s\n", name);
00553 }
00554
00555 ut_print_timestamp(stderr);
00556 fprintf(stderr,
00557 " InnoDB: Disk is full. Try to clean the disk"
00558 " to free space.\n");
00559
00560 os_has_said_disk_full = TRUE;
00561
00562 fflush(stderr);
00563
00564 return(FALSE);
00565 } else if (err == OS_FILE_AIO_RESOURCES_RESERVED) {
00566
00567 return(TRUE);
00568 } else if (err == OS_FILE_AIO_INTERRUPTED) {
00569
00570 return(TRUE);
00571 } else if (err == OS_FILE_ALREADY_EXISTS
00572 || err == OS_FILE_PATH_ERROR) {
00573
00574 return(FALSE);
00575 } else if (err == OS_FILE_SHARING_VIOLATION) {
00576
00577 os_thread_sleep(10000000);
00578 return(TRUE);
00579 } else if (err == OS_FILE_INSUFFICIENT_RESOURCE) {
00580
00581 os_thread_sleep(100000);
00582 return(TRUE);
00583 } else if (err == OS_FILE_OPERATION_ABORTED) {
00584
00585 os_thread_sleep(100000);
00586 return(TRUE);
00587 } else {
00588 if (name) {
00589 fprintf(stderr, "InnoDB: File name %s\n", name);
00590 }
00591
00592 fprintf(stderr, "InnoDB: File operation call: '%s'.\n",
00593 operation);
00594
00595 if (should_exit) {
00596 fprintf(stderr, "InnoDB: Cannot continue operation.\n");
00597
00598 fflush(stderr);
00599
00600 exit(1);
00601 }
00602 }
00603
00604 return(FALSE);
00605 }
00606
00607
00610 static
00611 ibool
00612 os_file_handle_error(
00613
00614 const char* name,
00615 const char* operation)
00616 {
00617
00618 return(os_file_handle_error_cond_exit(name, operation, TRUE));
00619 }
00620
00621
00624 static
00625 ibool
00626 os_file_handle_error_no_exit(
00627
00628 const char* name,
00629 const char* operation)
00630 {
00631
00632 return(os_file_handle_error_cond_exit(name, operation, FALSE));
00633 }
00634
/* File locking is used everywhere except in hot-backup builds and on
Windows. */
#undef USE_FILE_LOCK
#define USE_FILE_LOCK
#if defined(UNIV_HOTBACKUP) || defined(__WIN__)
# undef USE_FILE_LOCK
#endif
#ifdef USE_FILE_LOCK
/****************************************************************//**
Takes an exclusive (write) advisory lock on the whole file with
fcntl(F_SETLK).  The call does not block: if the lock cannot be obtained
a message is printed to stderr and the function fails.
@return 0 on success, -1 if the lock could not be obtained */
static
int
os_file_lock(
/*=========*/
	int		fd,	/*!< in: file descriptor to lock */
	const char*	name)	/*!< in: file name, used in the message only */
{
	struct flock	lk;

	lk.l_type = F_WRLCK;
	lk.l_whence = SEEK_SET;
	/* start 0 with length 0 covers the file from beginning to end */
	lk.l_start = 0;
	lk.l_len = 0;

	if (fcntl(fd, F_SETLK, &lk) == -1) {
		/* Capture errno before calling fprintf(): stdio calls
		may overwrite it, and the value is needed again below */
		const int	lock_errno = errno;

		fprintf(stderr,
			"InnoDB: Unable to lock %s, error: %d\n",
			name, lock_errno);

		if (lock_errno == EAGAIN || lock_errno == EACCES) {
			/* These two codes mean that somebody else holds
			a conflicting lock */
			fprintf(stderr,
				"InnoDB: Check that you do not already have"
				" another drizzled process\n"
				"InnoDB: using the same InnoDB data"
				" or log files.\n");
		}

		return(-1);
	}

	return(0);
}
#endif /* USE_FILE_LOCK */
00676
00677 #ifndef UNIV_HOTBACKUP
00678
00680 UNIV_INTERN
00681 void
00682 os_io_init_simple(void)
00683
00684 {
00685 ulint i;
00686
00687 os_file_count_mutex = os_mutex_create();
00688
00689 for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
00690 os_file_seek_mutexes[i] = os_mutex_create();
00691 }
00692 }
00693
00694
00698 UNIV_INTERN
00699 FILE*
00700 os_file_create_tmpfile(void)
00701
00702 {
00703 FILE* file = NULL;
00704 int fd = innobase_mysql_tmpfile();
00705
00706 if (fd >= 0) {
00707 file = fdopen(fd, "w+b");
00708 }
00709
00710 if (!file) {
00711 ut_print_timestamp(stderr);
00712 fprintf(stderr,
00713 " InnoDB: Error: unable to create temporary file;"
00714 " errno: %d\n", errno);
00715 if (fd >= 0) {
00716 close(fd);
00717 }
00718 }
00719
00720 return(file);
00721 }
00722 #endif
00723
00724
00730 UNIV_INTERN
00731 os_file_dir_t
00732 os_file_opendir(
00733
00734 const char* dirname,
00736 ibool error_is_fatal)
00741 {
00742 os_file_dir_t dir;
00743 #ifdef __WIN__
00744 LPWIN32_FIND_DATA lpFindFileData;
00745 char path[OS_FILE_MAX_PATH + 3];
00746
00747 ut_a(strlen(dirname) < OS_FILE_MAX_PATH);
00748
00749 strcpy(path, dirname);
00750 strcpy(path + strlen(path), "\\*");
00751
00752
00753
00754
00755
00756 lpFindFileData = ut_malloc(sizeof(WIN32_FIND_DATA));
00757
00758 dir = FindFirstFile((LPCTSTR) path, lpFindFileData);
00759
00760 ut_free(lpFindFileData);
00761
00762 if (dir == INVALID_HANDLE_VALUE) {
00763
00764 if (error_is_fatal) {
00765 os_file_handle_error(dirname, "opendir");
00766 }
00767
00768 return(NULL);
00769 }
00770
00771 return(dir);
00772 #else
00773 dir = opendir(dirname);
00774
00775 if (dir == NULL && error_is_fatal) {
00776 os_file_handle_error(dirname, "opendir");
00777 }
00778
00779 return(dir);
00780 #endif
00781 }
00782
00783
00786 UNIV_INTERN
00787 int
00788 os_file_closedir(
00789
00790 os_file_dir_t dir)
00791 {
00792 #ifdef __WIN__
00793 BOOL ret;
00794
00795 ret = FindClose(dir);
00796
00797 if (!ret) {
00798 os_file_handle_error_no_exit(NULL, "closedir");
00799
00800 return(-1);
00801 }
00802
00803 return(0);
00804 #else
00805 int ret;
00806
00807 ret = closedir(dir);
00808
00809 if (ret) {
00810 os_file_handle_error_no_exit(NULL, "closedir");
00811 }
00812
00813 return(ret);
00814 #endif
00815 }
00816
00817
00821 UNIV_INTERN
00822 int
00823 os_file_readdir_next_file(
00824
00825 const char* dirname,
00826 os_file_dir_t dir,
00827 os_file_stat_t* info)
00828 {
00829 #ifdef __WIN__
00830 LPWIN32_FIND_DATA lpFindFileData;
00831 BOOL ret;
00832
00833 lpFindFileData = ut_malloc(sizeof(WIN32_FIND_DATA));
00834 next_file:
00835 ret = FindNextFile(dir, lpFindFileData);
00836
00837 if (ret) {
00838 ut_a(strlen((char *) lpFindFileData->cFileName)
00839 < OS_FILE_MAX_PATH);
00840
00841 if (strcmp((char *) lpFindFileData->cFileName, ".") == 0
00842 || strcmp((char *) lpFindFileData->cFileName, "..") == 0) {
00843
00844 goto next_file;
00845 }
00846
00847 strcpy(info->name, (char *) lpFindFileData->cFileName);
00848
00849 info->size = (ib_int64_t)(lpFindFileData->nFileSizeLow)
00850 + (((ib_int64_t)(lpFindFileData->nFileSizeHigh))
00851 << 32);
00852
00853 if (lpFindFileData->dwFileAttributes
00854 & FILE_ATTRIBUTE_REPARSE_POINT) {
00855
00856
00857
00858
00859
00860 info->type = OS_FILE_TYPE_LINK;
00861 } else if (lpFindFileData->dwFileAttributes
00862 & FILE_ATTRIBUTE_DIRECTORY) {
00863 info->type = OS_FILE_TYPE_DIR;
00864 } else {
00865
00866
00867
00868
00869 info->type = OS_FILE_TYPE_FILE;
00870 }
00871 }
00872
00873 ut_free(lpFindFileData);
00874
00875 if (ret) {
00876 return(0);
00877 } else if (GetLastError() == ERROR_NO_MORE_FILES) {
00878
00879 return(1);
00880 } else {
00881 os_file_handle_error_no_exit(dirname,
00882 "readdir_next_file");
00883 return(-1);
00884 }
00885 #else
00886 struct dirent* ent;
00887 char* full_path;
00888 int ret;
00889 struct stat statinfo;
00890 #ifdef HAVE_READDIR_R
00891 char dirent_buf[sizeof(struct dirent)
00892 + _POSIX_PATH_MAX + 100];
00893
00894
00895
00896 #endif
00897
00898 next_file:
00899
00900 #ifdef HAVE_READDIR_R
00901 ret = readdir_r(dir, (struct dirent*)dirent_buf, &ent);
00902
00903 if (ret != 0
00904 #ifdef UNIV_AIX
00905
00906
00907
00908
00909 && ent != NULL
00910 #endif
00911 ) {
00912 fprintf(stderr,
00913 "InnoDB: cannot read directory %s, error %lu\n",
00914 dirname, (ulong)ret);
00915
00916 return(-1);
00917 }
00918
00919 if (ent == NULL) {
00920
00921
00922 return(1);
00923 }
00924
00925 ut_a(strlen(ent->d_name) < _POSIX_PATH_MAX + 100 - 1);
00926 #else
00927 ent = readdir(dir);
00928
00929 if (ent == NULL) {
00930
00931 return(1);
00932 }
00933 #endif
00934 ut_a(strlen(ent->d_name) < OS_FILE_MAX_PATH);
00935
00936 if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) {
00937
00938 goto next_file;
00939 }
00940
00941 strcpy(info->name, ent->d_name);
00942
00943 full_path = static_cast<char* >(ut_malloc(strlen(dirname) + strlen(ent->d_name) + 10));
00944
00945 sprintf(full_path, "%s/%s", dirname, ent->d_name);
00946
00947 ret = stat(full_path, &statinfo);
00948
00949 if (ret) {
00950
00951 if (errno == ENOENT) {
00952
00953
00954
00955
00956
00957
00958
00959
00960
00961
00962 ut_free(full_path);
00963
00964 goto next_file;
00965 }
00966
00967 os_file_handle_error_no_exit(full_path, "stat");
00968
00969 ut_free(full_path);
00970
00971 return(-1);
00972 }
00973
00974 info->size = (ib_int64_t)statinfo.st_size;
00975
00976 if (S_ISDIR(statinfo.st_mode)) {
00977 info->type = OS_FILE_TYPE_DIR;
00978 } else if (S_ISLNK(statinfo.st_mode)) {
00979 info->type = OS_FILE_TYPE_LINK;
00980 } else if (S_ISREG(statinfo.st_mode)) {
00981 info->type = OS_FILE_TYPE_FILE;
00982 } else {
00983 info->type = OS_FILE_TYPE_UNKNOWN;
00984 }
00985
00986 ut_free(full_path);
00987
00988 return(0);
00989 #endif
00990 }
00991
00992
00998 UNIV_INTERN
00999 ibool
01000 os_file_create_directory(
01001
01002 const char* pathname,
01004 ibool fail_if_exists)
01006 {
01007 #ifdef __WIN__
01008 BOOL rcode;
01009
01010 rcode = CreateDirectory((LPCTSTR) pathname, NULL);
01011 if (!(rcode != 0
01012 || (GetLastError() == ERROR_ALREADY_EXISTS
01013 && !fail_if_exists))) {
01014
01015 os_file_handle_error(pathname, "CreateDirectory");
01016
01017 return(FALSE);
01018 }
01019
01020 return (TRUE);
01021 #else
01022 int rcode;
01023
01024 rcode = mkdir(pathname, 0770);
01025
01026 if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) {
01027
01028 os_file_handle_error(pathname, "mkdir");
01029
01030 return(FALSE);
01031 }
01032
01033 return (TRUE);
01034 #endif
01035 }
01036
01037
01043 UNIV_INTERN
01044 os_file_t
01045 os_file_create_simple_func(
01046
01047 const char* name,
01049 ulint create_mode,
01056 ulint access_type,
01058 ibool* success)
01059 {
01060 #ifdef __WIN__
01061 os_file_t file;
01062 DWORD create_flag;
01063 DWORD access;
01064 DWORD attributes = 0;
01065 ibool retry;
01066
01067 try_again:
01068 ut_a(name);
01069
01070 if (create_mode == OS_FILE_OPEN) {
01071 create_flag = OPEN_EXISTING;
01072 } else if (create_mode == OS_FILE_CREATE) {
01073 create_flag = CREATE_NEW;
01074 } else if (create_mode == OS_FILE_CREATE_PATH) {
01075
01076 *success = os_file_create_subdirs_if_needed(name);
01077 if (!*success) {
01078 ut_error;
01079 }
01080 create_flag = CREATE_NEW;
01081 create_mode = OS_FILE_CREATE;
01082 } else {
01083 create_flag = 0;
01084 ut_error;
01085 }
01086
01087 if (access_type == OS_FILE_READ_ONLY) {
01088 access = GENERIC_READ;
01089 } else if (access_type == OS_FILE_READ_WRITE) {
01090 access = GENERIC_READ | GENERIC_WRITE;
01091 } else {
01092 access = 0;
01093 ut_error;
01094 }
01095
01096 file = CreateFile((LPCTSTR) name,
01097 access,
01098 FILE_SHARE_READ | FILE_SHARE_WRITE,
01099
01100
01101 NULL,
01102 create_flag,
01103 attributes,
01104 NULL);
01106 if (file == INVALID_HANDLE_VALUE) {
01107 *success = FALSE;
01108
01109 retry = os_file_handle_error(name,
01110 create_mode == OS_FILE_OPEN ?
01111 "open" : "create");
01112 if (retry) {
01113 goto try_again;
01114 }
01115 } else {
01116 *success = TRUE;
01117 }
01118
01119 return(file);
01120 #else
01121 os_file_t file;
01122 int create_flag;
01123 ibool retry;
01124
01125 try_again:
01126 ut_a(name);
01127
01128 if (create_mode == OS_FILE_OPEN) {
01129 if (access_type == OS_FILE_READ_ONLY) {
01130 create_flag = O_RDONLY;
01131 } else {
01132 create_flag = O_RDWR;
01133 }
01134 } else if (create_mode == OS_FILE_CREATE) {
01135 create_flag = O_RDWR | O_CREAT | O_EXCL;
01136 } else if (create_mode == OS_FILE_CREATE_PATH) {
01137
01138 *success = os_file_create_subdirs_if_needed(name);
01139 if (!*success) {
01140 return (-1);
01141 }
01142 create_flag = O_RDWR | O_CREAT | O_EXCL;
01143 create_mode = OS_FILE_CREATE;
01144 } else {
01145 create_flag = 0;
01146 ut_error;
01147 }
01148
01149 if (create_mode == OS_FILE_CREATE) {
01150 file = open(name, create_flag, S_IRUSR | S_IWUSR
01151 | S_IRGRP | S_IWGRP);
01152 } else {
01153 file = open(name, create_flag);
01154 }
01155
01156 if (file == -1) {
01157 *success = FALSE;
01158
01159 retry = os_file_handle_error(name,
01160 create_mode == OS_FILE_OPEN ?
01161 "open" : "create");
01162 if (retry) {
01163 goto try_again;
01164 }
01165 #ifdef USE_FILE_LOCK
01166 } else if (access_type == OS_FILE_READ_WRITE
01167 && os_file_lock(file, name)) {
01168 *success = FALSE;
01169 close(file);
01170 file = -1;
01171 #endif
01172 } else {
01173 *success = TRUE;
01174 }
01175
01176 return(file);
01177 #endif
01178 }
01179
01180
01186 UNIV_INTERN
01187 os_file_t
01188 os_file_create_simple_no_error_handling_func(
01189
01190 const char* name,
01192 ulint create_mode,
01196 ulint access_type,
01200 ibool* success)
01201 {
01202 #ifdef __WIN__
01203 os_file_t file;
01204 DWORD create_flag;
01205 DWORD access;
01206 DWORD attributes = 0;
01207 DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE;
01208
01209 ut_a(name);
01210
01211 if (create_mode == OS_FILE_OPEN) {
01212 create_flag = OPEN_EXISTING;
01213 } else if (create_mode == OS_FILE_CREATE) {
01214 create_flag = CREATE_NEW;
01215 } else {
01216 create_flag = 0;
01217 ut_error;
01218 }
01219
01220 if (access_type == OS_FILE_READ_ONLY) {
01221 access = GENERIC_READ;
01222 } else if (access_type == OS_FILE_READ_WRITE) {
01223 access = GENERIC_READ | GENERIC_WRITE;
01224 } else if (access_type == OS_FILE_READ_ALLOW_DELETE) {
01225 access = GENERIC_READ;
01226 share_mode = FILE_SHARE_DELETE | FILE_SHARE_READ
01227 | FILE_SHARE_WRITE;
01231 } else {
01232 access = 0;
01233 ut_error;
01234 }
01235
01236 file = CreateFile((LPCTSTR) name,
01237 access,
01238 share_mode,
01239 NULL,
01240 create_flag,
01241 attributes,
01242 NULL);
01244 if (file == INVALID_HANDLE_VALUE) {
01245 *success = FALSE;
01246 } else {
01247 *success = TRUE;
01248 }
01249
01250 return(file);
01251 #else
01252 os_file_t file;
01253 int create_flag;
01254
01255 ut_a(name);
01256
01257 if (create_mode == OS_FILE_OPEN) {
01258 if (access_type == OS_FILE_READ_ONLY) {
01259 create_flag = O_RDONLY;
01260 } else {
01261 create_flag = O_RDWR;
01262 }
01263 } else if (create_mode == OS_FILE_CREATE) {
01264 create_flag = O_RDWR | O_CREAT | O_EXCL;
01265 } else {
01266 create_flag = 0;
01267 ut_error;
01268 }
01269
01270 if (create_mode == OS_FILE_CREATE) {
01271 file = open(name, create_flag, S_IRUSR | S_IWUSR
01272 | S_IRGRP | S_IWGRP);
01273 } else {
01274 file = open(name, create_flag);
01275 }
01276
01277 if (file == -1) {
01278 *success = FALSE;
01279 #ifdef USE_FILE_LOCK
01280 } else if (access_type == OS_FILE_READ_WRITE
01281 && os_file_lock(file, name)) {
01282 *success = FALSE;
01283 close(file);
01284 file = -1;
01285 #endif
01286 } else {
01287 *success = TRUE;
01288 }
01289
01290 return(file);
01291 #endif
01292 }
01293
01294
01296 UNIV_INTERN
01297 void
01298 os_file_set_nocache(
01299
01300 int fd,
01301 const char* file_name,
01302 const char* operation_name)
01305 {
01306
01307 #if defined(UNIV_SOLARIS) && defined(DIRECTIO_ON)
01308 if (directio(fd, DIRECTIO_ON) == -1) {
01309 int errno_save;
01310 errno_save = (int)errno;
01311 ut_print_timestamp(stderr);
01312 fprintf(stderr,
01313 " InnoDB: Failed to set DIRECTIO_ON "
01314 "on file %s: %s: %s, continuing anyway\n",
01315 file_name, operation_name, strerror(errno_save));
01316 }
01317 #elif defined(O_DIRECT)
01318 if (fcntl(fd, F_SETFL, O_DIRECT) == -1) {
01319 int errno_save;
01320 errno_save = (int)errno;
01321 ut_print_timestamp(stderr);
01322 fprintf(stderr,
01323 " InnoDB: Failed to set O_DIRECT "
01324 "on file %s: %s: %s, continuing anyway\n",
01325 file_name, operation_name, strerror(errno_save));
01326 if (errno_save == EINVAL) {
01327 ut_print_timestamp(stderr);
01328 fprintf(stderr,
01329 " InnoDB: O_DIRECT is known to result in "
01330 "'Invalid argument' on Linux on tmpfs, "
01331 "see MySQL Bug#26662\n");
01332 }
01333 }
01334 #else
01335 (void)fd;
01336 (void)file_name;
01337 (void)operation_name;
01338 #endif
01339 }
01340
01341
01347 UNIV_INTERN
01348 os_file_t
01349 os_file_create_func(
01350
01351 const char* name,
01353 ulint create_mode,
01361 ulint purpose,
01368 ulint type,
01369 ibool* success)
01370 {
01371 #ifdef __WIN__
01372 os_file_t file;
01373 DWORD share_mode = FILE_SHARE_READ;
01374 DWORD create_flag;
01375 DWORD attributes;
01376 ibool retry;
01377 try_again:
01378 ut_a(name);
01379
01380 if (create_mode == OS_FILE_OPEN_RAW) {
01381 create_flag = OPEN_EXISTING;
01382 share_mode = FILE_SHARE_WRITE;
01383 } else if (create_mode == OS_FILE_OPEN
01384 || create_mode == OS_FILE_OPEN_RETRY) {
01385 create_flag = OPEN_EXISTING;
01386 } else if (create_mode == OS_FILE_CREATE) {
01387 create_flag = CREATE_NEW;
01388 } else if (create_mode == OS_FILE_OVERWRITE) {
01389 create_flag = CREATE_ALWAYS;
01390 } else {
01391 create_flag = 0;
01392 ut_error;
01393 }
01394
01395 if (purpose == OS_FILE_AIO) {
01396
01397
01398 attributes = 0;
01399 #ifdef WIN_ASYNC_IO
01400 if (srv_use_native_aio) {
01401 attributes = attributes | FILE_FLAG_OVERLAPPED;
01402 }
01403 #endif
01404 #ifdef UNIV_NON_BUFFERED_IO
01405 # ifndef UNIV_HOTBACKUP
01406 if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
01407
01408
01409
01410 } else if (srv_win_file_flush_method
01411 == SRV_WIN_IO_UNBUFFERED) {
01412 attributes = attributes | FILE_FLAG_NO_BUFFERING;
01413 }
01414 # else
01415 attributes = attributes | FILE_FLAG_NO_BUFFERING;
01416 # endif
01417 #endif
01418 } else if (purpose == OS_FILE_NORMAL) {
01419 attributes = 0;
01420 #ifdef UNIV_NON_BUFFERED_IO
01421 # ifndef UNIV_HOTBACKUP
01422 if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
01423
01424
01425
01426 } else if (srv_win_file_flush_method
01427 == SRV_WIN_IO_UNBUFFERED) {
01428 attributes = attributes | FILE_FLAG_NO_BUFFERING;
01429 }
01430 # else
01431 attributes = attributes | FILE_FLAG_NO_BUFFERING;
01432 # endif
01433 #endif
01434 } else {
01435 attributes = 0;
01436 ut_error;
01437 }
01438
01439 file = CreateFile((LPCTSTR) name,
01440 GENERIC_READ | GENERIC_WRITE,
01441
01442 share_mode,
01443
01444
01445
01446
01447
01448
01449
01450
01451
01452
01453 NULL,
01454 create_flag,
01455 attributes,
01456 NULL);
01458 if (file == INVALID_HANDLE_VALUE) {
01459 *success = FALSE;
01460
01461
01462
01463
01464
01465
01466
01467
01468 if (srv_file_per_table) {
01469 retry = os_file_handle_error_no_exit(name,
01470 create_mode == OS_FILE_CREATE ?
01471 "create" : "open");
01472 } else {
01473 retry = os_file_handle_error(name,
01474 create_mode == OS_FILE_CREATE ?
01475 "create" : "open");
01476 }
01477
01478 if (retry) {
01479 goto try_again;
01480 }
01481 } else {
01482 *success = TRUE;
01483 }
01484
01485 return(file);
01486 #else
01487 os_file_t file;
01488 int create_flag;
01489 ibool retry;
01490 const char* mode_str = NULL;
01491
01492 try_again:
01493 ut_a(name);
01494
01495 if (create_mode == OS_FILE_OPEN || create_mode == OS_FILE_OPEN_RAW
01496 || create_mode == OS_FILE_OPEN_RETRY) {
01497 mode_str = "OPEN";
01498 create_flag = O_RDWR;
01499 } else if (create_mode == OS_FILE_CREATE) {
01500 mode_str = "CREATE";
01501 create_flag = O_RDWR | O_CREAT | O_EXCL;
01502 } else if (create_mode == OS_FILE_OVERWRITE) {
01503 mode_str = "OVERWRITE";
01504 create_flag = O_RDWR | O_CREAT | O_TRUNC;
01505 } else {
01506 create_flag = 0;
01507 ut_error;
01508 }
01509
01510 ut_a(type == OS_LOG_FILE || type == OS_DATA_FILE);
01511 ut_a(purpose == OS_FILE_AIO || purpose == OS_FILE_NORMAL);
01512
01513 #ifdef O_SYNC
01514
01515
01516
01517 if (type == OS_LOG_FILE
01518 && srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
01519
01520 # if 0
01521 fprintf(stderr, "Using O_SYNC for file %s\n", name);
01522 # endif
01523
01524 create_flag = create_flag | O_SYNC;
01525 }
01526 #endif
01527
01528 file = open(name, create_flag, os_innodb_umask);
01529
01530 if (file == -1) {
01531 *success = FALSE;
01532
01533
01534
01535
01536
01537
01538
01539
01540 if (srv_file_per_table) {
01541 retry = os_file_handle_error_no_exit(name,
01542 create_mode == OS_FILE_CREATE ?
01543 "create" : "open");
01544 } else {
01545 retry = os_file_handle_error(name,
01546 create_mode == OS_FILE_CREATE ?
01547 "create" : "open");
01548 }
01549
01550 if (retry) {
01551 goto try_again;
01552 } else {
01553 return(file );
01554 }
01555 }
01556
01557
01558 *success = TRUE;
01559
01560
01561 if (type != OS_LOG_FILE
01562 && srv_unix_file_flush_method == SRV_UNIX_O_DIRECT) {
01563
01564 os_file_set_nocache(file, name, mode_str);
01565 }
01566
01567 #ifdef USE_FILE_LOCK
01568 if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) {
01569
01570 if (create_mode == OS_FILE_OPEN_RETRY) {
01571 int i;
01572 ut_print_timestamp(stderr);
01573 fputs(" InnoDB: Retrying to lock"
01574 " the first data file\n",
01575 stderr);
01576 for (i = 0; i < 100; i++) {
01577 os_thread_sleep(1000000);
01578 if (!os_file_lock(file, name)) {
01579 *success = TRUE;
01580 return(file);
01581 }
01582 }
01583 ut_print_timestamp(stderr);
01584 fputs(" InnoDB: Unable to open the first data file\n",
01585 stderr);
01586 }
01587
01588 *success = FALSE;
01589 close(file);
01590 file = -1;
01591 }
01592 #endif
01593
01594 return(file);
01595 #endif
01596 }
01597
01598
01601 UNIV_INTERN
01602 ibool
01603 os_file_delete_if_exists(
01604
01605 const char* name)
01606 {
01607 #ifdef __WIN__
01608 BOOL ret;
01609 ulint count = 0;
01610 loop:
01611
01612
01613
01614 ret = DeleteFile((LPCTSTR)name);
01615
01616 if (ret) {
01617 return(TRUE);
01618 }
01619
01620 if (GetLastError() == ERROR_FILE_NOT_FOUND) {
01621
01622
01623 return(TRUE);
01624 }
01625
01626 count++;
01627
01628 if (count > 100 && 0 == (count % 10)) {
01629 fprintf(stderr,
01630 "InnoDB: Warning: cannot delete file %s\n"
01631 "InnoDB: Are you running ibbackup"
01632 " to back up the file?\n", name);
01633
01634 os_file_get_last_error(TRUE);
01635 }
01636
01637 os_thread_sleep(1000000);
01638
01639 if (count > 2000) {
01640
01641 return(FALSE);
01642 }
01643
01644 goto loop;
01645 #else
01646 int ret;
01647
01648 ret = unlink(name);
01649
01650 if (ret != 0 && errno != ENOENT) {
01651 os_file_handle_error_no_exit(name, "delete");
01652
01653 return(FALSE);
01654 }
01655
01656 return(TRUE);
01657 #endif
01658 }
01659
01660
01663 UNIV_INTERN
01664 ibool
01665 os_file_delete(
01666
01667 const char* name)
01668 {
01669 #ifdef __WIN__
01670 BOOL ret;
01671 ulint count = 0;
01672 loop:
01673
01674
01675
01676 ret = DeleteFile((LPCTSTR)name);
01677
01678 if (ret) {
01679 return(TRUE);
01680 }
01681
01682 if (GetLastError() == ERROR_FILE_NOT_FOUND) {
01683
01684
01685
01686 return(FALSE);
01687 }
01688
01689 count++;
01690
01691 if (count > 100 && 0 == (count % 10)) {
01692 fprintf(stderr,
01693 "InnoDB: Warning: cannot delete file %s\n"
01694 "InnoDB: Are you running ibbackup"
01695 " to back up the file?\n", name);
01696
01697 os_file_get_last_error(TRUE);
01698 }
01699
01700 os_thread_sleep(1000000);
01701
01702 if (count > 2000) {
01703
01704 return(FALSE);
01705 }
01706
01707 goto loop;
01708 #else
01709 int ret;
01710
01711 ret = unlink(name);
01712
01713 if (ret != 0) {
01714 os_file_handle_error_no_exit(name, "delete");
01715
01716 return(FALSE);
01717 }
01718
01719 return(TRUE);
01720 #endif
01721 }
01722
01723
01728 UNIV_INTERN
01729 ibool
01730 os_file_rename_func(
01731
01732 const char* oldpath,
01734 const char* newpath)
01735 {
01736 #ifdef __WIN__
01737 BOOL ret;
01738
01739 ret = MoveFile((LPCTSTR)oldpath, (LPCTSTR)newpath);
01740
01741 if (ret) {
01742 return(TRUE);
01743 }
01744
01745 os_file_handle_error_no_exit(oldpath, "rename");
01746
01747 return(FALSE);
01748 #else
01749 int ret;
01750
01751 ret = rename(oldpath, newpath);
01752
01753 if (ret != 0) {
01754 os_file_handle_error_no_exit(oldpath, "rename");
01755
01756 return(FALSE);
01757 }
01758
01759 return(TRUE);
01760 #endif
01761 }
01762
01763
01768 UNIV_INTERN
01769 ibool
01770 os_file_close_func(
01771
01772 os_file_t file)
01773 {
01774 #ifdef __WIN__
01775 BOOL ret;
01776
01777 ut_a(file);
01778
01779 ret = CloseHandle(file);
01780
01781 if (ret) {
01782 return(TRUE);
01783 }
01784
01785 os_file_handle_error(NULL, "close");
01786
01787 return(FALSE);
01788 #else
01789 int ret;
01790
01791 ret = close(file);
01792
01793 if (ret == -1) {
01794 os_file_handle_error(NULL, "close");
01795
01796 return(FALSE);
01797 }
01798
01799 return(TRUE);
01800 #endif
01801 }
01802
01803 #ifdef UNIV_HOTBACKUP
01804
01807 UNIV_INTERN
01808 ibool
01809 os_file_close_no_error_handling(
01810
01811 os_file_t file)
01812 {
01813 #ifdef __WIN__
01814 BOOL ret;
01815
01816 ut_a(file);
01817
01818 ret = CloseHandle(file);
01819
01820 if (ret) {
01821 return(TRUE);
01822 }
01823
01824 return(FALSE);
01825 #else
01826 int ret;
01827
01828 ret = close(file);
01829
01830 if (ret == -1) {
01831
01832 return(FALSE);
01833 }
01834
01835 return(TRUE);
01836 #endif
01837 }
01838 #endif
01839
01840
01843 UNIV_INTERN
01844 ibool
01845 os_file_get_size(
01846
01847 os_file_t file,
01848 ulint* size,
01850 ulint* size_high)
01851 {
01852 #ifdef __WIN__
01853 DWORD high;
01854 DWORD low;
01855
01856 low = GetFileSize(file, &high);
01857
01858 if ((low == 0xFFFFFFFF) && (GetLastError() != NO_ERROR)) {
01859 return(FALSE);
01860 }
01861
01862 *size = low;
01863 *size_high = high;
01864
01865 return(TRUE);
01866 #else
01867 off_t offs;
01868
01869 offs = lseek(file, 0, SEEK_END);
01870
01871 if (offs == ((off_t)-1)) {
01872
01873 return(FALSE);
01874 }
01875
01876 if (sizeof(off_t) > 4) {
01877 *size = (ulint)(offs & 0xFFFFFFFFUL);
01878 *size_high = (ulint)(offs >> 32);
01879 } else {
01880 *size = (ulint) offs;
01881 *size_high = 0;
01882 }
01883
01884 return(TRUE);
01885 #endif
01886 }
01887
01888
01891 UNIV_INTERN
01892 ib_int64_t
01893 os_file_get_size_as_iblonglong(
01894
01895 os_file_t file)
01896 {
01897 ulint size;
01898 ulint size_high;
01899 ibool success;
01900
01901 success = os_file_get_size(file, &size, &size_high);
01902
01903 if (!success) {
01904
01905 return(-1);
01906 }
01907
01908 return((((ib_int64_t)size_high) << 32) + (ib_int64_t)size);
01909 }
01910
01911
01914 UNIV_INTERN
01915 ibool
01916 os_file_set_size(
01917
01918 const char* name,
01920 os_file_t file,
01921 ulint size,
01923 ulint size_high)
01924 {
01925 ib_int64_t current_size;
01926 ib_int64_t desired_size;
01927 ibool ret;
01928 byte* buf;
01929 byte* buf2;
01930 ulint buf_size;
01931
01932 ut_a(size == (size & 0xFFFFFFFF));
01933
01934 current_size = 0;
01935 desired_size = (ib_int64_t)size + (((ib_int64_t)size_high) << 32);
01936
01937
01938 buf_size = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE))
01939 * UNIV_PAGE_SIZE;
01940 buf2 = static_cast<unsigned char *>(ut_malloc(buf_size + UNIV_PAGE_SIZE));
01941
01942
01943 buf = static_cast<unsigned char *>(ut_align(buf2, UNIV_PAGE_SIZE));
01944
01945
01946 memset(buf, 0, buf_size);
01947
01948 if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {
01949
01950 fprintf(stderr, "InnoDB: Progress in MB:");
01951 }
01952
01953 while (current_size < desired_size) {
01954 ulint n_bytes;
01955
01956 if (desired_size - current_size < (ib_int64_t) buf_size) {
01957 n_bytes = (ulint) (desired_size - current_size);
01958 } else {
01959 n_bytes = buf_size;
01960 }
01961
01962 ret = os_file_write(name, file, buf,
01963 (ulint)(current_size & 0xFFFFFFFF),
01964 (ulint)(current_size >> 32),
01965 n_bytes);
01966 if (!ret) {
01967 ut_free(buf2);
01968 goto error_handling;
01969 }
01970
01971
01972 if ((ib_int64_t) (current_size + n_bytes) / (ib_int64_t)(100 * 1024 * 1024)
01973 != current_size / (ib_int64_t)(100 * 1024 * 1024)) {
01974
01975 fprintf(stderr, " %lu00",
01976 (ulong) ((current_size + n_bytes)
01977 / (ib_int64_t)(100 * 1024 * 1024)));
01978 }
01979
01980 current_size += n_bytes;
01981 }
01982
01983 if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {
01984
01985 fprintf(stderr, "\n");
01986 }
01987
01988 ut_free(buf2);
01989
01990 ret = os_file_flush(file);
01991
01992 if (ret) {
01993 return(TRUE);
01994 }
01995
01996 error_handling:
01997 return(FALSE);
01998 }
01999
02000
02003 UNIV_INTERN
02004 ibool
02005 os_file_set_eof(
02006
02007 FILE* file)
02008 {
02009 #ifdef __WIN__
02010 HANDLE h = (HANDLE) _get_osfhandle(fileno(file));
02011 return(SetEndOfFile(h));
02012 #else
02013 return(!ftruncate(fileno(file), ftell(file)));
02014 #endif
02015 }
02016
02017 #ifndef __WIN__
02018
02024 static
02025 int
02026 os_file_fsync(
02027
02028 os_file_t file)
02029 {
02030 int ret;
02031 int failures;
02032 ibool retry;
02033
02034 failures = 0;
02035
02036 do {
02037 ret = fsync(file);
02038
02039 os_n_fsyncs++;
02040
02041 if (ret == -1 && errno == ENOLCK) {
02042
02043 if (failures % 100 == 0) {
02044
02045 ut_print_timestamp(stderr);
02046 fprintf(stderr,
02047 " InnoDB: fsync(): "
02048 "No locks available; retrying\n");
02049 }
02050
02051 os_thread_sleep(200000 );
02052
02053 failures++;
02054
02055 retry = TRUE;
02056 } else {
02057
02058 retry = FALSE;
02059 }
02060 } while (retry);
02061
02062 return(ret);
02063 }
02064 #endif
02065
02066
02070 UNIV_INTERN
02071 ibool
02072 os_file_flush_func(
02073
02074 os_file_t file)
02075 {
02076 #ifdef __WIN__
02077 BOOL ret;
02078
02079 ut_a(file);
02080
02081 os_n_fsyncs++;
02082
02083 ret = FlushFileBuffers(file);
02084
02085 if (ret) {
02086 return(TRUE);
02087 }
02088
02089
02090
02091
02092
02093 if (srv_start_raw_disk_in_use && GetLastError()
02094 == ERROR_INVALID_FUNCTION) {
02095 return(TRUE);
02096 }
02097
02098 os_file_handle_error(NULL, "flush");
02099
02100
02101
02102 ut_error;
02103
02104 return(FALSE);
02105 #else
02106 int ret;
02107
02108 #if defined(HAVE_DARWIN_THREADS)
02109 # ifndef F_FULLFSYNC
02110
02111 # define F_FULLFSYNC 51
02112 # elif F_FULLFSYNC != 51
02113 # error "F_FULLFSYNC != 51: ABI incompatibility with Mac OS X 10.3"
02114 # endif
02115
02116
02117
02118
02119
02120 if (!srv_have_fullfsync) {
02121
02122
02123
02124 ret = os_file_fsync(file);
02125 } else {
02126 ret = fcntl(file, F_FULLFSYNC, NULL);
02127
02128 if (ret) {
02129
02130
02131 ret = os_file_fsync(file);
02132 }
02133 }
02134 #else
02135 ret = os_file_fsync(file);
02136 #endif
02137
02138 if (ret == 0) {
02139 return(TRUE);
02140 }
02141
02142
02143
02144
02145 if (srv_start_raw_disk_in_use && errno == EINVAL) {
02146
02147 return(TRUE);
02148 }
02149
02150 ut_print_timestamp(stderr);
02151
02152 fprintf(stderr,
02153 " InnoDB: Error: the OS said file flush did not succeed\n");
02154
02155 os_file_handle_error(NULL, "flush");
02156
02157
02158
02159 ut_error;
02160
02161 return(FALSE);
02162 #endif
02163 }
02164
02165 #ifndef __WIN__
02166
02169 static
02170 ssize_t
02171 os_file_pread(
02172
02173 os_file_t file,
02174 void* buf,
02175 ulint n,
02176 ulint offset,
02178 ulint offset_high)
02180 {
02181 off_t offs;
02182 #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD)
02183 ssize_t n_bytes;
02184 #endif
02185
02186 ut_a((offset & 0xFFFFFFFFUL) == offset);
02187
02188
02189
02190
02191 if (sizeof(off_t) > 4) {
02192 offs = (off_t)offset + (((off_t)offset_high) << 32);
02193
02194 } else {
02195 offs = (off_t)offset;
02196
02197 if (offset_high > 0) {
02198 fprintf(stderr,
02199 "InnoDB: Error: file read at offset > 4 GB\n");
02200 }
02201 }
02202
02203 os_n_file_reads++;
02204
02205 #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD)
02206 os_mutex_enter(os_file_count_mutex);
02207 os_file_n_pending_preads++;
02208 os_n_pending_reads++;
02209 os_mutex_exit(os_file_count_mutex);
02210
02211 n_bytes = pread(file, buf, (ssize_t)n, offs);
02212
02213 os_mutex_enter(os_file_count_mutex);
02214 os_file_n_pending_preads--;
02215 os_n_pending_reads--;
02216 os_mutex_exit(os_file_count_mutex);
02217
02218 return(n_bytes);
02219 #else
02220 {
02221 off_t ret_offset;
02222 ssize_t ret;
02223 #ifndef UNIV_HOTBACKUP
02224 ulint i;
02225 #endif
02226
02227 os_mutex_enter(os_file_count_mutex);
02228 os_n_pending_reads++;
02229 os_mutex_exit(os_file_count_mutex);
02230
02231 #ifndef UNIV_HOTBACKUP
02232
02233 i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
02234
02235 os_mutex_enter(os_file_seek_mutexes[i]);
02236 #endif
02237
02238 ret_offset = lseek(file, offs, SEEK_SET);
02239
02240 if (ret_offset < 0) {
02241 ret = -1;
02242 } else {
02243 ret = read(file, buf, (ssize_t)n);
02244 }
02245
02246 #ifndef UNIV_HOTBACKUP
02247 os_mutex_exit(os_file_seek_mutexes[i]);
02248 #endif
02249
02250 os_mutex_enter(os_file_count_mutex);
02251 os_n_pending_reads--;
02252 os_mutex_exit(os_file_count_mutex);
02253
02254 return(ret);
02255 }
02256 #endif
02257 }
02258
02259
02262 static
02263 ssize_t
02264 os_file_pwrite(
02265
02266 os_file_t file,
02267 const void* buf,
02268 ulint n,
02269 ulint offset,
02271 ulint offset_high)
02273 {
02274 ssize_t ret;
02275 off_t offs;
02276
02277 ut_a((offset & 0xFFFFFFFFUL) == offset);
02278
02279
02280
02281
02282 if (sizeof(off_t) > 4) {
02283 offs = (off_t)offset + (((off_t)offset_high) << 32);
02284 } else {
02285 offs = (off_t)offset;
02286
02287 if (offset_high > 0) {
02288 fprintf(stderr,
02289 "InnoDB: Error: file write"
02290 " at offset > 4 GB\n");
02291 }
02292 }
02293
02294 os_n_file_writes++;
02295
02296 #if defined(HAVE_PWRITE) && !defined(HAVE_BROKEN_PREAD)
02297 os_mutex_enter(os_file_count_mutex);
02298 os_file_n_pending_pwrites++;
02299 os_n_pending_writes++;
02300 os_mutex_exit(os_file_count_mutex);
02301
02302 ret = pwrite(file, buf, (ssize_t)n, offs);
02303
02304 os_mutex_enter(os_file_count_mutex);
02305 os_file_n_pending_pwrites--;
02306 os_n_pending_writes--;
02307 os_mutex_exit(os_file_count_mutex);
02308
02309 # ifdef UNIV_DO_FLUSH
02310 if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
02311 && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
02312 && !os_do_not_call_flush_at_each_write) {
02313
02314
02315
02316
02317
02318 ut_a(TRUE == os_file_flush(file));
02319 }
02320 # endif
02321
02322 return(ret);
02323 #else
02324 {
02325 off_t ret_offset;
02326 # ifndef UNIV_HOTBACKUP
02327 ulint i;
02328 # endif
02329
02330 os_mutex_enter(os_file_count_mutex);
02331 os_n_pending_writes++;
02332 os_mutex_exit(os_file_count_mutex);
02333
02334 # ifndef UNIV_HOTBACKUP
02335
02336 i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
02337
02338 os_mutex_enter(os_file_seek_mutexes[i]);
02339 # endif
02340
02341 ret_offset = lseek(file, offs, SEEK_SET);
02342
02343 if (ret_offset < 0) {
02344 ret = -1;
02345
02346 goto func_exit;
02347 }
02348
02349 ret = write(file, buf, (ssize_t)n);
02350
02351 # ifdef UNIV_DO_FLUSH
02352 if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
02353 && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
02354 && !os_do_not_call_flush_at_each_write) {
02355
02356
02357
02358
02359
02360 ut_a(TRUE == os_file_flush(file));
02361 }
02362 # endif
02363
02364 func_exit:
02365 # ifndef UNIV_HOTBACKUP
02366 os_mutex_exit(os_file_seek_mutexes[i]);
02367 # endif
02368
02369 os_mutex_enter(os_file_count_mutex);
02370 os_n_pending_writes--;
02371 os_mutex_exit(os_file_count_mutex);
02372
02373 return(ret);
02374 }
02375 #endif
02376 }
02377 #endif
02378
02379
02384 UNIV_INTERN
02385 ibool
02386 os_file_read_func(
02387
02388 os_file_t file,
02389 void* buf,
02390 ulint offset,
02392 ulint offset_high,
02394 ulint n)
02395 {
02396 #ifdef __WIN__
02397 BOOL ret;
02398 DWORD len;
02399 DWORD ret2;
02400 DWORD low;
02401 DWORD high;
02402 ibool retry;
02403 #ifndef UNIV_HOTBACKUP
02404 ulint i;
02405 #endif
02406
02407
02408
02409 ut_a((offset & 0xFFFFFFFFUL) == offset);
02410 ut_a((n & 0xFFFFFFFFUL) == n);
02411
02412 os_n_file_reads++;
02413 os_bytes_read_since_printout += n;
02414
02415 try_again:
02416 ut_ad(file);
02417 ut_ad(buf);
02418 ut_ad(n > 0);
02419
02420 low = (DWORD) offset;
02421 high = (DWORD) offset_high;
02422
02423 os_mutex_enter(os_file_count_mutex);
02424 os_n_pending_reads++;
02425 os_mutex_exit(os_file_count_mutex);
02426
02427 #ifndef UNIV_HOTBACKUP
02428
02429 i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
02430
02431 os_mutex_enter(os_file_seek_mutexes[i]);
02432 #endif
02433
02434 ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
02435
02436 if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
02437
02438 #ifndef UNIV_HOTBACKUP
02439 os_mutex_exit(os_file_seek_mutexes[i]);
02440 #endif
02441
02442 os_mutex_enter(os_file_count_mutex);
02443 os_n_pending_reads--;
02444 os_mutex_exit(os_file_count_mutex);
02445
02446 goto error_handling;
02447 }
02448
02449 ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
02450
02451 #ifndef UNIV_HOTBACKUP
02452 os_mutex_exit(os_file_seek_mutexes[i]);
02453 #endif
02454
02455 os_mutex_enter(os_file_count_mutex);
02456 os_n_pending_reads--;
02457 os_mutex_exit(os_file_count_mutex);
02458
02459 if (ret && len == n) {
02460 return(TRUE);
02461 }
02462 #else
02463 ibool retry;
02464 ssize_t ret;
02465
02466 os_bytes_read_since_printout += n;
02467
02468 try_again:
02469 ret = os_file_pread(file, buf, n, offset, offset_high);
02470
02471 if ((ulint)ret == n) {
02472
02473 return(TRUE);
02474 }
02475
02476 fprintf(stderr,
02477 "InnoDB: Error: tried to read %lu bytes at offset %lu %lu.\n"
02478 "InnoDB: Was only able to read %ld.\n",
02479 (ulong)n, (ulong)offset_high,
02480 (ulong)offset, (long)ret);
02481 #endif
02482 #ifdef __WIN__
02483 error_handling:
02484 #endif
02485 retry = os_file_handle_error(NULL, "read");
02486
02487 if (retry) {
02488 goto try_again;
02489 }
02490
02491 fprintf(stderr,
02492 "InnoDB: Fatal error: cannot read from file."
02493 " OS error number %lu.\n",
02494 #ifdef __WIN__
02495 (ulong) GetLastError()
02496 #else
02497 (ulong) errno
02498 #endif
02499 );
02500 fflush(stderr);
02501
02502 ut_error;
02503
02504 return(FALSE);
02505 }
02506
02507
02513 UNIV_INTERN
02514 ibool
02515 os_file_read_no_error_handling_func(
02516
02517 os_file_t file,
02518 void* buf,
02519 ulint offset,
02521 ulint offset_high,
02523 ulint n)
02524 {
02525 #ifdef __WIN__
02526 BOOL ret;
02527 DWORD len;
02528 DWORD ret2;
02529 DWORD low;
02530 DWORD high;
02531 ibool retry;
02532 #ifndef UNIV_HOTBACKUP
02533 ulint i;
02534 #endif
02535
02536
02537
02538 ut_a((offset & 0xFFFFFFFFUL) == offset);
02539 ut_a((n & 0xFFFFFFFFUL) == n);
02540
02541 os_n_file_reads++;
02542 os_bytes_read_since_printout += n;
02543
02544 try_again:
02545 ut_ad(file);
02546 ut_ad(buf);
02547 ut_ad(n > 0);
02548
02549 low = (DWORD) offset;
02550 high = (DWORD) offset_high;
02551
02552 os_mutex_enter(os_file_count_mutex);
02553 os_n_pending_reads++;
02554 os_mutex_exit(os_file_count_mutex);
02555
02556 #ifndef UNIV_HOTBACKUP
02557
02558 i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
02559
02560 os_mutex_enter(os_file_seek_mutexes[i]);
02561 #endif
02562
02563 ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
02564
02565 if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
02566
02567 #ifndef UNIV_HOTBACKUP
02568 os_mutex_exit(os_file_seek_mutexes[i]);
02569 #endif
02570
02571 os_mutex_enter(os_file_count_mutex);
02572 os_n_pending_reads--;
02573 os_mutex_exit(os_file_count_mutex);
02574
02575 goto error_handling;
02576 }
02577
02578 ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
02579
02580 #ifndef UNIV_HOTBACKUP
02581 os_mutex_exit(os_file_seek_mutexes[i]);
02582 #endif
02583
02584 os_mutex_enter(os_file_count_mutex);
02585 os_n_pending_reads--;
02586 os_mutex_exit(os_file_count_mutex);
02587
02588 if (ret && len == n) {
02589 return(TRUE);
02590 }
02591 #else
02592 ibool retry;
02593 ssize_t ret;
02594
02595 os_bytes_read_since_printout += n;
02596
02597 try_again:
02598 ret = os_file_pread(file, buf, n, offset, offset_high);
02599
02600 if ((ulint)ret == n) {
02601
02602 return(TRUE);
02603 }
02604 #endif
02605 #ifdef __WIN__
02606 error_handling:
02607 #endif
02608 retry = os_file_handle_error_no_exit(NULL, "read");
02609
02610 if (retry) {
02611 goto try_again;
02612 }
02613
02614 return(FALSE);
02615 }
02616
02617
02621 UNIV_INTERN
02622 void
02623 os_file_read_string(
02624
02625 FILE* file,
02626 char* str,
02627 ulint size)
02628 {
02629 size_t flen;
02630
02631 if (size == 0) {
02632 return;
02633 }
02634
02635 rewind(file);
02636 flen = fread(str, 1, size - 1, file);
02637 str[flen] = '\0';
02638 }
02639
02640
02645 UNIV_INTERN
02646 ibool
02647 os_file_write_func(
02648
02649 const char* name,
02651 os_file_t file,
02652 const void* buf,
02653 ulint offset,
02655 ulint offset_high,
02657 ulint n)
02658 {
02659 #ifdef __WIN__
02660 BOOL ret;
02661 DWORD len;
02662 DWORD ret2;
02663 DWORD low;
02664 DWORD high;
02665 ulint n_retries = 0;
02666 ulint err;
02667 #ifndef UNIV_HOTBACKUP
02668 ulint i;
02669 #endif
02670
02671
02672
02673 ut_a((offset & 0xFFFFFFFFUL) == offset);
02674 ut_a((n & 0xFFFFFFFFUL) == n);
02675
02676 os_n_file_writes++;
02677
02678 ut_ad(file);
02679 ut_ad(buf);
02680 ut_ad(n > 0);
02681 retry:
02682 low = (DWORD) offset;
02683 high = (DWORD) offset_high;
02684
02685 os_mutex_enter(os_file_count_mutex);
02686 os_n_pending_writes++;
02687 os_mutex_exit(os_file_count_mutex);
02688
02689 #ifndef UNIV_HOTBACKUP
02690
02691 i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
02692
02693 os_mutex_enter(os_file_seek_mutexes[i]);
02694 #endif
02695
02696 ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
02697
02698 if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
02699
02700 #ifndef UNIV_HOTBACKUP
02701 os_mutex_exit(os_file_seek_mutexes[i]);
02702 #endif
02703
02704 os_mutex_enter(os_file_count_mutex);
02705 os_n_pending_writes--;
02706 os_mutex_exit(os_file_count_mutex);
02707
02708 ut_print_timestamp(stderr);
02709
02710 fprintf(stderr,
02711 " InnoDB: Error: File pointer positioning to"
02712 " file %s failed at\n"
02713 "InnoDB: offset %lu %lu. Operating system"
02714 " error number %lu.\n"
02715 "InnoDB: Some operating system error numbers"
02716 " are described at\n"
02717 "InnoDB: "
02718 REFMAN "operating-system-error-codes.html\n",
02719 name, (ulong) offset_high, (ulong) offset,
02720 (ulong) GetLastError());
02721
02722 return(FALSE);
02723 }
02724
02725 ret = WriteFile(file, buf, (DWORD) n, &len, NULL);
02726
02727
02728
02729
02730 # ifdef UNIV_DO_FLUSH
02731 if (!os_do_not_call_flush_at_each_write) {
02732 ut_a(TRUE == os_file_flush(file));
02733 }
02734 # endif
02735
02736 #ifndef UNIV_HOTBACKUP
02737 os_mutex_exit(os_file_seek_mutexes[i]);
02738 #endif
02739
02740 os_mutex_enter(os_file_count_mutex);
02741 os_n_pending_writes--;
02742 os_mutex_exit(os_file_count_mutex);
02743
02744 if (ret && len == n) {
02745
02746 return(TRUE);
02747 }
02748
02749
02750
02751
02752
02753 if (GetLastError() == ERROR_LOCK_VIOLATION && n_retries < 100) {
02754
02755 os_thread_sleep(1000000);
02756
02757 n_retries++;
02758
02759 goto retry;
02760 }
02761
02762 if (!os_has_said_disk_full) {
02763
02764 err = (ulint)GetLastError();
02765
02766 ut_print_timestamp(stderr);
02767
02768 fprintf(stderr,
02769 " InnoDB: Error: Write to file %s failed"
02770 " at offset %lu %lu.\n"
02771 "InnoDB: %lu bytes should have been written,"
02772 " only %lu were written.\n"
02773 "InnoDB: Operating system error number %lu.\n"
02774 "InnoDB: Check that your OS and file system"
02775 " support files of this size.\n"
02776 "InnoDB: Check also that the disk is not full"
02777 " or a disk quota exceeded.\n",
02778 name, (ulong) offset_high, (ulong) offset,
02779 (ulong) n, (ulong) len, (ulong) err);
02780
02781 if (strerror((int)err) != NULL) {
02782 fprintf(stderr,
02783 "InnoDB: Error number %lu means '%s'.\n",
02784 (ulong) err, strerror((int)err));
02785 }
02786
02787 fprintf(stderr,
02788 "InnoDB: Some operating system error numbers"
02789 " are described at\n"
02790 "InnoDB: "
02791 REFMAN "operating-system-error-codes.html\n");
02792
02793 os_has_said_disk_full = TRUE;
02794 }
02795
02796 return(FALSE);
02797 #else
02798 ssize_t ret;
02799
02800 ret = os_file_pwrite(file, buf, n, offset, offset_high);
02801
02802 if ((ulint)ret == n) {
02803
02804 return(TRUE);
02805 }
02806
02807 if (!os_has_said_disk_full) {
02808
02809 ut_print_timestamp(stderr);
02810
02811 fprintf(stderr,
02812 " InnoDB: Error: Write to file %s failed"
02813 " at offset %lu %lu.\n"
02814 "InnoDB: %lu bytes should have been written,"
02815 " only %ld were written.\n"
02816 "InnoDB: Operating system error number %lu.\n"
02817 "InnoDB: Check that your OS and file system"
02818 " support files of this size.\n"
02819 "InnoDB: Check also that the disk is not full"
02820 " or a disk quota exceeded.\n",
02821 name, offset_high, offset, n, (long int)ret,
02822 (ulint)errno);
02823 if (strerror(errno) != NULL) {
02824 fprintf(stderr,
02825 "InnoDB: Error number %lu means '%s'.\n",
02826 (ulint)errno, strerror(errno));
02827 }
02828
02829 fprintf(stderr,
02830 "InnoDB: Some operating system error numbers"
02831 " are described at\n"
02832 "InnoDB: "
02833 REFMAN "operating-system-error-codes.html\n");
02834
02835 os_has_said_disk_full = TRUE;
02836 }
02837
02838 return(FALSE);
02839 #endif
02840 }
02841
02842
02845 UNIV_INTERN
02846 ibool
02847 os_file_status(
02848
02849 const char* path,
02850 ibool* exists,
02851 os_file_type_t* type)
02852 {
02853 #ifdef __WIN__
02854 int ret;
02855 struct _stat statinfo;
02856
02857 ret = _stat(path, &statinfo);
02858 if (ret && (errno == ENOENT || errno == ENOTDIR)) {
02859
02860 *exists = FALSE;
02861 return(TRUE);
02862 } else if (ret) {
02863
02864
02865 os_file_handle_error_no_exit(path, "stat");
02866
02867 return(FALSE);
02868 }
02869
02870 if (_S_IFDIR & statinfo.st_mode) {
02871 *type = OS_FILE_TYPE_DIR;
02872 } else if (_S_IFREG & statinfo.st_mode) {
02873 *type = OS_FILE_TYPE_FILE;
02874 } else {
02875 *type = OS_FILE_TYPE_UNKNOWN;
02876 }
02877
02878 *exists = TRUE;
02879
02880 return(TRUE);
02881 #else
02882 int ret;
02883 struct stat statinfo;
02884
02885 ret = stat(path, &statinfo);
02886 if (ret && (errno == ENOENT || errno == ENOTDIR)) {
02887
02888 *exists = FALSE;
02889 return(TRUE);
02890 } else if (ret) {
02891
02892
02893 os_file_handle_error_no_exit(path, "stat");
02894
02895 return(FALSE);
02896 }
02897
02898 if (S_ISDIR(statinfo.st_mode)) {
02899 *type = OS_FILE_TYPE_DIR;
02900 } else if (S_ISLNK(statinfo.st_mode)) {
02901 *type = OS_FILE_TYPE_LINK;
02902 } else if (S_ISREG(statinfo.st_mode)) {
02903 *type = OS_FILE_TYPE_FILE;
02904 } else {
02905 *type = OS_FILE_TYPE_UNKNOWN;
02906 }
02907
02908 *exists = TRUE;
02909
02910 return(TRUE);
02911 #endif
02912 }
02913
02914
02917 UNIV_INTERN
02918 ibool
02919 os_file_get_status(
02920
02921 const char* path,
02922 os_file_stat_t* stat_info)
02924 {
02925 #ifdef __WIN__
02926 int ret;
02927 struct _stat statinfo;
02928
02929 ret = _stat(path, &statinfo);
02930 if (ret && (errno == ENOENT || errno == ENOTDIR)) {
02931
02932
02933 return(FALSE);
02934 } else if (ret) {
02935
02936
02937 os_file_handle_error_no_exit(path, "stat");
02938
02939 return(FALSE);
02940 }
02941 if (_S_IFDIR & statinfo.st_mode) {
02942 stat_info->type = OS_FILE_TYPE_DIR;
02943 } else if (_S_IFREG & statinfo.st_mode) {
02944 stat_info->type = OS_FILE_TYPE_FILE;
02945 } else {
02946 stat_info->type = OS_FILE_TYPE_UNKNOWN;
02947 }
02948
02949 stat_info->ctime = statinfo.st_ctime;
02950 stat_info->atime = statinfo.st_atime;
02951 stat_info->mtime = statinfo.st_mtime;
02952 stat_info->size = statinfo.st_size;
02953
02954 return(TRUE);
02955 #else
02956 int ret;
02957 struct stat statinfo;
02958
02959 ret = stat(path, &statinfo);
02960
02961 if (ret && (errno == ENOENT || errno == ENOTDIR)) {
02962
02963
02964 return(FALSE);
02965 } else if (ret) {
02966
02967
02968 os_file_handle_error_no_exit(path, "stat");
02969
02970 return(FALSE);
02971 }
02972
02973 if (S_ISDIR(statinfo.st_mode)) {
02974 stat_info->type = OS_FILE_TYPE_DIR;
02975 } else if (S_ISLNK(statinfo.st_mode)) {
02976 stat_info->type = OS_FILE_TYPE_LINK;
02977 } else if (S_ISREG(statinfo.st_mode)) {
02978 stat_info->type = OS_FILE_TYPE_FILE;
02979 } else {
02980 stat_info->type = OS_FILE_TYPE_UNKNOWN;
02981 }
02982
02983 stat_info->ctime = statinfo.st_ctime;
02984 stat_info->atime = statinfo.st_atime;
02985 stat_info->mtime = statinfo.st_mtime;
02986 stat_info->size = statinfo.st_size;
02987
02988 return(TRUE);
02989 #endif
02990 }
02991
02992
/* Character that separates path components: a backslash when __WIN__ is
defined, a forward slash otherwise. */
#ifdef __WIN__
#  define OS_FILE_PATH_SEPARATOR	'\\'
#else
#  define OS_FILE_PATH_SEPARATOR	'/'
#endif
02998
02999
03027 UNIV_INTERN
03028 char*
03029 os_file_dirname(
03030
03031 const char* path)
03032 {
03033
03034 const char* last_slash = strrchr(path, OS_FILE_PATH_SEPARATOR);
03035 if (!last_slash) {
03036
03037
03038 return(mem_strdup("."));
03039 }
03040
03041
03042
03043 if (last_slash == path) {
03044
03045
03046 return(mem_strdup("/"));
03047 }
03048
03049
03050
03051 return(mem_strdupl(path, last_slash - path));
03052 }
03053
03054
03057 UNIV_INTERN
03058 ibool
03059 os_file_create_subdirs_if_needed(
03060
03061 const char* path)
03062 {
03063 char* subdir;
03064 ibool success, subdir_exists;
03065 os_file_type_t type;
03066
03067 subdir = os_file_dirname(path);
03068 if (strlen(subdir) == 1
03069 && (*subdir == OS_FILE_PATH_SEPARATOR || *subdir == '.')) {
03070
03071 mem_free(subdir);
03072
03073 return(TRUE);
03074 }
03075
03076
03077 success = os_file_status(subdir, &subdir_exists, &type);
03078 if (success && !subdir_exists) {
03079
03080 success = os_file_create_subdirs_if_needed(subdir);
03081 if (!success) {
03082 mem_free(subdir);
03083
03084 return(FALSE);
03085 }
03086 success = os_file_create_directory(subdir, FALSE);
03087 }
03088
03089 mem_free(subdir);
03090
03091 return(success);
03092 }
03093
03094 #ifndef UNIV_HOTBACKUP
03095
03098 static
03099 os_aio_slot_t*
03100 os_aio_array_get_nth_slot(
03101
03102 os_aio_array_t* array,
03103 ulint index)
03104 {
03105 ut_a(index < array->n_slots);
03106
03107 return((array->slots) + index);
03108 }
03109
03110 #if defined(LINUX_NATIVE_AIO)
03111
03114 static
03115 ibool
03116 os_aio_linux_create_io_ctx(
03117
03118 ulint max_events,
03119 io_context_t* io_ctx)
03120 {
03121 int ret;
03122 ulint retries = 0;
03123
03124 retry:
03125 memset(io_ctx, 0x0, sizeof(*io_ctx));
03126
03127
03128
03129
03130 ret = io_setup(max_events, io_ctx);
03131 if (ret == 0) {
03132 #if defined(UNIV_AIO_DEBUG)
03133 fprintf(stderr,
03134 "InnoDB: Linux native AIO:"
03135 " initialized io_ctx for segment\n");
03136 #endif
03137
03138 return(TRUE);
03139 }
03140
03141
03142
03143 switch (ret) {
03144 case -EAGAIN:
03145 if (retries == 0) {
03146
03147 ut_print_timestamp(stderr);
03148 fprintf(stderr,
03149 " InnoDB: Warning: io_setup() failed"
03150 " with EAGAIN. Will make %d attempts"
03151 " before giving up.\n",
03152 OS_AIO_IO_SETUP_RETRY_ATTEMPTS);
03153 }
03154
03155 if (retries < OS_AIO_IO_SETUP_RETRY_ATTEMPTS) {
03156 ++retries;
03157 fprintf(stderr,
03158 "InnoDB: Warning: io_setup() attempt"
03159 " %lu failed.\n",
03160 retries);
03161 os_thread_sleep(OS_AIO_IO_SETUP_RETRY_SLEEP);
03162 goto retry;
03163 }
03164
03165
03166 ut_print_timestamp(stderr);
03167 fprintf(stderr,
03168 " InnoDB: Error: io_setup() failed"
03169 " with EAGAIN after %d attempts.\n",
03170 OS_AIO_IO_SETUP_RETRY_ATTEMPTS);
03171 break;
03172
03173 case -ENOSYS:
03174 ut_print_timestamp(stderr);
03175 fprintf(stderr,
03176 " InnoDB: Error: Linux Native AIO interface"
03177 " is not supported on this platform. Please"
03178 " check your OS documentation and install"
03179 " appropriate binary of InnoDB.\n");
03180
03181 break;
03182
03183 default:
03184 ut_print_timestamp(stderr);
03185 fprintf(stderr,
03186 " InnoDB: Error: Linux Native AIO setup"
03187 " returned following error[%d]\n", -ret);
03188 break;
03189 }
03190
03191 fprintf(stderr,
03192 "InnoDB: You can disable Linux Native AIO by"
03193 " setting innodb_native_aio = off in my.cnf\n");
03194 return(FALSE);
03195 }
03196 #endif
03197
03198
03203 static
03204 os_aio_array_t*
03205 os_aio_array_create(
03206
03207 ulint n,
03210 ulint n_segments)
03211 {
03212 os_aio_array_t* array;
03213 ulint i;
03214 os_aio_slot_t* slot;
03215 #ifdef WIN_ASYNC_IO
03216 OVERLAPPED* over;
03217 #elif defined(LINUX_NATIVE_AIO)
03218 struct io_event* aio_event = NULL;
03219 #endif
03220 ut_a(n > 0);
03221 ut_a(n_segments > 0);
03222
03223 array = static_cast<os_aio_array_t *>(ut_malloc(sizeof(os_aio_array_t)));
03224
03225 array->mutex = os_mutex_create();
03226 array->not_full = os_event_create(NULL);
03227 array->is_empty = os_event_create(NULL);
03228
03229 os_event_set(array->is_empty);
03230
03231 array->n_slots = n;
03232 array->n_segments = n_segments;
03233 array->n_reserved = 0;
03234 array->cur_seg = 0;
03235 array->slots = static_cast<os_aio_slot_t *>(ut_malloc(n * sizeof(os_aio_slot_t)));
03236 #ifdef __WIN__
03237 array->handles = ut_malloc(n * sizeof(HANDLE));
03238 #endif
03239
03240 #if defined(LINUX_NATIVE_AIO)
03241 array->aio_ctx = NULL;
03242 array->aio_events = NULL;
03243
03244
03245
03246 if (!srv_use_native_aio) {
03247 goto skip_native_aio;
03248 }
03249
03250
03251
03252
03253 array->aio_ctx = (io_context**) ut_malloc(n_segments *
03254 sizeof(*array->aio_ctx));
03255 for (i = 0; i < n_segments; ++i) {
03256 if (!os_aio_linux_create_io_ctx(n/n_segments,
03257 &array->aio_ctx[i])) {
03258
03259
03260
03261
03262
03263
03264 return(NULL);
03265 }
03266 }
03267
03268
03269 aio_event = (io_event*) ut_malloc(n * sizeof(io_event));
03270 memset(aio_event, 0x0, sizeof(io_event) * n);
03271 array->aio_events = aio_event;
03272
03273 skip_native_aio:
03274 #endif
03275 for (i = 0; i < n; i++) {
03276 slot = os_aio_array_get_nth_slot(array, i);
03277
03278 slot->pos = i;
03279 slot->reserved = FALSE;
03280 #ifdef WIN_ASYNC_IO
03281 slot->handle = CreateEvent(NULL,TRUE, FALSE, NULL);
03282
03283 over = &(slot->control);
03284
03285 over->hEvent = slot->handle;
03286
03287 *((array->handles) + i) = over->hEvent;
03288
03289 #elif defined(LINUX_NATIVE_AIO)
03290
03291 memset(&slot->control, 0x0, sizeof(slot->control));
03292 slot->n_bytes = 0;
03293 slot->ret = 0;
03294 #endif
03295 }
03296
03297 return(array);
03298 }
03299
03300
03302 static
03303 void
03304 os_aio_array_free(
03305
03306 os_aio_array_t* array)
03307 {
03308 #ifdef WIN_ASYNC_IO
03309 ulint i;
03310
03311 for (i = 0; i < array->n_slots; i++) {
03312 os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i);
03313 CloseHandle(slot->handle);
03314 }
03315 #endif
03316
03317 #ifdef __WIN__
03318 ut_free(array->handles);
03319 #endif
03320 os_mutex_free(array->mutex);
03321 os_event_free(array->not_full);
03322 os_event_free(array->is_empty);
03323
03324 #if defined(LINUX_NATIVE_AIO)
03325 if (srv_use_native_aio) {
03326 ut_free(array->aio_events);
03327 ut_free(array->aio_ctx);
03328 }
03329 #endif
03330
03331 ut_free(array->slots);
03332 ut_free(array);
03333 }
03334
03335
03336
03337
03338
03339
03340
03341
03342 UNIV_INTERN
03343 ibool
03344 os_aio_init(
03345
03346 ulint n_per_seg,
03347
03348 ulint n_read_segs,
03349 ulint n_write_segs,
03350 ulint n_slots_sync)
03351
03352 {
03353 ulint i;
03354 ulint n_segments = 2 + n_read_segs + n_write_segs;
03355
03356 ut_ad(n_segments >= 4);
03357
03358 os_io_init_simple();
03359
03360 for (i = 0; i < n_segments; i++) {
03361 srv_set_io_thread_op_info(i, "not started yet");
03362 }
03363
03364
03365
03366
03367 os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1);
03368 if (os_aio_ibuf_array == NULL) {
03369 goto err_exit;
03370 }
03371
03372 srv_io_thread_function[0] = "insert buffer thread";
03373
03374 os_aio_log_array = os_aio_array_create(n_per_seg, 1);
03375 if (os_aio_log_array == NULL) {
03376 goto err_exit;
03377 }
03378
03379 srv_io_thread_function[1] = "log thread";
03380
03381 os_aio_read_array = os_aio_array_create(n_read_segs * n_per_seg,
03382 n_read_segs);
03383 if (os_aio_read_array == NULL) {
03384 goto err_exit;
03385 }
03386
03387 for (i = 2; i < 2 + n_read_segs; i++) {
03388 ut_a(i < SRV_MAX_N_IO_THREADS);
03389 srv_io_thread_function[i] = "read thread";
03390 }
03391
03392 os_aio_write_array = os_aio_array_create(n_write_segs * n_per_seg,
03393 n_write_segs);
03394 if (os_aio_write_array == NULL) {
03395 goto err_exit;
03396 }
03397
03398 for (i = 2 + n_read_segs; i < n_segments; i++) {
03399 ut_a(i < SRV_MAX_N_IO_THREADS);
03400 srv_io_thread_function[i] = "write thread";
03401 }
03402
03403 os_aio_sync_array = os_aio_array_create(n_slots_sync, 1);
03404 if (os_aio_sync_array == NULL) {
03405 goto err_exit;
03406 }
03407
03408
03409 os_aio_n_segments = n_segments;
03410
03411 os_aio_validate();
03412
03413 os_aio_segment_wait_events = static_cast<os_event_t *>(ut_malloc(n_segments * sizeof(void*)));
03414
03415 for (i = 0; i < n_segments; i++) {
03416 os_aio_segment_wait_events[i] = os_event_create(NULL);
03417 }
03418
03419 os_last_printout = time(NULL);
03420
03421 return(TRUE);
03422
03423 err_exit:
03424 return(FALSE);
03425
03426 }
03427
03428
03429
03430 UNIV_INTERN
03431 void
03432 os_aio_free(void)
03433
03434 {
03435 ulint i;
03436
03437 os_aio_array_free(os_aio_ibuf_array);
03438 os_aio_ibuf_array = NULL;
03439 os_aio_array_free(os_aio_log_array);
03440 os_aio_log_array = NULL;
03441 os_aio_array_free(os_aio_read_array);
03442 os_aio_read_array = NULL;
03443 os_aio_array_free(os_aio_write_array);
03444 os_aio_write_array = NULL;
03445 os_aio_array_free(os_aio_sync_array);
03446 os_aio_sync_array = NULL;
03447
03448 for (i = 0; i < os_aio_n_segments; i++) {
03449 os_event_free(os_aio_segment_wait_events[i]);
03450 }
03451
03452 ut_free(os_aio_segment_wait_events);
03453 os_aio_segment_wait_events = 0;
03454 os_aio_n_segments = 0;
03455 }
03456
03457 #ifdef WIN_ASYNC_IO
03458
03461 static
03462 void
03463 os_aio_array_wake_win_aio_at_shutdown(
03464
03465 os_aio_array_t* array)
03466 {
03467 ulint i;
03468
03469 for (i = 0; i < array->n_slots; i++) {
03470
03471 SetEvent((array->slots + i)->handle);
03472 }
03473 }
03474 #endif
03475
03476
03479 UNIV_INTERN
03480 void
03481 os_aio_wake_all_threads_at_shutdown(void)
03482
03483 {
03484 ulint i;
03485
03486 #ifdef WIN_ASYNC_IO
03487
03488 os_aio_array_wake_win_aio_at_shutdown(os_aio_read_array);
03489 os_aio_array_wake_win_aio_at_shutdown(os_aio_write_array);
03490 os_aio_array_wake_win_aio_at_shutdown(os_aio_ibuf_array);
03491 os_aio_array_wake_win_aio_at_shutdown(os_aio_log_array);
03492
03493 #elif defined(LINUX_NATIVE_AIO)
03494
03495
03496
03497
03498
03499
03500 if (srv_use_native_aio) {
03501 return;
03502 }
03503
03504
03505 #endif
03506
03507
03508 for (i = 0; i < os_aio_n_segments; i++) {
03509
03510 os_event_set(os_aio_segment_wait_events[i]);
03511 }
03512 }
03513
03514
03517 UNIV_INTERN
03518 void
03519 os_aio_wait_until_no_pending_writes(void)
03520
03521 {
03522 os_event_wait(os_aio_write_array->is_empty);
03523 }
03524
03525
03529 static
03530 ulint
03531 os_aio_get_segment_no_from_slot(
03532
03533 os_aio_array_t* array,
03534 os_aio_slot_t* slot)
03535 {
03536 ulint segment;
03537 ulint seg_len;
03538
03539 if (array == os_aio_ibuf_array) {
03540 segment = 0;
03541
03542 } else if (array == os_aio_log_array) {
03543 segment = 1;
03544
03545 } else if (array == os_aio_read_array) {
03546 seg_len = os_aio_read_array->n_slots
03547 / os_aio_read_array->n_segments;
03548
03549 segment = 2 + slot->pos / seg_len;
03550 } else {
03551 ut_a(array == os_aio_write_array);
03552 seg_len = os_aio_write_array->n_slots
03553 / os_aio_write_array->n_segments;
03554
03555 segment = os_aio_read_array->n_segments + 2
03556 + slot->pos / seg_len;
03557 }
03558
03559 return(segment);
03560 }
03561
03562
03565 static
03566 ulint
03567 os_aio_get_array_and_local_segment(
03568
03569 os_aio_array_t** array,
03570 ulint global_segment)
03571 {
03572 ulint segment;
03573
03574 ut_a(global_segment < os_aio_n_segments);
03575
03576 if (global_segment == 0) {
03577 *array = os_aio_ibuf_array;
03578 segment = 0;
03579
03580 } else if (global_segment == 1) {
03581 *array = os_aio_log_array;
03582 segment = 0;
03583
03584 } else if (global_segment < os_aio_read_array->n_segments + 2) {
03585 *array = os_aio_read_array;
03586
03587 segment = global_segment - 2;
03588 } else {
03589 *array = os_aio_write_array;
03590
03591 segment = global_segment - (os_aio_read_array->n_segments + 2);
03592 }
03593
03594 return(segment);
03595 }
03596
03597
/** Reserves a free slot in an aio array and records the request parameters
in it. If every slot is taken, the function waits on array->not_full (waking
the simulated aio handler threads first when native aio is not in use) and
then tries again. With Windows async i/o or Linux native aio the slot's
control block is prepared for submission as well.
@return pointer to the reserved slot; the array mutex is not held on return */
03601 static
03602 os_aio_slot_t*
03603 os_aio_array_reserve_slot(
03604
03605 ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
03606 os_aio_array_t* array, /*!< in: aio array to reserve the slot from */
03607 fil_node_t* message1, /*!< in: stored in the slot as message1 */
03609 void* message2, /*!< in: stored in the slot as message2 */
03611 os_file_t file, /*!< in: file handle of the request */
03612 const char* name, /*!< in: stored in the slot as name; presumably the file name -- confirm with callers */
03614 void* buf, /*!< in: buffer to read into or write from */
03616 ulint offset, /*!< in: low part of the file offset */
03618 ulint offset_high, /*!< in: high part of the file offset; combined as (offset_high << 32) + offset for Linux native aio */
03620 ulint len) /*!< in: length of the request in bytes */
03621 {
03622 os_aio_slot_t* slot = NULL;
03623 #ifdef WIN_ASYNC_IO
03624 OVERLAPPED* control;
03625
03626 #elif defined(LINUX_NATIVE_AIO)
03627
03628 struct iocb* iocb;
03629 off_t aio_offset;
03630
03631 #endif
03632 ulint i;
03633 ulint counter;
03634 ulint slots_per_seg;
03635 ulint local_seg;
03636
03637 #ifdef WIN_ASYNC_IO
03638 ut_a((len & 0xFFFFFFFFUL) == len); /* the length must fit in 32 bits */
03639 #endif
03640
03641 /* Read without the mutex: n_slots and n_segments are assigned in os_aio_array_create() */
03642 slots_per_seg = array->n_slots / array->n_segments;
03643
03644 /* Preferred local segment: offsets within the same run of 64 pages */
03645 /* (UNIV_PAGE_SIZE_SHIFT + 6 bits) map to the same segment, which keeps */
03646 /* adjacent requests together for merging in os_aio_simulated_handle() */
03647 local_seg = (offset >> (UNIV_PAGE_SIZE_SHIFT + 6))
03648 % array->n_segments;
03649
03650 loop:
03651 os_mutex_enter(array->mutex);
03652
03653 if (array->n_reserved == array->n_slots) {
03654 os_mutex_exit(array->mutex);
03655
03656 if (!srv_use_native_aio) {
03657 /* The array is full: wake the simulated aio handler threads */
03658 /* so that they process pending requests and free slots */
03659
03660 os_aio_simulated_wake_handler_threads();
03661 }
03662
03663 os_event_wait(array->not_full);
03664
03665 goto loop;
03666 }
03667
03668 /* Scan all n_slots slots circularly, starting from the first slot of */
03669 /* the preferred segment. n_reserved < n_slots here, so a free slot */
03670 /* must exist */
03671 for (i = local_seg * slots_per_seg, counter = 0;
03672 counter < array->n_slots; i++, counter++) {
03673
03674 i %= array->n_slots;
03675 slot = os_aio_array_get_nth_slot(array, i);
03676
03677 if (slot->reserved == FALSE) {
03678 goto found;
03679 }
03680 }
03681
03682 /* Not reached unless n_reserved disagrees with the slots' reserved flags */
03683 ut_error;
03684
03685 found:
03686 ut_a(slot->reserved == FALSE);
03687 array->n_reserved++;
03688
03689 if (array->n_reserved == 1) {
03690 os_event_reset(array->is_empty);
03691 }
03692
03693 if (array->n_reserved == array->n_slots) {
03694 os_event_reset(array->not_full);
03695 }
03696
03697 slot->reserved = TRUE;
03698 slot->reservation_time = time(NULL);
03699 slot->message1 = message1;
03700 slot->message2 = message2;
03701 slot->file = file;
03702 slot->name = name;
03703 slot->len = len;
03704 slot->type = type;
03705 slot->buf = static_cast<unsigned char *>(buf);
03706 slot->offset = offset;
03707 slot->offset_high = offset_high;
03708 slot->io_already_done = FALSE;
03709
03710 #ifdef WIN_ASYNC_IO
03711 control = &(slot->control);
03712 control->Offset = (DWORD)offset;
03713 control->OffsetHigh = (DWORD)offset_high;
03714 ResetEvent(slot->handle);
03715
03716 #elif defined(LINUX_NATIVE_AIO)
03717
03718 /* Without native aio the iocb is not used; skip its preparation */
03719 if (!srv_use_native_aio) {
03720 goto skip_native_aio;
03721 }
03722
03723 /* With an 8-byte off_t, combine both halves of the offset; */
03724 /* otherwise require that the offset fits in the low part alone */
03725 if (sizeof(aio_offset) == 8) {
03726 aio_offset = offset_high;
03727 aio_offset <<= 32;
03728 aio_offset += offset;
03729 } else {
03730 ut_a(offset_high == 0);
03731 aio_offset = offset;
03732 }
03733
03734 iocb = &slot->control;
03735
03736 if (type == OS_FILE_READ) {
03737 io_prep_pread(iocb, file, buf, len, aio_offset);
03738 } else {
03739 ut_a(type == OS_FILE_WRITE);
03740 io_prep_pwrite(iocb, file, buf, len, aio_offset);
03741 }
03742
03743 iocb->data = (void*)slot; /* os_aio_linux_collect() reads the slot back from this field */
03744 slot->n_bytes = 0;
03745 slot->ret = 0;
03746
03747
03748
03749 skip_native_aio:
03750 #endif
03751 os_mutex_exit(array->mutex);
03752
03753 return(slot);
03754 }
03755
03756
03758 static
03759 void
03760 os_aio_array_free_slot(
03761
03762 os_aio_array_t* array,
03763 os_aio_slot_t* slot)
03764 {
03765 ut_ad(array);
03766 ut_ad(slot);
03767
03768 os_mutex_enter(array->mutex);
03769
03770 ut_ad(slot->reserved);
03771
03772 slot->reserved = FALSE;
03773
03774 array->n_reserved--;
03775
03776 if (array->n_reserved == array->n_slots - 1) {
03777 os_event_set(array->not_full);
03778 }
03779
03780 if (array->n_reserved == 0) {
03781 os_event_set(array->is_empty);
03782 }
03783
03784 #ifdef WIN_ASYNC_IO
03785
03786 ResetEvent(slot->handle);
03787
03788 #elif defined(LINUX_NATIVE_AIO)
03789
03790 if (srv_use_native_aio) {
03791 memset(&slot->control, 0x0, sizeof(slot->control));
03792 slot->n_bytes = 0;
03793 slot->ret = 0;
03794
03795 } else {
03796
03797
03798 ut_ad(slot->n_bytes == 0);
03799 ut_ad(slot->ret == 0);
03800 }
03801
03802 #endif
03803 os_mutex_exit(array->mutex);
03804 }
03805
03806
03808 static
03809 void
03810 os_aio_simulated_wake_handler_thread(
03811
03812 ulint global_segment)
03814 {
03815 os_aio_array_t* array;
03816 os_aio_slot_t* slot;
03817 ulint segment;
03818 ulint n;
03819 ulint i;
03820
03821 ut_ad(!srv_use_native_aio);
03822
03823 segment = os_aio_get_array_and_local_segment(&array, global_segment);
03824
03825 n = array->n_slots / array->n_segments;
03826
03827
03828
03829 os_mutex_enter(array->mutex);
03830
03831 for (i = 0; i < n; i++) {
03832 slot = os_aio_array_get_nth_slot(array, i + segment * n);
03833
03834 if (slot->reserved) {
03835
03836
03837 break;
03838 }
03839 }
03840
03841 os_mutex_exit(array->mutex);
03842
03843 if (i < n) {
03844 os_event_set(os_aio_segment_wait_events[global_segment]);
03845 }
03846 }
03847
03848
03850 UNIV_INTERN
03851 void
03852 os_aio_simulated_wake_handler_threads(void)
03853
03854 {
03855 ulint i;
03856
03857 if (srv_use_native_aio) {
03858
03859
03860 return;
03861 }
03862
03863 os_aio_recommend_sleep_for_read_threads = FALSE;
03864
03865 for (i = 0; i < os_aio_n_segments; i++) {
03866 os_aio_simulated_wake_handler_thread(i);
03867 }
03868 }
03869
03870
03875 UNIV_INTERN
03876 void
03877 os_aio_simulated_put_read_threads_to_sleep(void)
03878
03879 {
03880
03881
03882
03883
03884
03885 #ifdef __WIN__
03886 os_aio_array_t* array;
03887 ulint g;
03888
03889 if (srv_use_native_aio) {
03890
03891
03892 return;
03893 }
03894
03895 os_aio_recommend_sleep_for_read_threads = TRUE;
03896
03897 for (g = 0; g < os_aio_n_segments; g++) {
03898 os_aio_get_array_and_local_segment(&array, g);
03899
03900 if (array == os_aio_read_array) {
03901
03902 os_event_reset(os_aio_segment_wait_events[g]);
03903 }
03904 }
03905 #endif
03906 }
03907
03908 #if defined(LINUX_NATIVE_AIO)
03909
03912 static
03913 ibool
03914 os_aio_linux_dispatch(
03915
03916 os_aio_array_t* array,
03917 os_aio_slot_t* slot)
03918 {
03919 int ret;
03920 ulint io_ctx_index;
03921 struct iocb* iocb;
03922
03923 ut_ad(slot != NULL);
03924 ut_ad(array);
03925
03926 ut_a(slot->reserved);
03927
03928
03929
03930
03931
03932 iocb = &slot->control;
03933 io_ctx_index = (slot->pos * array->n_segments) / array->n_slots;
03934
03935 ret = io_submit(array->aio_ctx[io_ctx_index], 1, &iocb);
03936
03937 #if defined(UNIV_AIO_DEBUG)
03938 fprintf(stderr,
03939 "io_submit[%c] ret[%d]: slot[%p] ctx[%p] seg[%lu]\n",
03940 (slot->type == OS_FILE_WRITE) ? 'w' : 'r', ret, slot,
03941 array->aio_ctx[io_ctx_index], (ulong)io_ctx_index);
03942 #endif
03943
03944
03945
03946 if (UNIV_UNLIKELY(ret != 1)) {
03947 errno = -ret;
03948 return(FALSE);
03949 }
03950
03951 return(TRUE);
03952 }
03953 #endif
03954
03955
03956
/** Requests an asynchronous (or, for OS_AIO_SYNC, synchronous) read or
write. For asynchronous modes a slot is reserved in the aio array selected
by mode; the request is then either submitted to the OS (native aio) or left
for a simulated aio handler thread, which is woken unless the caller asked
for a later wake-up.
@return TRUE if the request was performed or queued successfully, FALSE on
failure */
03960 UNIV_INTERN
03961 ibool
03962 os_aio_func(
03963
03964 ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
03965 ulint mode, /*!< in: OS_AIO_NORMAL, OS_AIO_IBUF, OS_AIO_LOG or OS_AIO_SYNC, optionally ORed with OS_AIO_SIMULATED_WAKE_LATER */
03978 const char* name, /*!< in: passed to os_file_write() and to os_file_handle_error(); presumably the file name -- confirm */
03980 os_file_t file, /*!< in: file handle */
03981 void* buf, /*!< in: buffer to read into or write from */
03983 ulint offset, /*!< in: low part of the file offset; must be a multiple of OS_FILE_LOG_BLOCK_SIZE */
03985 ulint offset_high, /*!< in: high part of the file offset */
03987 ulint n, /*!< in: number of bytes; must be a nonzero multiple of OS_FILE_LOG_BLOCK_SIZE */
03988 fil_node_t* message1, /*!< in: stored in the reserved slot; handed back by the aio handler functions */
03992 void* message2) /*!< in: stored in the reserved slot; handed back by the aio handler functions */
03996 {
03997 os_aio_array_t* array;
03998 os_aio_slot_t* slot;
03999 #ifdef WIN_ASYNC_IO
04000 ibool retval;
04001 BOOL ret = TRUE;
04002 DWORD len = (DWORD) n;
04003 struct fil_node_struct * dummy_mess1;
04004 void* dummy_mess2;
04005 ulint dummy_type;
04006 #endif
04007 #if defined LINUX_NATIVE_AIO || defined WIN_ASYNC_IO
04008 ibool retry;
04009 #endif
04010 ulint wake_later;
04011
04012 ut_ad(file);
04013 ut_ad(buf);
04014 ut_ad(n > 0);
04015 ut_ad(n % OS_FILE_LOG_BLOCK_SIZE == 0);
04016 ut_ad(offset % OS_FILE_LOG_BLOCK_SIZE == 0);
04017 ut_ad(os_aio_validate());
04018 #ifdef WIN_ASYNC_IO
04019 ut_ad((n & 0xFFFFFFFFUL) == n);
04020 #endif
04021
04022 wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER; /* split the flag bit from the mode proper */
04023 mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER);
04024
04025 if (mode == OS_AIO_SYNC
04026 #ifdef WIN_ASYNC_IO
04027 && !srv_use_native_aio
04028 #endif
04029 ) {
04030 /* Plain synchronous i/o: done directly in the calling thread, */
04031 /* without reserving a slot. With WIN_ASYNC_IO this shortcut is */
04032 /* taken only when native aio is off; otherwise the request goes */
04033 /* through the sync array below */
04034
04035
04036
04037 if (type == OS_FILE_READ) {
04038 return(os_file_read(file, buf, offset,
04039 offset_high, n));
04040 }
04041
04042 ut_a(type == OS_FILE_WRITE);
04043
04044 return(os_file_write(name, file, buf, offset, offset_high, n));
04045 }
04046
04047 #if defined LINUX_NATIVE_AIO || defined WIN_ASYNC_IO
04048 try_again:
04049 #endif
04050 if (mode == OS_AIO_NORMAL) {
04051 if (type == OS_FILE_READ) {
04052 array = os_aio_read_array;
04053 } else {
04054 array = os_aio_write_array;
04055 }
04056 } else if (mode == OS_AIO_IBUF) {
04057 ut_ad(type == OS_FILE_READ);
04058
04059 /* For ibuf requests the wake-later flag is ignored: the handler */
04060 /* thread is always woken right away */
04061 wake_later = FALSE;
04062
04063 array = os_aio_ibuf_array;
04064 } else if (mode == OS_AIO_LOG) {
04065
04066 array = os_aio_log_array;
04067 } else if (mode == OS_AIO_SYNC) {
04068 array = os_aio_sync_array;
04069
04070 #if defined(LINUX_NATIVE_AIO)
04071 /* NOTE(review): OS_AIO_SYNC already returned above when WIN_ASYNC_IO is not defined, so this looks purely defensive */
04072 ut_a(!srv_use_native_aio);
04073 #endif
04074 } else {
04075 array = NULL;
04076 ut_error;
04077 }
04078
04079 slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
04080 name, buf, offset, offset_high, n);
04081 if (type == OS_FILE_READ) {
04082 if (srv_use_native_aio) {
04083 os_n_file_reads++;
04084 os_bytes_read_since_printout += n;
04085 #ifdef WIN_ASYNC_IO
04086 ret = ReadFile(file, buf, (DWORD)n, &len,
04087 &(slot->control));
04088
04089 #elif defined(LINUX_NATIVE_AIO)
04090 if (!os_aio_linux_dispatch(array, slot)) {
04091 goto err_exit;
04092 }
04093 #endif
04094 } else {
04095 if (!wake_later) {
04096 os_aio_simulated_wake_handler_thread(
04097 os_aio_get_segment_no_from_slot(
04098 array, slot));
04099 }
04100 }
04101 } else if (type == OS_FILE_WRITE) {
04102 if (srv_use_native_aio) {
04103 os_n_file_writes++;
04104 #ifdef WIN_ASYNC_IO
04105 ret = WriteFile(file, buf, (DWORD)n, &len,
04106 &(slot->control));
04107
04108 #elif defined(LINUX_NATIVE_AIO)
04109 if (!os_aio_linux_dispatch(array, slot)) {
04110 goto err_exit;
04111 }
04112 #endif
04113 } else {
04114 if (!wake_later) {
04115 os_aio_simulated_wake_handler_thread(
04116 os_aio_get_segment_no_from_slot(
04117 array, slot));
04118 }
04119 }
04120 } else {
04121 ut_error;
04122 }
04123
04124 #ifdef WIN_ASYNC_IO
04125 if (srv_use_native_aio) {
04126 if ((ret && len == n)
04127 || (!ret && GetLastError() == ERROR_IO_PENDING)) {
04128 /* The request either completed immediately with the full */
04129 /* length or is pending */
04130 if (mode == OS_AIO_SYNC) {
04131 /* Synchronous request on the sync array: wait for */
04132 /* its completion here; the handler also frees the */
04133 /* slot. ULINT_UNDEFINED selects the sync array */
04134
04135
04136 retval = os_aio_windows_handle(ULINT_UNDEFINED,
04137 slot->pos,
04138 &dummy_mess1,
04139 &dummy_mess2,
04140 &dummy_type);
04141
04142 return(retval);
04143 }
04144
04145 return(TRUE);
04146 }
04147
04148 goto err_exit;
04149 }
04150 #endif
04151 /* Asynchronous request queued */
04152 return(TRUE);
04153
04154 #if defined LINUX_NATIVE_AIO || defined WIN_ASYNC_IO
04155 err_exit:
04156 os_aio_array_free_slot(array, slot);
04157
04158 retry = os_file_handle_error(name,
04159 type == OS_FILE_READ
04160 ? "aio read" : "aio write");
04161 if (retry) {
04162 /* The slot was freed above; try_again reserves a new one */
04163 goto try_again;
04164 }
04165
04166 return(FALSE);
04167 #endif
04168 }
04169
04170 #ifdef WIN_ASYNC_IO
04171
/** Waits for the completion of a Windows overlapped i/o request, either in
a given global segment or (segment == ULINT_UNDEFINED) in slot pos of the
sync array. Hands back the messages and type stored in the slot, retries
the request once synchronously if os_file_handle_error() asks for it, and
frees the slot.
@return TRUE if the i/o transferred the full slot length, FALSE otherwise */
04179 UNIV_INTERN
04180 ibool
04181 os_aio_windows_handle(
04182
04183 ulint segment, /*!< in: global segment number, or ULINT_UNDEFINED to wait in the sync array */
04191 ulint pos, /*!< in: slot position to wait for; used only with the sync array */
04193 fil_node_t**message1, /*!< out: message1 stored in the completed slot */
04198 void** message2, /*!< out: message2 stored in the completed slot */
04199 ulint* type) /*!< out: type (OS_FILE_WRITE or OS_FILE_READ) stored in the completed slot */
04200 {
04201 ulint orig_seg = segment;
04202 os_aio_array_t* array;
04203 os_aio_slot_t* slot;
04204 ulint n;
04205 ulint i;
04206 ibool ret_val;
04207 BOOL ret;
04208 DWORD len;
04209 BOOL retry = FALSE;
04210
04211 if (segment == ULINT_UNDEFINED) {
04212 array = os_aio_sync_array;
04213 segment = 0;
04214 } else {
04215 segment = os_aio_get_array_and_local_segment(&array, segment);
04216 }
04217
04218 /* From here on, segment is the local index within array */
04219
04220
04221 ut_ad(os_aio_validate());
04222 ut_ad(segment < array->n_segments);
04223
04224 n = array->n_slots / array->n_segments;
04225
04226 if (array == os_aio_sync_array) {
04227 WaitForSingleObject(
04228 os_aio_array_get_nth_slot(array, pos)->handle,
04229 INFINITE);
04230 i = pos;
04231 } else {
04232 srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
04233 i = WaitForMultipleObjects((DWORD) n,
04234 array->handles + segment * n,
04235 FALSE,
04236 INFINITE);
04237 }
04238 /* NOTE(review): the wait result is used as a slot index without checking for WAIT_FAILED -- confirm this is acceptable */
04239 if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
04240 os_thread_exit(NULL);
04241 }
04242
04243 os_mutex_enter(array->mutex);
04244
04245 slot = os_aio_array_get_nth_slot(array, i + segment * n);
04246
04247 ut_a(slot->reserved);
04248
04249 if (orig_seg != ULINT_UNDEFINED) {
04250 srv_set_io_thread_op_info(orig_seg,
04251 "get windows aio return value");
04252 }
04253
04254 ret = GetOverlappedResult(slot->file, &(slot->control), &len, TRUE);
04255
04256 *message1 = slot->message1;
04257 *message2 = slot->message2;
04258
04259 *type = slot->type;
04260
04261 if (ret && len == slot->len) {
04262 ret_val = TRUE;
04263
04264 #ifdef UNIV_DO_FLUSH
04265 if (slot->type == OS_FILE_WRITE
04266 && !os_do_not_call_flush_at_each_write) {
04267 if (!os_file_flush(slot->file)) {
04268 ut_error;
04269 }
04270 }
04271 #endif
04272 } else if (os_file_handle_error(slot->name, "Windows aio")) {
04273 /* The error handler asks for a retry; done below, after the mutex is released */
04274 retry = TRUE;
04275 } else {
04276
04277 ret_val = FALSE;
04278 }
04279
04280 os_mutex_exit(array->mutex);
04281
04282 if (retry) {
04283 /* Re-issue the same request using the slot's stored parameters. */
04284 /* The slot is still reserved, so its fields are read here */
04285 /* without the array mutex */
04286 #ifdef UNIV_PFS_IO
04287
04288
04289
04290 struct PSI_file_locker* locker = NULL;
04291 register_pfs_file_io_begin(locker, slot->file, slot->len,
04292 (slot->type == OS_FILE_WRITE)
04293 ? PSI_FILE_WRITE
04294 : PSI_FILE_READ,
04295 __FILE__, __LINE__);
04296 #endif
04297
04298 ut_a((slot->len & 0xFFFFFFFFUL) == slot->len);
04299
04300 switch (slot->type) {
04301 case OS_FILE_WRITE:
04302 ret = WriteFile(slot->file, slot->buf,
04303 (DWORD) slot->len, &len,
04304 &(slot->control));
04305
04306 break;
04307 case OS_FILE_READ:
04308 ret = ReadFile(slot->file, slot->buf,
04309 (DWORD) slot->len, &len,
04310 &(slot->control));
04311
04312 break;
04313 default:
04314 ut_error;
04315 }
04316
04317 #ifdef UNIV_PFS_IO
04318 register_pfs_file_io_end(locker, len);
04319 #endif
04320
04321 if (!ret && GetLastError() == ERROR_IO_PENDING) {
04322 /* The retried request is still in flight: block until it */
04323 /* completes (last argument TRUE), making the retry */
04324 /* synchronous */
04325
04326
04327
04328 ret = GetOverlappedResult(slot->file,
04329 &(slot->control),
04330 &len, TRUE);
04331 }
04332
04333 ret_val = ret && len == slot->len;
04334 }
04335
04336 os_aio_array_free_slot(array, slot);
04337
04338 return(ret_val);
04339 }
04340 #endif
04341
04342 #if defined(LINUX_NATIVE_AIO)
04343
04354 static
04355 void
04356 os_aio_linux_collect(
04357
04358 os_aio_array_t* array,
04359 ulint segment,
04360 ulint seg_size)
04361 {
04362 int i;
04363 int ret;
04364 ulint start_pos;
04365 ulint end_pos;
04366 struct timespec timeout;
04367 struct io_event* events;
04368 struct io_context* io_ctx;
04369
04370
04371 ut_ad(array != NULL);
04372 ut_ad(seg_size > 0);
04373 ut_ad(segment < array->n_segments);
04374
04375
04376 events = &array->aio_events[segment * seg_size];
04377
04378
04379 io_ctx = array->aio_ctx[segment];
04380
04381
04382 start_pos = segment * seg_size;
04383
04384
04385 end_pos = start_pos + seg_size;
04386
04387 retry:
04388
04389
04390
04391
04392
04393 if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
04394 os_thread_exit(NULL);
04395 }
04396
04397
04398
04399 memset(events, 0, sizeof(*events) * seg_size);
04400 timeout.tv_sec = 0;
04401 timeout.tv_nsec = OS_AIO_REAP_TIMEOUT;
04402
04403 ret = io_getevents(io_ctx, 1, seg_size, events, &timeout);
04404
04405
04406
04407
04408
04409
04410 if (ret == -EAGAIN) {
04411 goto retry;
04412 }
04413
04414
04415
04416
04417
04418 if (ret == -EINTR) {
04419 goto retry;
04420 }
04421
04422
04423 if (ret == 0) {
04424 goto retry;
04425 }
04426
04427
04428 if (UNIV_UNLIKELY(ret < 0)) {
04429 ut_print_timestamp(stderr);
04430 fprintf(stderr,
04431 " InnoDB: unexpected ret_code[%d] from"
04432 " io_getevents()!\n", ret);
04433 ut_error;
04434 }
04435
04436 ut_a(ret > 0);
04437
04438 for (i = 0; i < ret; i++) {
04439 os_aio_slot_t* slot;
04440 struct iocb* control;
04441
04442 control = (struct iocb *)events[i].obj;
04443 ut_a(control != NULL);
04444
04445 slot = (os_aio_slot_t *) control->data;
04446
04447
04448 ut_a(slot != NULL);
04449 ut_a(slot->reserved);
04450
04451 #if defined(UNIV_AIO_DEBUG)
04452 fprintf(stderr,
04453 "io_getevents[%c]: slot[%p] ctx[%p]"
04454 " seg[%lu]\n",
04455 (slot->type == OS_FILE_WRITE) ? 'w' : 'r',
04456 slot, io_ctx, segment);
04457 #endif
04458
04459
04460 ut_a(slot->pos >= start_pos);
04461
04462
04463 ut_a(slot->pos < end_pos);
04464
04465
04466
04467 os_mutex_enter(array->mutex);
04468 slot->n_bytes = events[i].res;
04469 slot->ret = events[i].res2;
04470 slot->io_already_done = TRUE;
04471 os_mutex_exit(array->mutex);
04472 }
04473
04474 return;
04475 }
04476
04477
04485 UNIV_INTERN
04486 ibool
04487 os_aio_linux_handle(
04488
04489 ulint global_seg,
04495 fil_node_t**message1,
04496 void** message2,
04500 ulint* type)
04501 {
04502 ulint segment;
04503 os_aio_array_t* array;
04504 os_aio_slot_t* slot;
04505 ulint n;
04506 ulint i;
04507 ibool ret = FALSE;
04508
04509
04510 ut_a(global_seg != ULINT_UNDEFINED);
04511
04512
04513 segment = os_aio_get_array_and_local_segment(&array, global_seg);
04514 n = array->n_slots / array->n_segments;
04515
04516
04517 for (;;) {
04518 os_mutex_enter(array->mutex);
04519 for (i = 0; i < n; ++i) {
04520 slot = os_aio_array_get_nth_slot(
04521 array, i + segment * n);
04522 if (slot->reserved && slot->io_already_done) {
04523
04524 goto found;
04525 }
04526 }
04527
04528 os_mutex_exit(array->mutex);
04529
04530
04531
04532
04533
04534 srv_set_io_thread_op_info(global_seg,
04535 "waiting for completed aio requests");
04536 os_aio_linux_collect(array, segment, n);
04537 }
04538
04539 found:
04540
04541
04542
04543
04544 srv_set_io_thread_op_info(global_seg,
04545 "processing completed aio requests");
04546
04547
04548 ut_a(i < n);
04549
04550 ut_ad(slot != NULL);
04551 ut_ad(slot->reserved);
04552 ut_ad(slot->io_already_done);
04553
04554 *message1 = slot->message1;
04555 *message2 = slot->message2;
04556
04557 *type = slot->type;
04558
04559 if ((slot->ret == 0) && (slot->n_bytes == (long)slot->len)) {
04560 ret = TRUE;
04561
04562 #ifdef UNIV_DO_FLUSH
04563 if (slot->type == OS_FILE_WRITE
04564 && !os_do_not_call_flush_at_each_write)
04565 && !os_file_flush(slot->file) {
04566 ut_error;
04567 }
04568 #endif
04569 } else {
04570 errno = -slot->ret;
04571
04572
04573
04574
04575
04576
04577
04578
04579 os_file_handle_error(slot->name, "Linux aio");
04580
04581 ret = FALSE;
04582 }
04583
04584 os_mutex_exit(array->mutex);
04585
04586 os_aio_array_free_slot(array, slot);
04587
04588 return(ret);
04589 }
04590 #endif
04591
04592
/** Simulated aio handler: picks a pending request in the given global
segment, merges it with requests that continue it contiguously in the same
file (up to OS_AIO_MERGE_N_CONSECUTIVE in total), performs the combined i/o
synchronously with os_file_read()/os_file_write(), and returns the first
request of the batch. The other requests of the batch stay reserved and
marked done; later calls return them one at a time. When no request is
pending, the function sleeps on the segment's wait event and starts over.
@return TRUE; a failed i/o trips the ut_a(ret) assertion instead of being
returned */
04596 UNIV_INTERN
04597 ibool
04598 os_aio_simulated_handle(
04599
04600 ulint global_segment, /*!< in: global segment number of the calling handler thread */
04605 fil_node_t**message1, /*!< out: message1 stored in the returned slot */
04610 void** message2, /*!< out: message2 stored in the returned slot */
04611 ulint* type) /*!< out: type (OS_FILE_WRITE or OS_FILE_READ) stored in the returned slot */
04612 {
04613 os_aio_array_t* array;
04614 ulint segment;
04615 os_aio_slot_t* slot;
04616 os_aio_slot_t* slot2;
04617 os_aio_slot_t* consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE];
04618 ulint n_consecutive;
04619 ulint total_len;
04620 ulint offs;
04621 ulint lowest_offset;
04622 ulint biggest_age;
04623 ulint age;
04624 byte* combined_buf;
04625 byte* combined_buf2;
04626 ibool ret;
04627 ulint n;
04628 ulint i;
04629
04630 /* NOTE(review): this assignment is redundant with the memset() below */
04631 *consecutive_ios = NULL;
04632
04633 memset(consecutive_ios, 0, sizeof(os_aio_slot_t*) * OS_AIO_MERGE_N_CONSECUTIVE);
04634 segment = os_aio_get_array_and_local_segment(&array, global_segment);
04635
04636 restart:
04637 /* Re-entered after every wake-up from the wait event */
04638
04639
04640 srv_set_io_thread_op_info(global_segment,
04641 "looking for i/o requests (a)");
04642 ut_ad(os_aio_validate());
04643 ut_ad(segment < array->n_segments);
04644
04645 n = array->n_slots / array->n_segments;
04646
04647 /* Read threads go straight to sleep while the recommend-sleep flag is set */
04648
04649 if (array == os_aio_read_array
04650 && os_aio_recommend_sleep_for_read_threads) {
04651
04652
04653
04654
04655 goto recommended_sleep;
04656 }
04657
04658 os_mutex_enter(array->mutex);
04659
04660 srv_set_io_thread_op_info(global_segment,
04661 "looking for i/o requests (b)");
04662
04663 /* First pass: a slot whose i/o was already performed as part of an */
04664 /* earlier merged batch is returned immediately */
04665
04666 for (i = 0; i < n; i++) {
04667 slot = os_aio_array_get_nth_slot(array, i + segment * n);
04668
04669 if (slot->reserved && slot->io_already_done) {
04670
04671 if (os_aio_print_debug) {
04672 fprintf(stderr,
04673 "InnoDB: i/o for slot %lu"
04674 " already done, returning\n",
04675 (ulong) i);
04676 }
04677
04678 ret = TRUE;
04679
04680 goto slot_io_done;
04681 }
04682 }
04683
04684 n_consecutive = 0;
04685
04686 /* Second pass: among requests reserved at least 2 seconds ago, pick */
04687 /* the oldest one, breaking ties by the lowest offset, so that old */
04688 /* requests are not left waiting indefinitely */
04689
04690 biggest_age = 0;
04691 lowest_offset = ULINT_MAX;
04692
04693 for (i = 0; i < n; i++) {
04694 slot = os_aio_array_get_nth_slot(array, i + segment * n);
04695
04696 if (slot->reserved) {
04697 age = (ulint)difftime(time(NULL),
04698 slot->reservation_time);
04699
04700 if ((age >= 2 && age > biggest_age)
04701 || (age >= 2 && age == biggest_age
04702 && slot->offset < lowest_offset)) {
04703
04704 /* Best candidate so far */
04705 consecutive_ios[0] = slot;
04706
04707 n_consecutive = 1;
04708
04709 biggest_age = age;
04710 lowest_offset = slot->offset;
04711 }
04712 }
04713 }
04714
04715 if (n_consecutive == 0) {
04716 /* No old request: pick the reserved request with the lowest */
04717 /* offset (only the low part, slot->offset, is compared) */
04718
04719
04720 lowest_offset = ULINT_MAX;
04721
04722 for (i = 0; i < n; i++) {
04723 slot = os_aio_array_get_nth_slot(array,
04724 i + segment * n);
04725
04726 if (slot->reserved && slot->offset < lowest_offset) {
04727
04728 /* Best candidate so far */
04729 consecutive_ios[0] = slot;
04730
04731 n_consecutive = 1;
04732
04733 lowest_offset = slot->offset;
04734 }
04735 }
04736 }
04737
04738 if (n_consecutive == 0) {
04739
04740 /* No pending request in this segment: go to sleep */
04741
04742 goto wait_for_io;
04743 }
04744
04745
04746
04747 ut_ad(n_consecutive != 0);
04748 ut_ad(consecutive_ios[0] != NULL);
04749
04750 slot = consecutive_ios[0];
04751
04752 /* Extend the batch: look for a request that starts exactly where the */
04753 /* last one in the batch ends; after each hit the scan restarts */
04754 consecutive_loop:
04755 for (i = 0; i < n; i++) {
04756 slot2 = os_aio_array_get_nth_slot(array, i + segment * n);
04757
04758 if (slot2->reserved && slot2 != slot
04759 && slot2->offset == slot->offset + slot->len
04760 /* the sum below must not have wrapped around */
04761 && slot->offset + slot->len > slot->offset
04762 && slot2->offset_high == slot->offset_high
04763 && slot2->type == slot->type
04764 && slot2->file == slot->file) {
04765
04766 /* Same file, same type, contiguous: append to the batch */
04767
04768 consecutive_ios[n_consecutive] = slot2;
04769 n_consecutive++;
04770
04771 slot = slot2;
04772
04773 if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE) {
04774
04775 goto consecutive_loop;
04776 } else {
04777 break;
04778 }
04779 }
04780 }
04781
04782 srv_set_io_thread_op_info(global_segment, "consecutive i/o requests");
04783
04784 /* consecutive_ios[0 .. n_consecutive - 1] now holds the batch in */
04785 /* ascending offset order; compute its total length and set up the */
04786 /* buffer for the combined i/o */
04787
04788 total_len = 0;
04789 slot = consecutive_ios[0];
04790
04791 for (i = 0; i < n_consecutive; i++) {
04792 total_len += consecutive_ios[i]->len;
04793 }
04794
04795 if (n_consecutive == 1) {
04796 /* A single request uses its own buffer directly */
04797 combined_buf = slot->buf;
04798 combined_buf2 = NULL;
04799 } else {
04800 combined_buf2 = static_cast<unsigned char *>(ut_malloc(total_len + UNIV_PAGE_SIZE));
04801
04802 ut_a(combined_buf2);
04803 /* The extra UNIV_PAGE_SIZE bytes leave room for the alignment below */
04804 combined_buf = static_cast<unsigned char *>(ut_align(combined_buf2, UNIV_PAGE_SIZE));
04805 }
04806
04807 /* The mutex is released for the duration of the i/o. The batch slots */
04808 /* are still reserved and are accessed below without the mutex */
04809
04810
04811 os_mutex_exit(array->mutex);
04812
04813 if (slot->type == OS_FILE_WRITE && n_consecutive > 1) {
04814 /* Gather the write buffers into the combined buffer */
04815 offs = 0;
04816
04817 for (i = 0; i < n_consecutive; i++) {
04818
04819 ut_memcpy(combined_buf + offs, consecutive_ios[i]->buf,
04820 consecutive_ios[i]->len);
04821 offs += consecutive_ios[i]->len;
04822 }
04823 }
04824
04825 srv_set_io_thread_op_info(global_segment, "doing file i/o");
04826
04827 if (os_aio_print_debug) {
04828 fprintf(stderr,
04829 "InnoDB: doing i/o of type %lu at offset %lu %lu,"
04830 " length %lu\n",
04831 (ulong) slot->type, (ulong) slot->offset_high,
04832 (ulong) slot->offset, (ulong) total_len);
04833 }
04834
04835 /* One synchronous i/o covering the whole batch */
04836 if (slot->type == OS_FILE_WRITE) {
04837 ret = os_file_write(slot->name, slot->file, combined_buf,
04838 slot->offset, slot->offset_high,
04839 total_len);
04840 } else {
04841 ret = os_file_read(slot->file, combined_buf,
04842 slot->offset, slot->offset_high, total_len);
04843 }
04844
04845 ut_a(ret); /* an i/o failure is treated as fatal here */
04846 srv_set_io_thread_op_info(global_segment, "file i/o done");
04847
04848 #if 0
04849 fprintf(stderr,
04850 "aio: %lu consecutive %lu:th segment, first offs %lu blocks\n",
04851 n_consecutive, global_segment, slot->offset / UNIV_PAGE_SIZE);
04852 #endif
04853
04854 if (slot->type == OS_FILE_READ && n_consecutive > 1) {
04855 /* Scatter the combined read result into the request buffers */
04856 offs = 0;
04857
04858 for (i = 0; i < n_consecutive; i++) {
04859
04860 ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs,
04861 consecutive_ios[i]->len);
04862 offs += consecutive_ios[i]->len;
04863 }
04864 }
04865
04866 if (combined_buf2) {
04867 ut_free(combined_buf2);
04868 }
04869
04870 os_mutex_enter(array->mutex);
04871
04872 /* Mark every request of the batch as done */
04873
04874 for (i = 0; i < n_consecutive; i++) {
04875 consecutive_ios[i]->io_already_done = TRUE;
04876 }
04877
04878 /* Only the first slot of the batch is returned and freed now; the */
04879 /* others remain reserved with io_already_done set and are returned */
04880 /* by the first pass of later calls */
04881
04882 slot_io_done:
04883
04884 ut_a(slot->reserved);
04885
04886 *message1 = slot->message1;
04887 *message2 = slot->message2;
04888
04889 *type = slot->type;
04890
04891 os_mutex_exit(array->mutex);
04892
04893 os_aio_array_free_slot(array, slot);
04894
04895 return(ret);
04896
04897 wait_for_io:
04898 srv_set_io_thread_op_info(global_segment, "resetting wait event");
04899
04900 /* The event is reset while the array mutex is still held */
04901
04902
04903 os_event_reset(os_aio_segment_wait_events[global_segment]);
04904
04905 os_mutex_exit(array->mutex);
04906
04907 recommended_sleep:
04908 srv_set_io_thread_op_info(global_segment, "waiting for i/o request");
04909
04910 os_event_wait(os_aio_segment_wait_events[global_segment]);
04911
04912 if (os_aio_print_debug) {
04913 fprintf(stderr,
04914 "InnoDB: i/o handler thread for i/o"
04915 " segment %lu wakes up\n",
04916 (ulong) global_segment);
04917 }
04918
04919 goto restart;
04920 }
04921
04922
04925 static
04926 ibool
04927 os_aio_array_validate(
04928
04929 os_aio_array_t* array)
04930 {
04931 os_aio_slot_t* slot;
04932 ulint n_reserved = 0;
04933 ulint i;
04934
04935 ut_a(array);
04936
04937 os_mutex_enter(array->mutex);
04938
04939 ut_a(array->n_slots > 0);
04940 ut_a(array->n_segments > 0);
04941
04942 for (i = 0; i < array->n_slots; i++) {
04943 slot = os_aio_array_get_nth_slot(array, i);
04944
04945 if (slot->reserved) {
04946 n_reserved++;
04947 ut_a(slot->len > 0);
04948 }
04949 }
04950
04951 ut_a(array->n_reserved == n_reserved);
04952
04953 os_mutex_exit(array->mutex);
04954
04955 return(TRUE);
04956 }
04957
04958
04961 UNIV_INTERN
04962 ibool
04963 os_aio_validate(void)
04964
04965 {
04966 os_aio_array_validate(os_aio_read_array);
04967 os_aio_array_validate(os_aio_write_array);
04968 os_aio_array_validate(os_aio_ibuf_array);
04969 os_aio_array_validate(os_aio_log_array);
04970 os_aio_array_validate(os_aio_sync_array);
04971
04972 return(TRUE);
04973 }
04974
04975
04980 static
04981 void
04982 os_aio_print_segment_info(
04983
04984 FILE* file,
04985 ulint* n_seg,
04986 os_aio_array_t* array)
04987 {
04988 ulint i;
04989
04990 ut_ad(array);
04991 ut_ad(n_seg);
04992 ut_ad(array->n_segments > 0);
04993
04994 if (array->n_segments == 1) {
04995 return;
04996 }
04997
04998 fprintf(file, " [");
04999 for (i = 0; i < array->n_segments; i++) {
05000 if (i != 0) {
05001 fprintf(file, ", ");
05002 }
05003
05004 fprintf(file, "%lu", n_seg[i]);
05005 }
05006 fprintf(file, "] ");
05007 }
05008
05009
05011 UNIV_INTERN
05012 void
05013 os_aio_print(
05014
05015 FILE* file)
05016 {
05017 os_aio_array_t* array;
05018 os_aio_slot_t* slot;
05019 ulint n_reserved;
05020 ulint n_res_seg[SRV_MAX_N_IO_THREADS];
05021 time_t current_time;
05022 double time_elapsed;
05023 double avg_bytes_read;
05024 ulint i;
05025
05026 for (i = 0; i < srv_n_file_io_threads; i++) {
05027 fprintf(file, "I/O thread %lu state: %s (%s)", (ulong) i,
05028 srv_io_thread_op_info[i],
05029 srv_io_thread_function[i]);
05030
05031 #ifndef __WIN__
05032 if (os_aio_segment_wait_events[i]->is_set) {
05033 fprintf(file, " ev set");
05034 }
05035 #endif
05036
05037 fprintf(file, "\n");
05038 }
05039
05040 fputs("Pending normal aio reads:", file);
05041
05042 array = os_aio_read_array;
05043 loop:
05044 ut_a(array);
05045
05046 os_mutex_enter(array->mutex);
05047
05048 ut_a(array->n_slots > 0);
05049 ut_a(array->n_segments > 0);
05050
05051 n_reserved = 0;
05052
05053 memset(n_res_seg, 0x0, sizeof(n_res_seg));
05054
05055 for (i = 0; i < array->n_slots; i++) {
05056 ulint seg_no;
05057
05058 slot = os_aio_array_get_nth_slot(array, i);
05059
05060 seg_no = (i * array->n_segments) / array->n_slots;
05061 if (slot->reserved) {
05062 n_reserved++;
05063 n_res_seg[seg_no]++;
05064 #if 0
05065 fprintf(stderr, "Reserved slot, messages %p %p\n",
05066 (void*) slot->message1,
05067 (void*) slot->message2);
05068 #endif
05069 ut_a(slot->len > 0);
05070 }
05071 }
05072
05073 ut_a(array->n_reserved == n_reserved);
05074
05075 fprintf(file, " %lu", (ulong) n_reserved);
05076
05077 os_aio_print_segment_info(file, n_res_seg, array);
05078
05079 os_mutex_exit(array->mutex);
05080
05081 if (array == os_aio_read_array) {
05082 fputs(", aio writes:", file);
05083
05084 array = os_aio_write_array;
05085
05086 goto loop;
05087 }
05088
05089 if (array == os_aio_write_array) {
05090 fputs(",\n ibuf aio reads:", file);
05091 array = os_aio_ibuf_array;
05092
05093 goto loop;
05094 }
05095
05096 if (array == os_aio_ibuf_array) {
05097 fputs(", log i/o's:", file);
05098 array = os_aio_log_array;
05099
05100 goto loop;
05101 }
05102
05103 if (array == os_aio_log_array) {
05104 fputs(", sync i/o's:", file);
05105 array = os_aio_sync_array;
05106
05107 goto loop;
05108 }
05109
05110 putc('\n', file);
05111 current_time = time(NULL);
05112 time_elapsed = 0.001 + difftime(current_time, os_last_printout);
05113
05114 fprintf(file,
05115 "Pending flushes (fsync) log: %lu; buffer pool: %lu\n"
05116 "%lu OS file reads, %lu OS file writes, %lu OS fsyncs\n",
05117 (ulong) fil_n_pending_log_flushes,
05118 (ulong) fil_n_pending_tablespace_flushes,
05119 (ulong) os_n_file_reads, (ulong) os_n_file_writes,
05120 (ulong) os_n_fsyncs);
05121
05122 if (os_file_n_pending_preads != 0 || os_file_n_pending_pwrites != 0) {
05123 fprintf(file,
05124 "%lu pending preads, %lu pending pwrites\n",
05125 (ulong) os_file_n_pending_preads,
05126 (ulong) os_file_n_pending_pwrites);
05127 }
05128
05129 if (os_n_file_reads == os_n_file_reads_old) {
05130 avg_bytes_read = 0.0;
05131 } else {
05132 avg_bytes_read = (double) os_bytes_read_since_printout
05133 / (os_n_file_reads - os_n_file_reads_old);
05134 }
05135
05136 fprintf(file,
05137 "%.2f reads/s, %lu avg bytes/read,"
05138 " %.2f writes/s, %.2f fsyncs/s\n",
05139 (os_n_file_reads - os_n_file_reads_old)
05140 / time_elapsed,
05141 (ulong)avg_bytes_read,
05142 (os_n_file_writes - os_n_file_writes_old)
05143 / time_elapsed,
05144 (os_n_fsyncs - os_n_fsyncs_old)
05145 / time_elapsed);
05146
05147 os_n_file_reads_old = os_n_file_reads;
05148 os_n_file_writes_old = os_n_file_writes;
05149 os_n_fsyncs_old = os_n_fsyncs;
05150 os_bytes_read_since_printout = 0;
05151
05152 os_last_printout = current_time;
05153 }
05154
05155
05157 UNIV_INTERN
05158 void
05159 os_aio_refresh_stats(void)
05160
05161 {
05162 os_n_file_reads_old = os_n_file_reads;
05163 os_n_file_writes_old = os_n_file_writes;
05164 os_n_fsyncs_old = os_n_fsyncs;
05165 os_bytes_read_since_printout = 0;
05166
05167 os_last_printout = time(NULL);
05168 }
05169
05170 #ifdef UNIV_DEBUG
05171
05175 UNIV_INTERN
05176 ibool
05177 os_aio_all_slots_free(void)
05178
05179 {
05180 os_aio_array_t* array;
05181 ulint n_res = 0;
05182
05183 array = os_aio_read_array;
05184
05185 os_mutex_enter(array->mutex);
05186
05187 n_res += array->n_reserved;
05188
05189 os_mutex_exit(array->mutex);
05190
05191 array = os_aio_write_array;
05192
05193 os_mutex_enter(array->mutex);
05194
05195 n_res += array->n_reserved;
05196
05197 os_mutex_exit(array->mutex);
05198
05199 array = os_aio_ibuf_array;
05200
05201 os_mutex_enter(array->mutex);
05202
05203 n_res += array->n_reserved;
05204
05205 os_mutex_exit(array->mutex);
05206
05207 array = os_aio_log_array;
05208
05209 os_mutex_enter(array->mutex);
05210
05211 n_res += array->n_reserved;
05212
05213 os_mutex_exit(array->mutex);
05214
05215 array = os_aio_sync_array;
05216
05217 os_mutex_enter(array->mutex);
05218
05219 n_res += array->n_reserved;
05220
05221 os_mutex_exit(array->mutex);
05222
05223 if (n_res == 0) {
05224
05225 return(TRUE);
05226 }
05227
05228 return(FALSE);
05229 }
05230 #endif
05231
05232 #endif