Drizzled Public API Documentation

mem0pool.cc

00001 /*****************************************************************************
00002 
00003 Copyright (C) 1997, 2009, Innobase Oy. All Rights Reserved.
00004 
00005 This program is free software; you can redistribute it and/or modify it under
00006 the terms of the GNU General Public License as published by the Free Software
00007 Foundation; version 2 of the License.
00008 
00009 This program is distributed in the hope that it will be useful, but WITHOUT
00010 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
00012 
00013 You should have received a copy of the GNU General Public License along with
00014 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
00015 St, Fifth Floor, Boston, MA 02110-1301 USA
00016 
00017 *****************************************************************************/
00018 
00019 /********************************************************************/
00026 #include "mem0pool.h"
00027 #ifdef UNIV_NONINL
00028 #include "mem0pool.ic"
00029 #endif
00030 
00031 #include "srv0srv.h"
00032 #include "sync0sync.h"
00033 #include "ut0mem.h"
00034 #include "ut0lst.h"
00035 #include "ut0byte.h"
00036 #include "mem0mem.h"
00037 #include "srv0start.h"
00038 
00039 /* We would like to use also the buffer frames to allocate memory. This
00040 would be desirable, because then the memory consumption of the database
00041 would be fixed, and we might even lock the buffer pool to the main memory.
00042 The problem here is that the buffer management routines can themselves call
00043 memory allocation, while the buffer pool mutex is reserved.
00044 
00045 The main components of the memory consumption are:
00046 
00047 1. buffer pool,
00048 2. parsed and optimized SQL statements,
00049 3. data dictionary cache,
00050 4. log buffer,
00051 5. locks for each transaction,
00052 6. hash table for the adaptive index,
00053 7. state and buffers for each SQL query currently being executed,
00054 8. session for each user, and
00055 9. stack for each OS thread.
00056 
00057 Items 1 and 2 are managed by an LRU algorithm. Items 5 and 6 can potentially
00058 consume very much memory. Items 7 and 8 should consume quite little memory,
00059 and the OS should take care of item 9, which too should consume little memory.
00060 
00061 A solution to the memory management:
00062 
00063 1. the buffer pool size is set separately;
00064 2. log buffer size is set separately;
00065 3. the common pool size for all the other entries, except 8, is set separately.
00066 
00067 Problems: we may waste memory if the common pool is set too big. Another
00068 problem is the locks, which may take very much space in big transactions.
00069 Then the shared pool size should be set very big. We can allow locks to take
00070 space from the buffer pool, but the SQL optimizer is then unaware of the
00071 usable size of the buffer pool. We could also combine the objects in the
00072 common pool and the buffers in the buffer pool into a single LRU list and
00073 manage it uniformly, but this approach does not take into account the parsing
00074 and other costs unique to SQL statements.
00075 
00076 The locks for a transaction can be seen as a part of the state of the
00077 transaction. Hence, they should be stored in the common pool. We still
00078 have the problem of a very big update transaction, for example, which
00079 will set very many x-locks on rows, and the locks will consume a lot
00080 of memory, say, half of the buffer pool size.
00081 
00082 Another problem is what to do if we are not able to malloc a requested
00083 block of memory from the common pool. Then we can request memory from
00084 the operating system. If it does not help, a system error results.
00085 
00086 Because 5 and 6 may potentially consume very much memory, we let them grow
00087 into the buffer pool. We may let the locks of a transaction take frames
00088 from the buffer pool, when the corresponding memory heap block has grown to
00089 the size of a buffer frame. Similarly for the hash node cells of the locks,
00090 and for the adaptive index. Thus, for each individual transaction, its locks
00091 can occupy at most about the size of the buffer frame of memory in the common
00092 pool, and after that its locks will grow into the buffer pool. */
00093 
/** Mask of the low-order bit of mem_area_t::size_and_free. The accessors in
this file use that bit as the "area is free" flag and the remaining bits as
the area size. */
#define MEM_AREA_FREE 1

/** Smallest area size handled by the pool: mem_pool_create() stops carving
areas once less than this remains, and mem_area_alloc() rounds every request
up to at least this size. */
#define MEM_AREA_MIN_SIZE (2 * MEM_AREA_EXTRA_SIZE)
00099 
00100 
/** Descriptor of a memory pool whose buffer is split into power-of-two
sized areas; each area has a buddy computed by mem_area_get_buddy(). */
struct mem_pool_struct{
  byte*   buf;    /*!< start of the pool buffer
          (allocated in mem_pool_create()) */
  ulint   size;   /*!< size of buf in bytes */
  ulint   reserved; /*!< number of bytes of buf currently
          handed out, counted in whole areas
          (area headers included) */
  mutex_t   mutex;    /*!< mutex taken around accesses to
          the free lists and to reserved */
  UT_LIST_BASE_NODE_T(mem_area_t)
      free_list[64];  /*!< free areas; list number i holds
          areas whose size is ut_2_exp(i)
          (asserted by mem_pool_validate()) */
};
00114 
/** Global pool pointer, initially NULL. It is not assigned anywhere in this
file; presumably it is the "common pool" discussed in the design comment at
the top of the file -- confirm against the code that creates it. */
UNIV_INTERN mem_pool_t* mem_comm_pool = NULL;

#ifdef UNIV_PFS_MUTEX
/* Key to register mutex in mem_pool_struct with performance schema */
UNIV_INTERN mysql_pfs_key_t mem_pool_mutex_key;
#endif /* UNIV_PFS_MUTEX */

/* We use this counter to check that the mem pool mutex does not leak;
this is to track a strange assertion failure reported at
mysql@lists.mysql.com */

/* Incremented right after the pool mutex is taken in mem_area_alloc() and
mem_area_free(), asserted to be exactly 1, and decremented just before the
mutex is released. */
UNIV_INTERN ulint mem_n_threads_inside    = 0;
00128 
00129 /********************************************************************/
00133 UNIV_INLINE
00134 void
00135 mem_pool_mutex_enter(
00136 /*=================*/
00137   mem_pool_t* pool)   
00138 {
00139   if (srv_shutdown_state < SRV_SHUTDOWN_EXIT_THREADS) {
00140     mutex_enter(&(pool->mutex));
00141   }
00142 }
00143 
00144 /********************************************************************/
00148 UNIV_INLINE
00149 void
00150 mem_pool_mutex_exit(
00151 /*================*/
00152   mem_pool_t* pool)   
00153 {
00154   if (srv_shutdown_state < SRV_SHUTDOWN_EXIT_THREADS) {
00155     mutex_exit(&(pool->mutex));
00156   }
00157 }
00158 
00159 /********************************************************************/
00162 UNIV_INLINE
00163 ulint
00164 mem_area_get_size(
00165 /*==============*/
00166   mem_area_t* area) 
00167 {
00168   return(area->size_and_free & ~MEM_AREA_FREE);
00169 }
00170 
00171 /********************************************************************/
00173 UNIV_INLINE
00174 void
00175 mem_area_set_size(
00176 /*==============*/
00177   mem_area_t* area, 
00178   ulint   size) 
00179 {
00180   area->size_and_free = (area->size_and_free & MEM_AREA_FREE)
00181     | size;
00182 }
00183 
00184 /********************************************************************/
00187 UNIV_INLINE
00188 ibool
00189 mem_area_get_free(
00190 /*==============*/
00191   mem_area_t* area) 
00192 {
00193 #if TRUE != MEM_AREA_FREE
00194 # error "TRUE != MEM_AREA_FREE"
00195 #endif
00196   return(area->size_and_free & MEM_AREA_FREE);
00197 }
00198 
00199 /********************************************************************/
00201 UNIV_INLINE
00202 void
00203 mem_area_set_free(
00204 /*==============*/
00205   mem_area_t* area, 
00206   ibool   free) 
00207 {
00208 #if TRUE != MEM_AREA_FREE
00209 # error "TRUE != MEM_AREA_FREE"
00210 #endif
00211   area->size_and_free = (area->size_and_free & ~MEM_AREA_FREE)
00212     | free;
00213 }
00214 
00215 /********************************************************************/
00218 UNIV_INTERN
00219 mem_pool_t*
00220 mem_pool_create(
00221 /*============*/
00222   ulint size) 
00223 {
00224   mem_pool_t* pool;
00225   mem_area_t* area;
00226   ulint   i;
00227   ulint   used;
00228 
00229   pool = static_cast<mem_pool_t *>(ut_malloc(sizeof(mem_pool_t)));
00230 
00231   /* We do not set the memory to zero (FALSE) in the pool,
00232   but only when allocated at a higher level in mem0mem.c.
00233   This is to avoid masking useful Purify warnings. */
00234 
00235   pool->buf = static_cast<unsigned char *>(ut_malloc_low(size, FALSE, TRUE));
00236   pool->size = size;
00237 
00238   mutex_create(mem_pool_mutex_key, &pool->mutex, SYNC_MEM_POOL);
00239 
00240   /* Initialize the free lists */
00241 
00242   for (i = 0; i < 64; i++) {
00243 
00244     UT_LIST_INIT(pool->free_list[i]);
00245   }
00246 
00247   used = 0;
00248 
00249   while (size - used >= MEM_AREA_MIN_SIZE) {
00250 
00251     i = ut_2_log(size - used);
00252 
00253     if (ut_2_exp(i) > size - used) {
00254 
00255       /* ut_2_log rounds upward */
00256 
00257       i--;
00258     }
00259 
00260     area = (mem_area_t*)(pool->buf + used);
00261 
00262     mem_area_set_size(area, ut_2_exp(i));
00263     mem_area_set_free(area, TRUE);
00264     UNIV_MEM_FREE(MEM_AREA_EXTRA_SIZE + (byte*) area,
00265             ut_2_exp(i) - MEM_AREA_EXTRA_SIZE);
00266 
00267     UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area);
00268 
00269     used = used + ut_2_exp(i);
00270   }
00271 
00272   ut_ad(size >= used);
00273 
00274   pool->reserved = 0;
00275 
00276   return(pool);
00277 }
00278 
00279 /********************************************************************/
00281 UNIV_INTERN
00282 void
00283 mem_pool_free(
00284 /*==========*/
00285   mem_pool_t* pool) 
00286 {
00287   ut_free(pool->buf);
00288   ut_free(pool);
00289 }
00290 
00291 /********************************************************************/
00294 static
00295 ibool
00296 mem_pool_fill_free_list(
00297 /*====================*/
00298   ulint   i,  
00299   mem_pool_t* pool) 
00300 {
00301   mem_area_t* area;
00302   mem_area_t* area2;
00303   ibool   ret;
00304 
00305   ut_ad(mutex_own(&(pool->mutex)));
00306 
00307   if (UNIV_UNLIKELY(i >= 63)) {
00308     /* We come here when we have run out of space in the
00309     memory pool: */
00310 
00311     return(FALSE);
00312   }
00313 
00314   area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);
00315 
00316   if (area == NULL) {
00317     if (UT_LIST_GET_LEN(pool->free_list[i + 1]) > 0) {
00318       ut_print_timestamp(stderr);
00319 
00320       fprintf(stderr,
00321         "  InnoDB: Error: mem pool free list %lu"
00322         " length is %lu\n"
00323         "InnoDB: though the list is empty!\n",
00324         (ulong) i + 1,
00325         (ulong)
00326         UT_LIST_GET_LEN(pool->free_list[i + 1]));
00327     }
00328 
00329     ret = mem_pool_fill_free_list(i + 1, pool);
00330 
00331     if (ret == FALSE) {
00332 
00333       return(FALSE);
00334     }
00335 
00336     area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);
00337   }
00338 
00339   if (UNIV_UNLIKELY(UT_LIST_GET_LEN(pool->free_list[i + 1]) == 0)) {
00340     mem_analyze_corruption(area);
00341 
00342     ut_error;
00343   }
00344 
00345   UT_LIST_REMOVE(free_list, pool->free_list[i + 1], area);
00346 
00347   area2 = (mem_area_t*)(((byte*)area) + ut_2_exp(i));
00348   UNIV_MEM_ALLOC(area2, MEM_AREA_EXTRA_SIZE);
00349 
00350   mem_area_set_size(area2, ut_2_exp(i));
00351   mem_area_set_free(area2, TRUE);
00352 
00353   UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area2);
00354 
00355   mem_area_set_size(area, ut_2_exp(i));
00356 
00357   UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area);
00358 
00359   return(TRUE);
00360 }
00361 
00362 /********************************************************************/
00366 UNIV_INTERN
00367 void*
00368 mem_area_alloc(
00369 /*===========*/
00370   ulint*    psize,  
00375   mem_pool_t* pool) 
00376 {
00377   mem_area_t* area;
00378   ulint   size;
00379   ulint   n;
00380   ibool   ret;
00381 
00382   /* If we are using os allocator just make a simple call
00383   to malloc */
00384   if (UNIV_LIKELY(srv_use_sys_malloc)) {
00385     return(malloc(*psize));
00386   }
00387 
00388   size = *psize;
00389   n = ut_2_log(ut_max(size + MEM_AREA_EXTRA_SIZE, MEM_AREA_MIN_SIZE));
00390 
00391   mutex_enter(&(pool->mutex));
00392   mem_n_threads_inside++;
00393 
00394   ut_a(mem_n_threads_inside == 1);
00395 
00396   area = UT_LIST_GET_FIRST(pool->free_list[n]);
00397 
00398   if (area == NULL) {
00399     ret = mem_pool_fill_free_list(n, pool);
00400 
00401     if (ret == FALSE) {
00402       /* Out of memory in memory pool: we try to allocate
00403       from the operating system with the regular malloc: */
00404 
00405       mem_n_threads_inside--;
00406       mutex_exit(&(pool->mutex));
00407 
00408       return(ut_malloc(size));
00409     }
00410 
00411     area = UT_LIST_GET_FIRST(pool->free_list[n]);
00412   }
00413 
00414   if (!mem_area_get_free(area)) {
00415     fprintf(stderr,
00416       "InnoDB: Error: Removing element from mem pool"
00417       " free list %lu though the\n"
00418       "InnoDB: element is not marked free!\n",
00419       (ulong) n);
00420 
00421     mem_analyze_corruption(area);
00422 
00423     /* Try to analyze a strange assertion failure reported at
00424     mysql@lists.mysql.com where the free bit IS 1 in the
00425     hex dump above */
00426 
00427     if (mem_area_get_free(area)) {
00428       fprintf(stderr,
00429         "InnoDB: Probably a race condition"
00430         " because now the area is marked free!\n");
00431     }
00432 
00433     ut_error;
00434   }
00435 
00436   if (UT_LIST_GET_LEN(pool->free_list[n]) == 0) {
00437     fprintf(stderr,
00438       "InnoDB: Error: Removing element from mem pool"
00439       " free list %lu\n"
00440       "InnoDB: though the list length is 0!\n",
00441       (ulong) n);
00442     mem_analyze_corruption(area);
00443 
00444     ut_error;
00445   }
00446 
00447   ut_ad(mem_area_get_size(area) == ut_2_exp(n));
00448 
00449   mem_area_set_free(area, FALSE);
00450 
00451   UT_LIST_REMOVE(free_list, pool->free_list[n], area);
00452 
00453   pool->reserved += mem_area_get_size(area);
00454 
00455   mem_n_threads_inside--;
00456   mutex_exit(&(pool->mutex));
00457 
00458   ut_ad(mem_pool_validate(pool));
00459 
00460   *psize = ut_2_exp(n) - MEM_AREA_EXTRA_SIZE;
00461   UNIV_MEM_ALLOC(MEM_AREA_EXTRA_SIZE + (byte*)area, *psize);
00462 
00463   return((void*)(MEM_AREA_EXTRA_SIZE + ((byte*)area)));
00464 }
00465 
00466 /********************************************************************/
00469 UNIV_INLINE
00470 mem_area_t*
00471 mem_area_get_buddy(
00472 /*===============*/
00473   mem_area_t* area, 
00474   ulint   size, 
00475   mem_pool_t* pool) 
00476 {
00477   mem_area_t* buddy;
00478 
00479   ut_ad(size != 0);
00480 
00481   if (((((byte*)area) - pool->buf) % (2 * size)) == 0) {
00482 
00483     /* The buddy is in a higher address */
00484 
00485     buddy = (mem_area_t*)(((byte*)area) + size);
00486 
00487     if ((((byte*)buddy) - pool->buf) + size > pool->size) {
00488 
00489       /* The buddy is not wholly contained in the pool:
00490       there is no buddy */
00491 
00492       buddy = NULL;
00493     }
00494   } else {
00495     /* The buddy is in a lower address; NOTE that area cannot
00496     be at the pool lower end, because then we would end up to
00497     the upper branch in this if-clause: the remainder would be
00498     0 */
00499 
00500     buddy = (mem_area_t*)(((byte*)area) - size);
00501   }
00502 
00503   return(buddy);
00504 }
00505 
00506 /********************************************************************/
00508 UNIV_INTERN
00509 void
00510 mem_area_free(
00511 /*==========*/
00512   void*   ptr,  
00514   mem_pool_t* pool) 
00515 {
00516   mem_area_t* area;
00517   mem_area_t* buddy;
00518   void*   new_ptr;
00519   ulint   size;
00520   ulint   n;
00521 
00522   if (UNIV_LIKELY(srv_use_sys_malloc)) {
00523     free(ptr);
00524 
00525     return;
00526   }
00527 
00528   /* It may be that the area was really allocated from the OS with
00529   regular malloc: check if ptr points within our memory pool */
00530 
00531   if ((byte*)ptr < pool->buf || (byte*)ptr >= pool->buf + pool->size) {
00532     ut_free(ptr);
00533 
00534     return;
00535   }
00536 
00537   area = (mem_area_t*) (((byte*)ptr) - MEM_AREA_EXTRA_SIZE);
00538 
00539   if (mem_area_get_free(area)) {
00540     fprintf(stderr,
00541       "InnoDB: Error: Freeing element to mem pool"
00542       " free list though the\n"
00543       "InnoDB: element is marked free!\n");
00544 
00545     mem_analyze_corruption(area);
00546     ut_error;
00547   }
00548 
00549   size = mem_area_get_size(area);
00550   UNIV_MEM_FREE(ptr, size - MEM_AREA_EXTRA_SIZE);
00551 
00552   if (size == 0) {
00553     fprintf(stderr,
00554       "InnoDB: Error: Mem area size is 0. Possibly a"
00555       " memory overrun of the\n"
00556       "InnoDB: previous allocated area!\n");
00557 
00558     mem_analyze_corruption(area);
00559     ut_error;
00560   }
00561 
00562 #ifdef UNIV_LIGHT_MEM_DEBUG
00563   if (((byte*)area) + size < pool->buf + pool->size) {
00564 
00565     ulint next_size;
00566 
00567     next_size = mem_area_get_size(
00568       (mem_area_t*)(((byte*)area) + size));
00569     if (UNIV_UNLIKELY(!next_size || !ut_is_2pow(next_size))) {
00570       fprintf(stderr,
00571         "InnoDB: Error: Memory area size %lu,"
00572         " next area size %lu not a power of 2!\n"
00573         "InnoDB: Possibly a memory overrun of"
00574         " the buffer being freed here.\n",
00575         (ulong) size, (ulong) next_size);
00576       mem_analyze_corruption(area);
00577 
00578       ut_error;
00579     }
00580   }
00581 #endif
00582   buddy = mem_area_get_buddy(area, size, pool);
00583 
00584   n = ut_2_log(size);
00585 
00586   mem_pool_mutex_enter(pool);
00587   mem_n_threads_inside++;
00588 
00589   ut_a(mem_n_threads_inside == 1);
00590 
00591   if (buddy && mem_area_get_free(buddy)
00592       && (size == mem_area_get_size(buddy))) {
00593 
00594     /* The buddy is in a free list */
00595 
00596     if ((byte*)buddy < (byte*)area) {
00597       new_ptr = ((byte*)buddy) + MEM_AREA_EXTRA_SIZE;
00598 
00599       mem_area_set_size(buddy, 2 * size);
00600       mem_area_set_free(buddy, FALSE);
00601     } else {
00602       new_ptr = ptr;
00603 
00604       mem_area_set_size(area, 2 * size);
00605     }
00606 
00607     /* Remove the buddy from its free list and merge it to area */
00608 
00609     UT_LIST_REMOVE(free_list, pool->free_list[n], buddy);
00610 
00611     pool->reserved += ut_2_exp(n);
00612 
00613     mem_n_threads_inside--;
00614     mem_pool_mutex_exit(pool);
00615 
00616     mem_area_free(new_ptr, pool);
00617 
00618     return;
00619   } else {
00620     UT_LIST_ADD_FIRST(free_list, pool->free_list[n], area);
00621 
00622     mem_area_set_free(area, TRUE);
00623 
00624     ut_ad(pool->reserved >= size);
00625 
00626     pool->reserved -= size;
00627   }
00628 
00629   mem_n_threads_inside--;
00630   mem_pool_mutex_exit(pool);
00631 
00632   ut_ad(mem_pool_validate(pool));
00633 }
00634 
00635 /********************************************************************/
00638 UNIV_INTERN
00639 ibool
00640 mem_pool_validate(
00641 /*==============*/
00642   mem_pool_t* pool) 
00643 {
00644   mem_area_t* area;
00645   mem_area_t* buddy;
00646   ulint   free;
00647   ulint   i;
00648 
00649   mem_pool_mutex_enter(pool);
00650 
00651   free = 0;
00652 
00653   for (i = 0; i < 64; i++) {
00654 
00655     UT_LIST_VALIDATE(free_list, mem_area_t, pool->free_list[i],
00656          (void) 0);
00657 
00658     area = UT_LIST_GET_FIRST(pool->free_list[i]);
00659 
00660     while (area != NULL) {
00661       ut_a(mem_area_get_free(area));
00662       ut_a(mem_area_get_size(area) == ut_2_exp(i));
00663 
00664       buddy = mem_area_get_buddy(area, ut_2_exp(i), pool);
00665 
00666       ut_a(!buddy || !mem_area_get_free(buddy)
00667            || (ut_2_exp(i) != mem_area_get_size(buddy)));
00668 
00669       area = UT_LIST_GET_NEXT(free_list, area);
00670 
00671       free += ut_2_exp(i);
00672     }
00673   }
00674 
00675   ut_a(free + pool->reserved == pool->size);
00676 
00677   mem_pool_mutex_exit(pool);
00678 
00679   return(TRUE);
00680 }
00681 
00682 /********************************************************************/
00684 UNIV_INTERN
00685 void
00686 mem_pool_print_info(
00687 /*================*/
00688   FILE*   outfile,
00689   mem_pool_t* pool) 
00690 {
00691   ulint   i;
00692 
00693   mem_pool_validate(pool);
00694 
00695   fprintf(outfile, "INFO OF A MEMORY POOL\n");
00696 
00697   mutex_enter(&(pool->mutex));
00698 
00699   for (i = 0; i < 64; i++) {
00700     if (UT_LIST_GET_LEN(pool->free_list[i]) > 0) {
00701 
00702       fprintf(outfile,
00703         "Free list length %lu for"
00704         " blocks of size %lu\n",
00705         (ulong) UT_LIST_GET_LEN(pool->free_list[i]),
00706         (ulong) ut_2_exp(i));
00707     }
00708   }
00709 
00710   fprintf(outfile, "Pool size %lu, reserved %lu.\n", (ulong) pool->size,
00711     (ulong) pool->reserved);
00712   mutex_exit(&(pool->mutex));
00713 }
00714 
00715 /********************************************************************/
00718 UNIV_INTERN
00719 ulint
00720 mem_pool_get_reserved(
00721 /*==================*/
00722   mem_pool_t* pool) 
00723 {
00724   ulint reserved;
00725 
00726   mutex_enter(&(pool->mutex));
00727 
00728   reserved = pool->reserved;
00729 
00730   mutex_exit(&(pool->mutex));
00731 
00732   return(reserved);
00733 }