00001 /* Copyright (C) 2000-2002 MySQL AB 00002 Copyright (C) 2008 eBay, Inc 00003 00004 This program is free software; you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation; version 2 of the License. 00007 00008 This program is distributed in the hope that it will be useful, 00009 but WITHOUT ANY WARRANTY; without even the implied warranty of 00010 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00011 GNU General Public License for more details. 00012 00013 You should have received a copy of the GNU General Public License 00014 along with this program; if not, write to the Free Software 00015 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ 00016 00017 /* Implements various base dataspace-related functions - allocate, free, clear */ 00018 00019 #include "heap_priv.h" 00020 00021 #include <cassert> 00022 00023 00024 /* 00025 MySQL Heap tables keep data in arrays of fixed-size chunks. 00026 These chunks are organized into two groups of HP_BLOCK structures: 00027 - group1 contains indexes, with one HP_BLOCK per key 00028 (part of HP_KEYDEF) 00029 - group2 contains record data, with single HP_BLOCK 00030 for all records, referenced by HP_SHARE.recordspace.block 00031 00032 While columns used in index are usually small, other columns 00033 in the table may need to accomodate larger data. Typically, 00034 larger data is placed into VARCHAR or BLOB columns. With actual 00035 sizes varying, Heap Engine has to support variable-sized records 00036 in memory. Heap Engine implements the concept of dataspace 00037 (HP_DATASPACE), which incorporates HP_BLOCK for the record data, 00038 and adds more information for managing variable-sized records. 00039 00040 Variable-size records are stored in multiple "chunks", 00041 which means that a single record of data (database "row") can 00042 consist of multiple chunks organized into one "set". HP_BLOCK 00043 contains chunks. In variable-size format, one record 00044 is represented as one or many chunks, depending on the actual 00045 data, while in fixed-size mode, one record is always represented 00046 as one chunk. The index structures would always point to the first 00047 chunk in the chunkset. 00048 00049 At the time of table creation, Heap Engine attempts to find out 00050 if variable-size records are desired. A user can request 00051 variable-size records by providing either row_type=dynamic or 00052 block_size=NNN table create option. Heap Engine will check 00053 whether block_size provides enough space in the first chunk 00054 to keep all null bits and columns that are used in indexes. 00055 If block_size is too small, table creation will be aborted 00056 with an error. Heap Engine will revert to fixed-size allocation 00057 mode if block_size provides no memory benefits (no VARCHAR 00058 fields extending past first chunk). 00059 00060 In order to improve index search performance, Heap Engine needs 00061 to keep all null flags and all columns used as keys inside 00062 the first chunk of a chunkset. In particular, this means that 00063 all columns used as keys should be defined first in the table 00064 creation SQL. The length of data used by null bits and key columns 00065 is stored as fixed_data_length inside HP_SHARE. fixed_data_length 00066 will extend past last key column if more fixed-length fields can 00067 fit into the first chunk. 00068 00069 Variable-size records are necessary only in the presence 00070 of variable-size columns. Heap Engine will be looking for VARCHAR 00071 columns, which declare length of 32 or more. If no such columns 00072 are found, table will be switched to fixed-size format. You should 00073 always try to put such columns at the end of the table definition. 00074 00075 Whenever data is being inserted or updated in the table 00076 Heap Engine will calculate how many chunks are necessary. 00077 For insert operations, Heap Engine allocates new chunkset in 00078 the recordspace. For update operations it will modify length of 00079 the existing chunkset, unlinking unnecessary chunks at the end, 00080 or allocating and adding more if larger length is necessary. 00081 00082 When writing data to chunks or copying data back to record, 00083 Heap Engine will first copy fixed_data_length of data using single 00084 memcpy call. The rest of the columns are processed one-by-one. 00085 Non-VARCHAR columns are copied in their full format. VARCHAR's 00086 are copied based on their actual length. Any NULL values after 00087 fixed_data_length are skipped. 00088 00089 The allocation and contents of the actual chunks varies between 00090 fixed and variable-size modes. Total chunk length is always 00091 aligned to the next sizeof(unsigned char*). Here is the format of 00092 fixed-size chunk: 00093 unsigned char[] - sizeof=chunk_dataspace_length, but at least 00094 sizeof(unsigned char*) bytes. Keeps actual data or pointer 00095 to the next deleted chunk. 00096 chunk_dataspace_length equals to full record length 00097 unsigned char - status field (1 means "in use", 0 means "deleted") 00098 Variable-size uses different format: 00099 unsigned char[] - sizeof=chunk_dataspace_length, but at least 00100 sizeof(unsigned char*) bytes. Keeps actual data or pointer 00101 to the next deleted chunk. 00102 chunk_dataspace_length is set according to table 00103 setup (block_size) 00104 unsigned char* - pointer to the next chunk in this chunkset, 00105 or NULL for the last chunk 00106 unsigned char - status field (1 means "first", 0 means "deleted", 00107 2 means "linked") 00108 00109 When allocating a new chunkset of N chunks, Heap Engine will try 00110 to allocate chunks one-by-one, linking them as they become 00111 allocated. Allocation of a single chunk will attempt to reuse 00112 a deleted (freed) chunk. If no free chunks are available, 00113 it will attempt to allocate a new area inside HP_BLOCK. 00114 Freeing chunks will place them at the front of free list 00115 referenced by del_link in HP_DATASPACE. The newly freed chunk 00116 will contain reference to the previously freed chunk in its first 00117 sizeof(unsigned char*) of the payload space. 00118 00119 Here is open issues: 00120 1. It is not very nice to require people to keep key columns 00121 at the beginning of the table creation SQL. There are three 00122 proposed resolutions: 00123 a. Leave it as is. It's a reasonable limitation 00124 b. Add new HA_KEEP_KEY_COLUMNS_TO_FRONT flag to handler.h and 00125 make table.cpp align columns when it creates the table 00126 c. Make HeapEngine reorder columns in the chunk data, so that 00127 key columns go first. Add parallel HA_KEYSEG structures 00128 to distinguish positions in record vs. positions in 00129 the first chunk. Copy all data field-by-field rather than 00130 using single memcpy unless DBA kept key columns to 00131 the beginning. 00132 2. heap_check_heap needs verify linked chunks, looking for 00133 issues such as orphans, cycles, and bad links. However, 00134 Heap Engine today does not do similar things even for 00135 free list. 00136 3. With new HP_DATASPACE allocation mechaism, BLOB will become 00137 increasingly simple to implement, but I may not have time 00138 for that. In one approach, BLOB data can be placed at 00139 the end of the same record. In another approach (which I 00140 prefer) BLOB data would have its own HP_DATASPACE with 00141 variable-size entries. 00142 4. In a more sophisticated implementation, some space can 00143 be saved even with all fixed-size columns if many of them 00144 have NULL value, as long as these columns are not used 00145 in indexes 00146 5. In variable-size format status should be moved to lower 00147 bits of the "next" pointer. Pointer is always aligned 00148 to sizeof(unsigned char*), which is at least 4, leaving 2 lower 00149 bits free. This will save 8 bytes per chunk 00150 on 64-bit platform. 00151 6. As we do not want to modify FRM format, BLOCK_SIZE option 00152 of "CREATE TABLE" is saved as "RAID_CHUNKSIZE" for 00153 Heap Engine tables. 00154 */ 00155 00156 static unsigned char *hp_allocate_one_chunk(HP_DATASPACE *info); 00157 00158 00167 void hp_clear_dataspace(HP_DATASPACE *info) 00168 { 00169 if (info->block.levels) 00170 { 00171 hp_free_level(&info->block,info->block.levels,info->block.root, 00172 (unsigned char*) 0); 00173 } 00174 info->block.levels=0; 00175 info->del_chunk_count= info->chunk_count= 0; 00176 info->del_link=0; 00177 info->total_data_length= 0; 00178 } 00179 00180 00192 unsigned char *hp_allocate_chunkset(HP_DATASPACE *info, uint32_t ) 00193 { 00194 unsigned char* result; 00195 00196 result= hp_allocate_one_chunk(info); 00197 if (result) 00198 { 00199 result[info->offset_status]= CHUNK_STATUS_ACTIVE; 00200 } 00201 00202 return(result); 00203 } 00204 00205 00216 static unsigned char *hp_allocate_one_chunk(HP_DATASPACE *info) 00217 { 00218 unsigned char* curr_chunk; 00219 size_t length, block_pos; 00220 00221 if (info->del_link) 00222 { 00223 curr_chunk=info->del_link; 00224 info->del_link= *((unsigned char**) curr_chunk); 00225 info->del_chunk_count--; 00226 00227 return curr_chunk; 00228 } 00229 00230 block_pos= (info->chunk_count % info->block.records_in_block); 00231 if (!block_pos) 00232 { 00233 if (hp_get_new_block(&info->block,&length)) 00234 { 00235 /* no space in the current block */ 00236 return NULL; 00237 } 00238 00239 info->total_data_length+= length; 00240 } 00241 00242 info->chunk_count++; 00243 curr_chunk= ((unsigned char*) info->block.level_info[0].last_blocks + 00244 block_pos * info->block.recbuffer); 00245 00246 00247 return curr_chunk; 00248 } 00249 00250 00261 void hp_free_chunks(HP_DATASPACE *info, unsigned char *pos) 00262 { 00263 unsigned char* curr_chunk= pos; 00264 00265 if (curr_chunk) 00266 { 00267 info->del_chunk_count++; 00268 *((unsigned char**) curr_chunk)= info->del_link; 00269 info->del_link= curr_chunk; 00270 00271 curr_chunk[info->offset_status]= CHUNK_STATUS_DELETED; 00272 } 00273 }