Drizzled Public API Documentation

charset_info.h

00001 /* Copyright (C) 2000 MySQL AB
00002 
00003    This program is free software; you can redistribute it and/or modify
00004    it under the terms of the GNU General Public License as published by
00005    the Free Software Foundation; version 2 of the License.
00006 
00007    This program is distributed in the hope that it will be useful,
00008    but WITHOUT ANY WARRANTY; without even the implied warranty of
00009    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00010    GNU General Public License for more details.
00011 
00012    You should have received a copy of the GNU General Public License
00013    along with this program; if not, write to the Free Software
00014    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */
00015 
00016 /*
00017   A better implementation of the UNIX ctype(3) library.
00018 */
00019 
00020 #pragma once
00021 
00022 #include <sys/types.h>
00023 #include <cstddef>
00024 
00025 #include <drizzled/visibility.h>
00026 
00027 namespace drizzled
00028 {
00029 
/*
  Fixed sizes of the per-charset lookup tables.  The ctype table is one
  entry larger than the byte-indexed tables.
*/
#define MY_CS_NAME_SIZE             32
#define MY_CS_CTYPE_TABLE_SIZE      257
#define MY_CS_TO_LOWER_TABLE_SIZE   256
#define MY_CS_TO_UPPER_TABLE_SIZE   256
#define MY_CS_SORT_ORDER_TABLE_SIZE 256
#define MY_CS_TO_UNI_TABLE_SIZE     256

/* Directory name for charset files.
   NOTE(review): presumably relative to the share directory -- confirm. */
#define CHARSET_DIR "charsets/"

/* Wide-character type exchanged with the mb_wc / wc_mb converters.  This is
   a macro rather than a typedef, so the enclosing namespace does not scope it. */
#define my_wc_t unsigned long
00040 
/*
  One entry of a Unicode case table: three 16-bit values.
  NOTE(review): from the field names these are the upper-case form, the
  lower-case form and the sort form of a character -- confirm against the
  tables that fill this structure.
*/
typedef struct unicase_info_st
{
  uint16_t toupper;
  uint16_t tolower;
  uint16_t sort;
} MY_UNICASE_INFO;

/* 256-slot arrays of pointers to MY_UNICASE_INFO (default and Turkish sets). */
extern MY_UNICASE_INFO *my_unicase_default[256];
extern MY_UNICASE_INFO *my_unicase_turkish[256];
00051 
/*
  Character-class information for one slot of my_uni_ctype[]: a single class
  byte plus a pointer to a table of class bytes.
  NOTE(review): presumably `pctype` applies to the whole slot when `ctype`
  is null -- confirm against the code that reads my_uni_ctype.
*/
typedef struct uni_ctype_st
{
  unsigned char pctype;
  unsigned char *ctype;
} MY_UNI_CTYPE;

/* 256-slot table of MY_UNI_CTYPE entries, defined elsewhere. */
extern MY_UNI_CTYPE my_uni_ctype[256];
00059 
/* Return codes of the mb_wc and wc_mb converters */
#define MY_CS_ILSEQ 0     /* mb_wc: wrong byte sequence                    */
#define MY_CS_ILUNI 0     /* wc_mb: cannot encode Unicode to the charset   */

/* "Buffer too small" codes, -(100 + bytes needed), for wc_mb and mb_wc */
#define MY_CS_TOOSMALL  -101  /* need at least one byte    */
#define MY_CS_TOOSMALL2 -102  /* need at least two bytes   */
#define MY_CS_TOOSMALL3 -103  /* need at least three bytes */
/* The following three are currently not really used */
#define MY_CS_TOOSMALL4 -104  /* need at least 4 bytes */
#define MY_CS_TOOSMALL5 -105  /* need at least 5 bytes */
#define MY_CS_TOOSMALL6 -106  /* need at least 6 bytes */
00070 
/*
  Build the "need at least n bytes" return code.

  @param n  number of bytes required
  @return   -100 - n, e.g. -101 for n == 1 (the value of MY_CS_TOOSMALL)
*/
static inline int my_cs_toosmalln(int n)
{
  const int base = -100;
  return base - n;
}
00076 
/* Sequence kinds.
   NOTE(review): presumably the `sq` argument of the scan() routines -- confirm. */
#define MY_SEQ_INTTAIL  1
#define MY_SEQ_SPACES   2

/* My charsets_list flags (bit mask; the value 4096 is not assigned) */
#define MY_CS_COMPILED  1      /* compiled-in sets               */
#define MY_CS_CONFIG    2      /* sets that have a *.conf file   */
#define MY_CS_INDEX     4      /* sets listed in the Index file  */
#define MY_CS_LOADED    8      /* sets that are currently loaded */
#define MY_CS_BINSORT   16     /* if binary sort order           */
#define MY_CS_PRIMARY   32     /* if primary collation           */
#define MY_CS_STRNXFRM  64     /* if strnxfrm is used for sort   */
#define MY_CS_UNICODE   128    /* if a charset is full unicode   */
#define MY_CS_READY     256    /* if a charset is initialized    */
#define MY_CS_AVAILABLE 512    /* if either compiled-in or loaded */
#define MY_CS_CSSORT    1024   /* if case sensitive sort order   */
#define MY_CS_HIDDEN    2048   /* don't display in SHOW          */
#define MY_CS_NONASCII  8192   /* if not ASCII-compatible        */
#define MY_CHARSET_UNDEFINED 0
00095 
/*
  Flags for strxfrm.

  Bits 0-5 select collation levels 1-6.  Each level also has a "descending"
  bit and a "reverse" bit at the same position shifted left by
  MY_STRXFRM_DESC_SHIFT and MY_STRXFRM_REVERSE_SHIFT respectively:

    MY_STRXFRM_DESC_LEVELn    == MY_STRXFRM_LEVELn << MY_STRXFRM_DESC_SHIFT
    MY_STRXFRM_REVERSE_LEVELn == MY_STRXFRM_LEVELn << MY_STRXFRM_REVERSE_SHIFT
*/
#define MY_STRXFRM_LEVEL1          0x00000001 /* for primary weights   */
#define MY_STRXFRM_LEVEL2          0x00000002 /* for secondary weights */
#define MY_STRXFRM_LEVEL3          0x00000004 /* for tertiary weights  */
#define MY_STRXFRM_LEVEL4          0x00000008 /* fourth level weights  */
#define MY_STRXFRM_LEVEL5          0x00000010 /* fifth level weights   */
#define MY_STRXFRM_LEVEL6          0x00000020 /* sixth level weights   */
#define MY_STRXFRM_LEVEL_ALL       0x0000003F /* Bit OR for the above six */
#define MY_STRXFRM_NLEVELS         6          /* Number of possible levels*/

#define MY_STRXFRM_PAD_WITH_SPACE  0x00000040 /* if pad result with spaces */
#define MY_STRXFRM_UNUSED_00000080 0x00000080 /* for future extensions     */

#define MY_STRXFRM_DESC_LEVEL1     0x00000100 /* if desc order for level1 */
#define MY_STRXFRM_DESC_LEVEL2     0x00000200 /* if desc order for level2 */
/* Was 0x00000300, i.e. DESC_LEVEL1 | DESC_LEVEL2; level 3 owns bit 10. */
#define MY_STRXFRM_DESC_LEVEL3     0x00000400 /* if desc order for level3 */
#define MY_STRXFRM_DESC_LEVEL4     0x00000800 /* if desc order for level4 */
#define MY_STRXFRM_DESC_LEVEL5     0x00001000 /* if desc order for level5 */
#define MY_STRXFRM_DESC_LEVEL6     0x00002000 /* if desc order for level6 */
#define MY_STRXFRM_DESC_SHIFT      8

#define MY_STRXFRM_UNUSED_00004000 0x00004000 /* for future extensions     */
#define MY_STRXFRM_UNUSED_00008000 0x00008000 /* for future extensions     */

#define MY_STRXFRM_REVERSE_LEVEL1  0x00010000 /* if reverse order for level1 */
#define MY_STRXFRM_REVERSE_LEVEL2  0x00020000 /* if reverse order for level2 */
#define MY_STRXFRM_REVERSE_LEVEL3  0x00040000 /* if reverse order for level3 */
#define MY_STRXFRM_REVERSE_LEVEL4  0x00080000 /* if reverse order for level4 */
#define MY_STRXFRM_REVERSE_LEVEL5  0x00100000 /* if reverse order for level5 */
#define MY_STRXFRM_REVERSE_LEVEL6  0x00200000 /* if reverse order for level6 */
#define MY_STRXFRM_REVERSE_SHIFT   16
00127 
00128 
/*
  A 16-bit range [from, to] paired with a byte table; this is the element
  type of charset_info_st::tab_from_uni.
  NOTE(review): presumably `tab` holds one charset byte per value in the
  range (Unicode -> charset direction) -- confirm.
*/
typedef struct my_uni_idx_st
{
  uint16_t from;
  uint16_t to;
  unsigned char *tab;
} MY_UNI_IDX;
00135 
/*
  Match descriptor filled in by the instr() routines.
  NOTE(review): presumably `beg`/`end` are offsets of the match and `mb_len`
  its length in characters -- confirm against an instr implementation.
*/
typedef struct
{
  uint32_t beg;
  uint32_t end;
  uint32_t mb_len;
} my_match_t;
00142 
/*
  Lexer states.  Values are assigned consecutively from 0 in declaration
  order, so the order of the enumerators must not change.
*/
enum my_lex_states
{
  MY_LEX_START,
  MY_LEX_CHAR,
  MY_LEX_IDENT,
  MY_LEX_IDENT_SEP,
  MY_LEX_IDENT_START,
  MY_LEX_REAL,
  MY_LEX_HEX_NUMBER,
  MY_LEX_BIN_NUMBER,
  MY_LEX_CMP_OP,
  MY_LEX_LONG_CMP_OP,
  MY_LEX_STRING,
  MY_LEX_COMMENT,
  MY_LEX_END,
  MY_LEX_OPERATOR_OR_IDENT,
  MY_LEX_NUMBER_IDENT,
  MY_LEX_INT_OR_REAL,
  MY_LEX_REAL_OR_POINT,
  MY_LEX_BOOL,
  MY_LEX_EOL,
  MY_LEX_ESCAPE,
  MY_LEX_LONG_COMMENT,
  MY_LEX_END_LONG_COMMENT,
  MY_LEX_SEMICOLON,
  MY_LEX_SET_VAR,
  MY_LEX_USER_END,
  MY_LEX_HOSTNAME,
  MY_LEX_SKIP,
  MY_LEX_USER_VARIABLE_DELIMITER,
  MY_LEX_SYSTEM_VAR,
  MY_LEX_IDENT_OR_KEYWORD,
  MY_LEX_IDENT_OR_HEX,
  MY_LEX_IDENT_OR_BIN,
  MY_LEX_STRING_OR_DELIMITER
};
00158 
00159 struct charset_info_st;
00160 
00161 
00162 /* See strings/CHARSET_INFO.txt for information about this structure  */
00163 typedef struct my_collation_handler_st
00164 {
00165   bool (*init)(struct charset_info_st *, unsigned char *(*alloc)(size_t));
00166   /* Collation routines */
00167   int     (*strnncoll)(const struct charset_info_st * const,
00168            const unsigned char *, size_t, const unsigned char *, size_t, bool);
00169   int     (*strnncollsp)(const struct charset_info_st * const,
00170                          const unsigned char *, size_t, const unsigned char *, size_t,
00171                          bool diff_if_only_endspace_difference);
00172   size_t  (*strnxfrm)(const struct charset_info_st * const,
00173                       unsigned char *dst, size_t dstlen, uint32_t nweights,
00174                       const unsigned char *src, size_t srclen, uint32_t flags);
00175   size_t    (*strnxfrmlen)(const struct charset_info_st * const, size_t);
00176   bool (*like_range)(const struct charset_info_st * const,
00177                         const char *s, size_t s_length,
00178                         char escape, char w_one, char w_many,
00179                         size_t res_length,
00180                         char *min_str, char *max_str,
00181                         size_t *min_len, size_t *max_len);
00182   int     (*wildcmp)(const struct charset_info_st * const,
00183            const char *str,const char *str_end,
00184                      const char *wildstr,const char *wildend,
00185                      int escape,int w_one, int w_many);
00186 
00187   int  (*strcasecmp)(const struct charset_info_st * const, const char *, const char *);
00188 
00189   uint32_t (*instr)(const struct charset_info_st * const,
00190                 const char *b, size_t b_length,
00191                 const char *s, size_t s_length,
00192                 my_match_t *match, uint32_t nmatch);
00193 
00194   /* Hash calculation */
00195   void (*hash_sort)(const struct charset_info_st *cs, const unsigned char *key, size_t len,
00196                     uint32_t *nr1, uint32_t *nr2);
00197   bool (*propagate)(const struct charset_info_st *cs, const unsigned char *str, size_t len);
00198 } MY_COLLATION_HANDLER;
00199 
00200 extern MY_COLLATION_HANDLER my_collation_mb_bin_handler;
00201 extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler;
00202 extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler;
00203 
00204 /* Some typedef to make it easy for C++ to make function pointers */
00205 typedef int (*my_charset_conv_mb_wc)(const struct charset_info_st * const, my_wc_t *,
00206                                      const unsigned char *, const unsigned char *);
00207 typedef int (*my_charset_conv_wc_mb)(const struct charset_info_st * const, my_wc_t,
00208                                      unsigned char *, unsigned char *);
00209 typedef size_t (*my_charset_conv_case)(const struct charset_info_st * const,
00210                                        char *, size_t, char *, size_t);
00211 
00212 
00213 /* See strings/CHARSET_INFO.txt about information on this structure  */
00214 typedef struct my_charset_handler_st
00215 {
00216   bool (*init)(struct charset_info_st *, unsigned char *(*alloc)(size_t));
00217   /* Multibyte routines */
00218   uint32_t    (*ismbchar)(const struct charset_info_st * const, const char *, const char *);
00219   uint32_t    (*mbcharlen)(const struct charset_info_st * const, uint32_t c);
00220   size_t  (*numchars)(const struct charset_info_st * const, const char *b, const char *e);
00221   size_t  (*charpos)(const struct charset_info_st * const, const char *b, const char *e,
00222                      size_t pos);
00223   size_t  (*well_formed_len)(const struct charset_info_st * const,
00224                              const char *b,const char *e,
00225                              size_t nchars, int *error);
00226   size_t  (*lengthsp)(const struct charset_info_st * const, const char *ptr, size_t length);
00227   size_t  (*numcells)(const struct charset_info_st * const, const char *b, const char *e);
00228 
00229   /* Unicode conversion */
00230   my_charset_conv_mb_wc mb_wc;
00231   my_charset_conv_wc_mb wc_mb;
00232 
00233   /* CTYPE scanner */
00234   int (*ctype)(const struct charset_info_st *cs, int *ctype,
00235                const unsigned char *s, const unsigned char *e);
00236 
00237   /* Functions for case and sort conversion */
00238   size_t  (*caseup_str)(const struct charset_info_st * const, char *);
00239   size_t  (*casedn_str)(const struct charset_info_st * const, char *);
00240 
00241   my_charset_conv_case caseup;
00242   my_charset_conv_case casedn;
00243 
00244   /* Charset dependant snprintf() */
00245   size_t (*snprintf)(const struct charset_info_st * const, char *to, size_t n,
00246                      const char *fmt,
00247                      ...)
00248 #if defined(__GNUC__) && !defined(__INTEL_COMPILER)
00249                          __attribute__((format(printf, 4, 5)))
00250 #endif
00251                          ;
00252   size_t (*long10_to_str)(const struct charset_info_st * const, char *to, size_t n,
00253                           int radix, long int val);
00254   size_t (*int64_t10_to_str)(const struct charset_info_st * const, char *to, size_t n,
00255                               int radix, int64_t val);
00256 
00257   void (*fill)(const struct charset_info_st * const, char *to, size_t len, int fill);
00258 
00259   /* String-to-number conversion routines */
00260   long        (*strntol)(const struct charset_info_st * const, const char *s, size_t l,
00261        int base, char **e, int *err);
00262   unsigned long      (*strntoul)(const struct charset_info_st * const, const char *s, size_t l,
00263        int base, char **e, int *err);
00264   int64_t   (*strntoll)(const struct charset_info_st * const, const char *s, size_t l,
00265        int base, char **e, int *err);
00266   uint64_t (*strntoull)(const struct charset_info_st * const, const char *s, size_t l,
00267        int base, char **e, int *err);
00268   double      (*strntod)(const struct charset_info_st * const, char *s, size_t l, char **e,
00269        int *err);
00270   int64_t    (*strtoll10)(const struct charset_info_st *cs,
00271                            const char *nptr, char **endptr, int *error);
00272   uint64_t   (*strntoull10rnd)(const struct charset_info_st *cs,
00273                                 const char *str, size_t length,
00274                                 int unsigned_fl,
00275                                 char **endptr, int *error);
00276   size_t        (*scan)(const struct charset_info_st * const, const char *b, const char *e,
00277                         int sq);
00278 } MY_CHARSET_HANDLER;
00279 
00280 extern MY_CHARSET_HANDLER my_charset_8bit_handler;
00281 extern MY_CHARSET_HANDLER my_charset_ucs2_handler;
00282 
00283 
00284 /* See strings/CHARSET_INFO.txt about information on this structure  */
00285 typedef struct charset_info_st
00286 {
00287   uint32_t      number;
00288   uint32_t      primary_number;
00289   uint32_t      binary_number;
00290   uint32_t      state;
00291   const char *csname;
00292   const char *name;
00293   const char *comment;
00294   const char *tailoring;
00295   unsigned char    *ctype;
00296   unsigned char    *to_lower;
00297   unsigned char    *to_upper;
00298   unsigned char    *sort_order;
00299   uint16_t   *contractions;
00300   uint16_t   **sort_order_big;
00301   uint16_t      *tab_to_uni;
00302   MY_UNI_IDX  *tab_from_uni;
00303   MY_UNICASE_INFO **caseinfo;
00304   unsigned char     *state_map;
00305   unsigned char     *ident_map;
00306   uint32_t      strxfrm_multiply;
00307   unsigned char     caseup_multiply;
00308   unsigned char     casedn_multiply;
00309   uint32_t      mbminlen;
00310   uint32_t      mbmaxlen;
00311   uint16_t    min_sort_char;
00312   uint16_t    max_sort_char; /* For LIKE optimization */
00313   unsigned char     pad_char;
00314   bool   escape_with_backslash_is_dangerous;
00315   unsigned char     levels_for_compare;
00316   unsigned char     levels_for_order;
00317 
00318   MY_CHARSET_HANDLER *cset;
00319   MY_COLLATION_HANDLER *coll;
00320 
00321 } CHARSET_INFO;
00322 
00323 #define ILLEGAL_CHARSET_INFO_NUMBER (UINT32_MAX)
00324 
00325 
00326 extern DRIZZLED_API CHARSET_INFO my_charset_bin;
00327 extern DRIZZLED_API CHARSET_INFO my_charset_utf8mb4_bin;
00328 extern DRIZZLED_API CHARSET_INFO my_charset_utf8mb4_general_ci;
00329 extern DRIZZLED_API CHARSET_INFO my_charset_utf8mb4_unicode_ci;
00330 
00331 #define MY_UTF8MB4                 "utf8"
00332 #define my_charset_utf8_general_ci ::drizzled::my_charset_utf8mb4_general_ci
00333 #define my_charset_utf8_bin        ::drizzled::my_charset_utf8mb4_bin
00334 
00335 
00336 /* declarations for simple charsets */
00337 size_t my_strnxfrmlen_simple(const CHARSET_INFO * const, size_t);
00338 
00339 extern int  my_strnncollsp_simple(const CHARSET_INFO * const, const unsigned char *, size_t,
00340                                   const unsigned char *, size_t,
00341                                   bool diff_if_only_endspace_difference);
00342 
00343 extern size_t my_lengthsp_8bit(const CHARSET_INFO * const cs, const char *ptr, size_t length);
00344 
00345 extern uint32_t my_instr_simple(const CHARSET_INFO * const,
00346                             const char *b, size_t b_length,
00347                             const char *s, size_t s_length,
00348                             my_match_t *match, uint32_t nmatch);
00349 
00350 
00351 /* Functions for 8bit */
00352 int my_mb_ctype_8bit(const CHARSET_INFO * const,int *, const unsigned char *,const unsigned char *);
00353 int my_mb_ctype_mb(const CHARSET_INFO * const,int *, const unsigned char *,const unsigned char *);
00354 
00355 size_t my_scan_8bit(const CHARSET_INFO * const cs, const char *b, const char *e, int sq);
00356 
00357 size_t my_snprintf_8bit(const CHARSET_INFO * const, char *to, size_t n,
00358                         const char *fmt, ...)
00359   __attribute__((format(printf, 4, 5)));
00360 
00361 long       my_strntol_8bit(const CHARSET_INFO * const, const char *s, size_t l, int base,
00362                            char **e, int *err);
00363 unsigned long      my_strntoul_8bit(const CHARSET_INFO * const, const char *s, size_t l, int base,
00364           char **e, int *err);
00365 int64_t   my_strntoll_8bit(const CHARSET_INFO * const, const char *s, size_t l, int base,
00366           char **e, int *err);
00367 uint64_t my_strntoull_8bit(const CHARSET_INFO * const, const char *s, size_t l, int base,
00368           char **e, int *err);
00369 double      my_strntod_8bit(const CHARSET_INFO * const, char *s, size_t l,char **e,
00370           int *err);
00371 size_t my_long10_to_str_8bit(const CHARSET_INFO * const, char *to, size_t l, int radix,
00372                              long int val);
00373 size_t my_int64_t10_to_str_8bit(const CHARSET_INFO * const, char *to, size_t l, int radix,
00374                                  int64_t val);
00375 
00376 int64_t my_strtoll10_8bit(const CHARSET_INFO * const cs,
00377                            const char *nptr, char **endptr, int *error);
00378 int64_t my_strtoll10_ucs2(CHARSET_INFO *cs,
00379                            const char *nptr, char **endptr, int *error);
00380 
00381 uint64_t my_strntoull10rnd_8bit(const CHARSET_INFO * const cs,
00382                                  const char *str, size_t length, int
00383                                  unsigned_fl, char **endptr, int *error);
00384 uint64_t my_strntoull10rnd_ucs2(CHARSET_INFO *cs,
00385                                  const char *str, size_t length,
00386                                  int unsigned_fl, char **endptr, int *error);
00387 
00388 void my_fill_8bit(const CHARSET_INFO * const cs, char* to, size_t l, int fill);
00389 
00390 bool  my_like_range_simple(const CHARSET_INFO * const cs,
00391             const char *ptr, size_t ptr_length,
00392             char escape, char w_one, char w_many,
00393             size_t res_length,
00394             char *min_str, char *max_str,
00395             size_t *min_length, size_t *max_length);
00396 
00397 bool  my_like_range_mb(const CHARSET_INFO * const cs,
00398         const char *ptr, size_t ptr_length,
00399         char escape, char w_one, char w_many,
00400         size_t res_length,
00401         char *min_str, char *max_str,
00402         size_t *min_length, size_t *max_length);
00403 
00404 bool  my_like_range_ucs2(const CHARSET_INFO * const cs,
00405           const char *ptr, size_t ptr_length,
00406           char escape, char w_one, char w_many,
00407           size_t res_length,
00408           char *min_str, char *max_str,
00409           size_t *min_length, size_t *max_length);
00410 
00411 bool  my_like_range_utf16(const CHARSET_INFO * const cs,
00412            const char *ptr, size_t ptr_length,
00413            char escape, char w_one, char w_many,
00414            size_t res_length,
00415            char *min_str, char *max_str,
00416            size_t *min_length, size_t *max_length);
00417 
00418 bool  my_like_range_utf32(const CHARSET_INFO * const cs,
00419            const char *ptr, size_t ptr_length,
00420            char escape, char w_one, char w_many,
00421            size_t res_length,
00422            char *min_str, char *max_str,
00423            size_t *min_length, size_t *max_length);
00424 
00425 
00426 int my_wildcmp_8bit(const CHARSET_INFO * const,
00427         const char *str,const char *str_end,
00428         const char *wildstr,const char *wildend,
00429         int escape, int w_one, int w_many);
00430 
00431 int my_wildcmp_bin(const CHARSET_INFO * const,
00432        const char *str,const char *str_end,
00433        const char *wildstr,const char *wildend,
00434        int escape, int w_one, int w_many);
00435 
00436 size_t my_numchars_8bit(const CHARSET_INFO * const, const char *b, const char *e);
00437 size_t my_numcells_8bit(const CHARSET_INFO * const, const char *b, const char *e);
00438 size_t my_charpos_8bit(const CHARSET_INFO * const, const char *b, const char *e, size_t pos);
00439 size_t my_well_formed_len_8bit(const CHARSET_INFO * const, const char *b, const char *e,
00440                              size_t pos, int *error);
00441 typedef unsigned char *(*cs_alloc_func)(size_t);
00442 bool my_coll_init_simple(CHARSET_INFO *cs, cs_alloc_func alloc);
00443 bool my_cset_init_8bit(CHARSET_INFO *cs, cs_alloc_func alloc);
00444 uint32_t my_mbcharlen_8bit(const CHARSET_INFO * const, uint32_t c);
00445 
00446 
00447 /* Functions for multibyte charsets */
00448 extern size_t my_caseup_str_mb(const CHARSET_INFO * const, char *);
00449 extern size_t my_casedn_str_mb(const CHARSET_INFO * const, char *);
00450 extern size_t my_caseup_mb(const CHARSET_INFO * const, char *src, size_t srclen,
00451                                          char *dst, size_t dstlen);
00452 extern size_t my_casedn_mb(const CHARSET_INFO * const, char *src, size_t srclen,
00453                                          char *dst, size_t dstlen);
00454 extern int my_strcasecmp_mb(const CHARSET_INFO * const  cs, const char *s, const char *t);
00455 
00456 int my_wildcmp_mb(const CHARSET_INFO * const,
00457       const char *str,const char *str_end,
00458       const char *wildstr,const char *wildend,
00459       int escape, int w_one, int w_many);
00460 size_t my_numchars_mb(const CHARSET_INFO * const, const char *b, const char *e);
00461 size_t my_numcells_mb(const CHARSET_INFO * const, const char *b, const char *e);
00462 size_t my_charpos_mb(const CHARSET_INFO * const, const char *b, const char *e, size_t pos);
00463 size_t my_well_formed_len_mb(const CHARSET_INFO * const, const char *b, const char *e,
00464                              size_t pos, int *error);
00465 uint32_t my_instr_mb(const CHARSET_INFO * const,
00466                  const char *b, size_t b_length,
00467                  const char *s, size_t s_length,
00468                  my_match_t *match, uint32_t nmatch);
00469 
00470 int my_strnncoll_mb_bin(const CHARSET_INFO * const  cs,
00471                         const unsigned char *s, size_t slen,
00472                         const unsigned char *t, size_t tlen,
00473                         bool t_is_prefix);
00474 
00475 int my_strnncollsp_mb_bin(const CHARSET_INFO * const cs,
00476                           const unsigned char *a, size_t a_length,
00477                           const unsigned char *b, size_t b_length,
00478                           bool diff_if_only_endspace_difference);
00479 
00480 int my_wildcmp_mb_bin(const CHARSET_INFO * const cs,
00481                       const char *str,const char *str_end,
00482                       const char *wildstr,const char *wildend,
00483                       int escape, int w_one, int w_many);
00484 
00485 int my_strcasecmp_mb_bin(const CHARSET_INFO * const, const char *s, const char *t);
00486 
00487 void my_hash_sort_mb_bin(const CHARSET_INFO * const,
00488                          const unsigned char *key, size_t len, uint32_t *nr1, uint32_t *nr2);
00489 
00490 size_t my_strnxfrm_mb(const CHARSET_INFO * const,
00491                       unsigned char *dst, size_t dstlen, uint32_t nweights,
00492                       const unsigned char *src, size_t srclen, uint32_t flags);
00493 
00494 int my_wildcmp_unicode(const CHARSET_INFO * const cs,
00495                        const char *str, const char *str_end,
00496                        const char *wildstr, const char *wildend,
00497                        int escape, int w_one, int w_many,
00498                        MY_UNICASE_INFO **weights);
00499 
00500 extern bool my_parse_charset_xml(const char *bug, size_t len,
00501             int (*add)(CHARSET_INFO *cs));
00502 
00503 bool my_propagate_simple(const CHARSET_INFO * const cs, const unsigned char *str, size_t len);
00504 bool my_propagate_complex(const CHARSET_INFO * const cs, const unsigned char *str, size_t len);
00505 
00506 
00507 uint32_t my_strxfrm_flag_normalize(uint32_t flags, uint32_t nlevels);
00508 void my_strxfrm_desc_and_reverse(unsigned char *str, unsigned char *strend,
00509                                  uint32_t flags, uint32_t level);
00510 size_t my_strxfrm_pad_desc_and_reverse(const CHARSET_INFO * const cs,
00511                                        unsigned char *str, unsigned char *frmend, unsigned char *strend,
00512                                        uint32_t nweights, uint32_t flags, uint32_t level);
00513 
00514 bool my_charset_is_ascii_compatible(const CHARSET_INFO * const cs);
00515 
00516 /*
00517   Compare 0-terminated UTF8 strings.
00518 
00519   SYNOPSIS
00520     my_strcasecmp_utf8mb3()
00521     cs                  character set handler
00522     s                   First 0-terminated string to compare
00523     t                   Second 0-terminated string to compare
00524 
00525   IMPLEMENTATION
00526 
00527   RETURN
00528     - negative number if s < t
00529     - positive number if s > t
00530     - 0 if the strings are equal
00531 */
00532 int
00533 my_wc_mb_filename(const CHARSET_INFO * const,
00534                   my_wc_t wc, unsigned char *s, unsigned char *e);
00535 
00536 int
00537 my_mb_wc_filename(const CHARSET_INFO * const,
00538                   my_wc_t *pwc, const unsigned char *s, const unsigned char *e);
00539 
00540 
00541 unsigned int my_ismbchar_utf8mb4(const CHARSET_INFO * const cs,                                  const char *b, const char *e);
00542 unsigned int my_mbcharlen_utf8mb4(const CHARSET_INFO * const, uint32_t c);
00543 
00544 size_t my_strnxfrmlen_utf8mb4(const CHARSET_INFO * const, size_t len);
00545 size_t
00546 my_strnxfrm_utf8mb4(const CHARSET_INFO * const cs,
00547                     unsigned char *dst, size_t dstlen, uint32_t nweights,
00548                     const unsigned char *src, size_t srclen, uint32_t flags);
00549 
00550 int
00551 my_wildcmp_utf8mb4(const CHARSET_INFO * const cs,
00552                    const char *str, const char *strend,
00553                    const char *wildstr, const char *wildend,
00554                    int escape, int w_one, int w_many);
00555 int
00556 my_strnncollsp_utf8mb4(const CHARSET_INFO * const cs,
00557                        const unsigned char *s, size_t slen,
00558                        const unsigned char *t, size_t tlen,
00559                        bool diff_if_only_endspace_difference);
00560 int my_strcasecmp_utf8mb4(const CHARSET_INFO * const cs,
00561                           const char *s, const char *t);
00562 
00563 int
00564 my_strnncoll_utf8mb4(const CHARSET_INFO * const cs,
00565                      const unsigned char *s, size_t slen,
00566                      const unsigned char *t, size_t tlen,
00567                      bool t_is_prefix);
00568 
00569 int
00570 my_mb_wc_utf8mb4(const CHARSET_INFO * const cs,
00571                  my_wc_t * pwc, const unsigned char *s, const unsigned char *e);
00572 
00573 int
00574 my_wc_mb_utf8mb4(const CHARSET_INFO * const cs,
00575                  my_wc_t wc, unsigned char *r, unsigned char *e);
00576 
00577 size_t my_caseup_str_utf8mb4(const CHARSET_INFO * const cs, char *src);
00578 size_t my_casedn_str_utf8mb4(const CHARSET_INFO * const cs, char *src);
00579 
00580 size_t
00581 my_caseup_utf8mb4(const CHARSET_INFO * const cs, char *src, size_t srclen,
00582                   char *dst, size_t dstlen);
00583 size_t
00584 my_casedn_utf8mb4(const CHARSET_INFO * const cs,
00585                   char *src, size_t srclen,
00586                   char *dst, size_t dstlen);
00587 
00588 
00589 bool my_coll_init_uca(CHARSET_INFO *cs, cs_alloc_func alloc);
00590 
00591 int my_strnncoll_any_uca(const CHARSET_INFO * const cs,
00592                          const unsigned char *s, size_t slen,
00593                          const unsigned char *t, size_t tlen,
00594                          bool t_is_prefix);
00595 
00596 int my_strnncollsp_any_uca(const CHARSET_INFO * const cs,
00597                            const unsigned char *s, size_t slen,
00598                            const unsigned char *t, size_t tlen,
00599                            bool diff_if_only_endspace_difference);
00600 
00601 void my_hash_sort_any_uca(const CHARSET_INFO * const cs,
00602                           const unsigned char *s, size_t slen,
00603                           uint32_t *n1, uint32_t *n2);
00604 
00605 size_t my_strnxfrm_any_uca(const CHARSET_INFO * const cs,
00606                            unsigned char *dst, size_t dstlen, uint32_t nweights,
00607                            const unsigned char *src, size_t srclen,
00608                            uint32_t flags);
00609 
00610 int my_wildcmp_uca(const CHARSET_INFO * const cs,
00611                    const char *str,const char *str_end,
00612                    const char *wildstr,const char *wildend,
00613                    int escape, int w_one, int w_many);
00614 
00615 int my_strnncoll_8bit_bin(const CHARSET_INFO * const,
00616                           const unsigned char *s, size_t slen,
00617                           const unsigned char *t, size_t tlen,
00618                           bool t_is_prefix);
00619 int my_strnncollsp_8bit_bin(const CHARSET_INFO * const,
00620                             const unsigned char *a, size_t a_length,
00621                             const unsigned char *b, size_t b_length,
00622                             bool diff_if_only_endspace_difference);
00623 size_t my_case_str_bin(const CHARSET_INFO * const, char *);
00624 size_t my_case_bin(const CHARSET_INFO * const, char *,
00625                    size_t srclen, char *, size_t);
00626 int my_strcasecmp_bin(const CHARSET_INFO * const,
00627                       const char *s, const char *t);
00628 size_t
00629 my_strnxfrm_8bit_bin(const CHARSET_INFO * const cs,
00630                      unsigned char * dst, size_t dstlen, uint32_t nweights,
00631                      const unsigned char *src, size_t srclen, uint32_t flags);
00632 uint32_t my_instr_bin(const CHARSET_INFO * const,
00633                       const char *b, size_t b_length,
00634                       const char *s, size_t s_length,
00635                       my_match_t *match, uint32_t nmatch);
00636 size_t my_lengthsp_binary(const CHARSET_INFO * const,
00637                           const char *, size_t length);
00638 int my_mb_wc_bin(const CHARSET_INFO * const,
00639                  my_wc_t *wc, const unsigned char *str,
00640                  const unsigned char *end);
00641 int my_wc_mb_bin(const CHARSET_INFO * const, my_wc_t wc,
00642                  unsigned char *str, unsigned char *end);
00643 void my_hash_sort_8bit_bin(const CHARSET_INFO * const,
00644                            const unsigned char *key, size_t len,
00645                            uint32_t *nr1, uint32_t *nr2);
00646 bool my_coll_init_8bit_bin(CHARSET_INFO *cs,
00647                            cs_alloc_func);
00648 int my_strnncoll_binary(const CHARSET_INFO * const,
00649                         const unsigned char *s, size_t slen,
00650                         const unsigned char *t, size_t tlen,
00651                         bool t_is_prefix);
00652 int my_strnncollsp_binary(const CHARSET_INFO * const cs,
00653                           const unsigned char *s, size_t slen,
00654                           const unsigned char *t, size_t tlen,
00655                           bool);
00656 
00657 
/* Character-class bits, one bit per class (same values as before, in hex) */
#define _MY_U   0x01  /* Upper case */
#define _MY_L   0x02  /* Lower case */
#define _MY_NMR 0x04  /* Numeral (digit) */
#define _MY_SPC 0x08  /* Spacing character */
#define _MY_PNT 0x10  /* Punctuation */
#define _MY_CTR 0x20  /* Control character */
#define _MY_B   0x40  /* Blank */
#define _MY_X   0x80  /* heXadecimal digit */
00666 
00667 
/* True when c has no bit set outside the low seven bits (0..0177). */
inline static bool my_isascii(char c)
{
  const int high_bits = c & ~0x7f;
  return high_bits == 0;
}
00672 
/* Return c with everything but its low seven bits cleared. */
inline static char my_toascii(char c)
{
  const int low_seven = c & 0x7f;
  return (char)low_seven;
}
00677 
/* Return c reduced to its low five bits (c & 31). */
inline static char my_tocntrl(char c)
{
  const int low_five = c & 0x1f;
  return (char)low_five;
}
00682 
/* Return c with bit 6 (value 64) forced on. */
inline static char my_toprint(char c)
{
  const int with_bit6 = c | 0x40;
  return (char)with_bit6;
}
00687 
00688 inline static char my_toupper(const charset_info_st *s, unsigned char c)
00689 {
00690   return s->to_upper[c];
00691 }
00692 
00693 inline static char my_tolower(const charset_info_st *s, unsigned char c)
00694 {
00695   return s->to_lower[c];
00696 }
00697 
00698 inline static bool my_isalpha(const charset_info_st *s, unsigned char c)
00699 {
00700   return (s->ctype+1)[c] & (_MY_U | _MY_L);
00701 }
00702 
00703 inline static bool my_isupper(const charset_info_st *s, unsigned char c)
00704 {
00705   return (s->ctype+1)[c] & _MY_U;
00706 }
00707 
00708 inline static bool my_islower(const charset_info_st *s, unsigned char c)
00709 {
00710   return (s->ctype+1)[c] & _MY_L;
00711 }
00712 
00713 inline static bool my_isdigit(const charset_info_st *s, unsigned char c)
00714 {
00715   return (s->ctype+1)[c] & _MY_NMR;
00716 }
00717 
00718 inline static bool my_isxdigit(const charset_info_st *s, unsigned char c)
00719 {
00720   return (s->ctype+1)[c] & _MY_X;
00721 }
00722 
00723 inline static bool my_isalnum(const charset_info_st *s, unsigned char c) 
00724 {
00725   return (s->ctype+1)[c] & (_MY_U | _MY_L | _MY_NMR);
00726 }
00727 
00728 inline static bool my_isspace(const charset_info_st *s, unsigned char c)
00729 {
00730   return (s->ctype+1)[c] & _MY_SPC;
00731 }
00732 
00733 inline static bool my_ispunct(const charset_info_st *s, unsigned char c)  
00734 {
00735   return (s->ctype+1)[c] & _MY_PNT;
00736 }
00737 
00738 inline static bool my_isprint(const charset_info_st *s, unsigned char c)  
00739 {
00740   return (s->ctype+1)[c] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR | _MY_B);
00741 }
00742 
00743 inline static bool my_isgraph(const charset_info_st *s, unsigned char c)
00744 {
00745   return (s->ctype+1)[c] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR);
00746 }
00747 
00748 inline static bool my_iscntrl(const charset_info_st *s, unsigned char c)  
00749 {
00750   return (s->ctype+1)[c] & _MY_CTR;
00751 }
00752 
00753 /* Some helpers that should be cleaned up a little */
00754 inline static bool my_isvar(const charset_info_st *s, char c)
00755 {
00756   return my_isalnum(s,c) || (c) == '_';
00757 }
00758 
00759 inline static bool my_isvar_start(const charset_info_st *s, char c)
00760 {
00761   return my_isalpha(s,c) || (c) == '_';
00762 }
00763 
00764 inline static bool my_binary_compare(const charset_info_st *s)
00765 {
00766   return s->state  & MY_CS_BINSORT;
00767 }
00768 
00769 inline static bool use_strnxfrm(const charset_info_st *s)
00770 {
00771   return s->state & MY_CS_STRNXFRM;
00772 }
00773 
00774 inline static size_t my_strnxfrm(const charset_info_st *cs, 
00775                                  unsigned char *dst, 
00776                                  const size_t dstlen, 
00777                                  const unsigned char *src, 
00778                                  const uint32_t srclen)
00779 {
00780   return (cs->coll->strnxfrm(cs, dst, dstlen, dstlen, src, srclen, MY_STRXFRM_PAD_WITH_SPACE));
00781 }
00782 
00783 inline static int my_strnncoll(const charset_info_st *cs, 
00784                                const unsigned char *s, 
00785                                const size_t slen, 
00786                                const unsigned char *t,
00787                                const size_t tlen) 
00788 {
00789   return (cs->coll->strnncoll(cs, s, slen, t, tlen, 0));
00790 }
00791 
00792 inline static bool my_like_range(const charset_info_st *cs,
00793                                  const char *ptr, const size_t ptrlen,
00794                                  const char escape, 
00795                                  const char w_one,
00796                                  const char w_many, 
00797                                  const size_t reslen, 
00798                                  char *minstr, char *maxstr, 
00799                                  size_t *minlen, size_t *maxlen)
00800 {
00801   return (cs->coll->like_range(cs, ptr, ptrlen, escape, w_one, w_many, reslen, 
00802                                minstr, maxstr, minlen, maxlen));
00803 }
00804 
00805 inline static int my_wildcmp(const charset_info_st *cs,
00806                              const char *str, const char *strend,
00807                              const char *w_str, const char *w_strend,
00808                              const int escape,
00809                              const int w_one, const int w_many) 
00810 {
00811   return (cs->coll->wildcmp(cs, str, strend, w_str, w_strend, escape, w_one, w_many));
00812 }
00813 
00814 inline static int my_strcasecmp(const charset_info_st *cs, const char *s, const char *t)
00815 {
00816   return (cs->coll->strcasecmp(cs, s, t));
00817 }
00818 
00819 template <typename CHAR_T>
00820 inline static size_t my_charpos(const charset_info_st *cs, 
00821                                 const CHAR_T *b, const CHAR_T* e, size_t num)
00822 {
00823   return cs->cset->charpos(cs, reinterpret_cast<const char*>(b), reinterpret_cast<const char*>(e), num);
00824 }
00825 
00826 inline static bool use_mb(const charset_info_st *cs)
00827 {
00828   return cs->cset->ismbchar != NULL;
00829 }
00830 
00831 inline static unsigned int  my_ismbchar(const charset_info_st *cs, const char *a, const char *b)
00832 {
00833   return cs->cset->ismbchar(cs, a, b);
00834 }
00835 
00836 inline static unsigned int my_mbcharlen(const charset_info_st *cs, uint32_t c)
00837 {
00838   return cs->cset->mbcharlen(cs, c);
00839 }
00840 
00841 
00842 inline static size_t my_caseup_str(const charset_info_st *cs, char *src)
00843 {
00844   return cs->cset->caseup_str(cs, src);
00845 }
00846 
00847 inline static size_t my_casedn_str(const charset_info_st *cs, char *src)
00848 {
00849   return cs->cset->casedn_str(cs, src);
00850 }
00851 
00852 inline static long my_strntol(const charset_info_st *cs, 
00853                               const char* s, const size_t l, const int base, char **e, int *err)
00854 {
00855   return (cs->cset->strntol(cs, s, l, base, e, err));
00856 }
00857 
00858 inline static unsigned long my_strntoul(const charset_info_st *cs, 
00859                                         const char* s, const size_t l, const int base, 
00860                                         char **e, int *err)
00861 {
00862   return (cs->cset->strntoul(cs, s, l, base, e, err));
00863 }
00864 
00865 inline static int64_t my_strntoll(const charset_info_st *cs, 
00866                                  const char* s, const size_t l, const int base, char **e, int *err)
00867 {
00868   return (cs->cset->strntoll(cs, s, l, base, e, err));
00869 }
00870 
00871 inline static int64_t my_strntoull(const charset_info_st *cs, 
00872                                    const char* s, const size_t l, const int base, 
00873                                    char **e, int *err)
00874 {
00875   return (cs->cset->strntoull(cs, s, l, base, e, err));
00876 }
00877 
00878 
00879 inline static double my_strntod(const charset_info_st *cs, 
00880                                 char* s, const size_t l, char **e, int *err)
00881 {
00882   return (cs->cset->strntod(cs, s, l, e, err));
00883 }
00884 
00885 int make_escape_code(const CHARSET_INFO * const cs, const char *escape);
00886 
00887 } /* namespace drizzled */
00888