Drizzled Public API Documentation

ctype-mb.cc

00001 /* Copyright (C) 2000 MySQL AB
00002 
00003    This program is free software; you can redistribute it and/or modify
00004    it under the terms of the GNU General Public License as published by
00005    the Free Software Foundation; version 2 of the License.
00006 
00007    This program is distributed in the hope that it will be useful,
00008    but WITHOUT ANY WARRANTY; without even the implied warranty of
00009    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00010    GNU General Public License for more details.
00011 
00012    You should have received a copy of the GNU General Public License
00013    along with this program; if not, write to the Free Software
00014    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */
00015 
00016 #include <config.h>
00017 
00018 #include <drizzled/internal/m_string.h>
00019 #include <drizzled/charset_info.h>
00020 
00021 #include <algorithm>
00022 
00023 using namespace std;
00024 
00025 namespace drizzled
00026 {
00027 
00028 
00029 size_t my_caseup_str_mb(const CHARSET_INFO * const  cs, char *str)
00030 {
00031   uint32_t l;
00032   unsigned char *map= cs->to_upper;
00033   char *str_orig= str;
00034 
00035   while (*str)
00036   {
00037     /* Pointing after the '\0' is safe here. */
00038     if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
00039       str+= l;
00040     else
00041     {
00042       *str= (char) map[(unsigned char)*str];
00043       str++;
00044     }
00045   }
00046   return (size_t) (str - str_orig);
00047 }
00048 
00049 
00050 size_t my_casedn_str_mb(const CHARSET_INFO * const  cs, char *str)
00051 {
00052   uint32_t l;
00053   unsigned char *map= cs->to_lower;
00054   char *str_orig= str;
00055 
00056   while (*str)
00057   {
00058     /* Pointing after the '\0' is safe here. */
00059     if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
00060       str+= l;
00061     else
00062     {
00063       *str= (char) map[(unsigned char)*str];
00064       str++;
00065     }
00066   }
00067   return (size_t) (str - str_orig);
00068 }
00069 
00070 
00071 size_t my_caseup_mb(const CHARSET_INFO * const  cs, char *src, size_t srclen,
00072                     char *dst, size_t dstlen)
00073 {
00074 #ifdef NDEBUG
00075   (void)dst;
00076   (void)dstlen;
00077 #endif
00078   uint32_t l;
00079   char *srcend= src + srclen;
00080   unsigned char *map= cs->to_upper;
00081 
00082   assert(src == dst && srclen == dstlen);
00083   while (src < srcend)
00084   {
00085     if ((l=my_ismbchar(cs, src, srcend)))
00086       src+= l;
00087     else
00088     {
00089       *src=(char) map[(unsigned char) *src];
00090       src++;
00091     }
00092   }
00093   return srclen;
00094 }
00095 
00096 
00097 size_t my_casedn_mb(const CHARSET_INFO * const  cs, char *src, size_t srclen,
00098                     char *dst, size_t dstlen)
00099 {
00100 #ifdef NDEBUG
00101   (void)dst;
00102   (void)dstlen;
00103 #endif
00104   uint32_t l;
00105   char *srcend= src + srclen;
00106   unsigned char *map=cs->to_lower;
00107 
00108   assert(src == dst && srclen == dstlen);
00109   while (src < srcend)
00110   {
00111     if ((l= my_ismbchar(cs, src, srcend)))
00112       src+= l;
00113     else
00114     {
00115       *src= (char) map[(unsigned char)*src];
00116       src++;
00117     }
00118   }
00119   return srclen;
00120 }
00121 
00122 
00123 /*
00124   my_strcasecmp_mb() returns 0 if strings are equal, non-zero otherwise.
00125  */
00126 
00127 int my_strcasecmp_mb(const CHARSET_INFO * const  cs,const char *s, const char *t)
00128 {
00129   uint32_t l;
00130   unsigned char *map=cs->to_upper;
00131 
00132   while (*s && *t)
00133   {
00134     /* Pointing after the '\0' is safe here. */
00135     if ((l=my_ismbchar(cs, s, s + cs->mbmaxlen)))
00136     {
00137       while (l--)
00138         if (*s++ != *t++)
00139           return 1;
00140     }
00141     else if (my_mbcharlen(cs, *t) > 1)
00142       return 1;
00143     else if (map[(unsigned char) *s++] != map[(unsigned char) *t++])
00144       return 1;
00145   }
00146   /* At least one of '*s' and '*t' is zero here. */
00147   return (*t != *s);
00148 }
00149 
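00150 /*
00151 ** Compare a string against a pattern containing wildcards (my_wildcmp_mb below).
00152 **   0 if the string matches the pattern
00153 **  -1 if it does not match and the string ran out while wildcards were pending
00154 **   1 if it does not match (a literal character differed or input was left over)
00155 */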
00156 
00157 inline static const char* inc_ptr(const charset_info_st *cs, const char *str, const char* str_end)
00158 {
00159   return str + (my_ismbchar(cs, str, str_end) ? my_ismbchar(cs, str, str_end) : 1);
00160 }
00161 
00162 inline static int likeconv(const charset_info_st *cs, const char c) 
00163 {
00164   return (unsigned char) cs->sort_order[(unsigned char) c];
00165 }
00166     
00167 int my_wildcmp_mb(const CHARSET_INFO * const cs,
00168       const char *str,const char *str_end,
00169       const char *wildstr,const char *wildend,
00170       int escape, int w_one, int w_many)
00171 {
00172   int result= -1;       /* Not found, using wildcards */
00173 
00174   while (wildstr != wildend)
00175   {
00176     while (*wildstr != w_many && *wildstr != w_one)
00177     {
00178       int l;
00179       if (*wildstr == escape && wildstr+1 != wildend)
00180   wildstr++;
00181       if ((l = my_ismbchar(cs, wildstr, wildend)))
00182       {
00183     if (str+l > str_end || memcmp(str, wildstr, l) != 0)
00184         return 1;
00185     str += l;
00186     wildstr += l;
00187       }
00188       else
00189       if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
00190   return(1);        /* No match */
00191       if (wildstr == wildend)
00192   return (str != str_end);    /* Match if both are at end */
00193       result=1;         /* Found an anchor char */
00194     }
00195     if (*wildstr == w_one)
00196     {
00197       do
00198       {
00199   if (str == str_end)     /* Skip one char if possible */
00200     return (result);
00201   inc_ptr(cs,str,str_end);
00202       } while (++wildstr < wildend && *wildstr == w_one);
00203       if (wildstr == wildend)
00204   break;
00205     }
00206     if (*wildstr == w_many)
00207     {           /* Found w_many */
00208       unsigned char cmp;
00209       const char* mb = wildstr;
00210       int mb_len=0;
00211 
00212       wildstr++;
00213       /* Remove any '%' and '_' from the wild search string */
00214       for (; wildstr != wildend ; wildstr++)
00215       {
00216   if (*wildstr == w_many)
00217     continue;
00218   if (*wildstr == w_one)
00219   {
00220     if (str == str_end)
00221       return (-1);
00222     inc_ptr(cs,str,str_end);
00223     continue;
00224   }
00225   break;          /* Not a wild character */
00226       }
00227       if (wildstr == wildend)
00228   return(0);        /* Ok if w_many is last */
00229       if (str == str_end)
00230   return -1;
00231 
00232       if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
00233   cmp= *++wildstr;
00234 
00235       mb=wildstr;
00236       mb_len= my_ismbchar(cs, wildstr, wildend);
00237       inc_ptr(cs,wildstr,wildend);    /* This is compared trough cmp */
00238       cmp=likeconv(cs,cmp);
00239       do
00240       {
00241         for (;;)
00242         {
00243           if (str >= str_end)
00244             return -1;
00245           if (mb_len)
00246           {
00247             if (str+mb_len <= str_end && memcmp(str, mb, mb_len) == 0)
00248             {
00249               str += mb_len;
00250               break;
00251             }
00252           }
00253           else if (!my_ismbchar(cs, str, str_end) &&
00254                    likeconv(cs,*str) == cmp)
00255           {
00256             str++;
00257             break;
00258           }
00259           inc_ptr(cs,str, str_end);
00260         }
00261   {
00262     int tmp=my_wildcmp_mb(cs,str,str_end,wildstr,wildend,escape,w_one,
00263                                 w_many);
00264     if (tmp <= 0)
00265       return (tmp);
00266   }
00267       } while (str != str_end && wildstr[0] != w_many);
00268       return(-1);
00269     }
00270   }
00271   return (str != str_end ? 1 : 0);
00272 }
00273 
00274 
00275 size_t my_numchars_mb(const CHARSET_INFO * const cs,
00276           const char *pos, const char *end)
00277 {
00278   size_t count= 0;
00279   while (pos < end)
00280   {
00281     uint32_t mb_len;
00282     pos+= (mb_len= my_ismbchar(cs,pos,end)) ? mb_len : 1;
00283     count++;
00284   }
00285   return count;
00286 }
00287 
00288 
00289 size_t my_charpos_mb(const CHARSET_INFO * const cs,
00290          const char *pos, const char *end, size_t length)
00291 {
00292   const char *start= pos;
00293 
00294   while (length && pos < end)
00295   {
00296     uint32_t mb_len;
00297     pos+= (mb_len= my_ismbchar(cs, pos, end)) ? mb_len : 1;
00298     length--;
00299   }
00300   return (size_t) (length ? end+2-start : pos-start);
00301 }
00302 
00303 
00304 size_t my_well_formed_len_mb(const CHARSET_INFO * const cs, const char *b, const char *e,
00305                              size_t pos, int *error)
00306 {
00307   const char *b_start= b;
00308   *error= 0;
00309   while (pos)
00310   {
00311     my_wc_t wc;
00312     int mb_len;
00313 
00314     if ((mb_len= cs->cset->mb_wc(cs, &wc, (const unsigned char*) b, (const unsigned char*) e)) <= 0)
00315     {
00316       *error= b < e ? 1 : 0;
00317       break;
00318     }
00319     b+= mb_len;
00320     pos--;
00321   }
00322   return (size_t) (b - b_start);
00323 }
00324 
00325 
00326 uint32_t my_instr_mb(const CHARSET_INFO * const cs,
00327                  const char *b, size_t b_length,
00328                  const char *s, size_t s_length,
00329                  my_match_t *match, uint32_t nmatch)
00330 {
00331   const char *end, *b0;
00332   int res= 0;
00333 
00334   if (s_length <= b_length)
00335   {
00336     if (!s_length)
00337     {
00338       if (nmatch)
00339       {
00340         match->beg= 0;
00341         match->end= 0;
00342         match->mb_len= 0;
00343       }
00344       return 1;   /* Empty string is always found */
00345     }
00346 
00347     b0= b;
00348     end= b+b_length-s_length+1;
00349 
00350     while (b < end)
00351     {
00352       int mb_len;
00353 
00354       if (!cs->coll->strnncoll(cs, (const unsigned char*) b, s_length,
00355                                    (const unsigned char*) s, s_length, 0))
00356       {
00357         if (nmatch)
00358         {
00359           match[0].beg= 0;
00360           match[0].end= (size_t) (b-b0);
00361           match[0].mb_len= res;
00362           if (nmatch > 1)
00363           {
00364             match[1].beg= match[0].end;
00365             match[1].end= match[0].end+s_length;
00366             match[1].mb_len= 0; /* Not computed */
00367           }
00368         }
00369         return 2;
00370       }
00371       mb_len= (mb_len= my_ismbchar(cs, b, end)) ? mb_len : 1;
00372       b+= mb_len;
00373       b_length-= mb_len;
00374       res++;
00375     }
00376   }
00377   return 0;
00378 }
00379 
00380 
00381 /* BINARY collations handlers for MB charsets */
00382 
00383 int my_strnncoll_mb_bin(const CHARSET_INFO * const,
00384                         const unsigned char *s, size_t slen,
00385                         const unsigned char *t, size_t tlen,
00386                         bool t_is_prefix)
00387 {
00388   size_t len= min(slen,tlen);
00389   int cmp= memcmp(s,t,len);
00390   return cmp ? cmp : (int) ((t_is_prefix ? len : slen) - tlen);
00391 }
00392 
00393 
00394 /*
00395   Compare two strings.
00396 
00397   SYNOPSIS
00398     my_strnncollsp_mb_bin()
00399     cs      Chararacter set
00400     s     String to compare
00401     slen    Length of 's'
00402     t     String to compare
00403     tlen    Length of 't'
00404     diff_if_only_endspace_difference
00405             Set to 1 if the strings should be regarded as different
00406                         if they only difference in end space
00407 
00408   NOTE
00409    This function is used for character strings with binary collations.
00410    The shorter string is extended with end space to be as long as the longer
00411    one.
00412 
00413   RETURN
00414     A negative number if s < t
00415     A positive number if s > t
00416     0 if strings are equal
00417 */
00418 
00419 int my_strnncollsp_mb_bin(const CHARSET_INFO * const,
00420                           const unsigned char *a, size_t a_length,
00421                           const unsigned char *b, size_t b_length,
00422                           bool diff_if_only_endspace_difference)
00423 {
00424   const unsigned char *end;
00425   size_t length;
00426   int res;
00427 
00428 #ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
00429   diff_if_only_endspace_difference= 0;
00430 #endif
00431 
00432   end= a + (length= min(a_length, b_length));
00433   while (a < end)
00434   {
00435     if (*a++ != *b++)
00436       return ((int) a[-1] - (int) b[-1]);
00437   }
00438   res= 0;
00439   if (a_length != b_length)
00440   {
00441     int swap= 1;
00442     if (diff_if_only_endspace_difference)
00443       res= 1;                                   /* Assume 'a' is bigger */
00444     /*
00445       Check the next not space character of the longer key. If it's < ' ',
00446       then it's smaller than the other key.
00447     */
00448     if (a_length < b_length)
00449     {
00450       /* put shorter key in s */
00451       a_length= b_length;
00452       a= b;
00453       swap= -1;         /* swap sign of result */
00454       res= -res;
00455     }
00456     for (end= a + a_length-length; a < end ; a++)
00457     {
00458       if (*a != ' ')
00459   return (*a < ' ') ? -swap : swap;
00460     }
00461   }
00462   return res;
00463 }
00464 
00465 
/*
  Copy one non-ASCII character from "src" to "dst", advancing both pointers.
  "dst" must have enough room for the character.

  sort_order[] is deliberately not used in this macro.
  That is correct even for case insensitive collations:
  - basic Latin letters are processed outside this macro;
  - for other characters sort_order[x] is equal to x.

  The length comes from cs->cset->ismbchar(): 4, 3 or 2 bytes are copied for
  a multi-byte character, one byte when the result is 0.
*/
#define my_strnxfrm_mb_non_ascii_char(cs, dst, src, se)                  \
do                                                                       \
{                                                                        \
  switch ((cs)->cset->ismbchar((cs), (const char*) (src),                \
                               (const char*) (se)))                      \
  {                                                                      \
  case 4:                                                                \
    *(dst)++= *(src)++;                                                  \
    /* fall through */                                                   \
  case 3:                                                                \
    *(dst)++= *(src)++;                                                  \
    /* fall through */                                                   \
  case 2:                                                                \
    *(dst)++= *(src)++;                                                  \
    /* fall through */                                                   \
  case 0:                                                                \
    *(dst)++= *(src)++; /* byte in 0x80..0xFF that is not an MB head */  \
  }                                                                      \
} while (0)
00490 
00491 
00492 /*
00493   For character sets with two or three byte multi-byte
00494   characters having multibyte weights *equal* to their codes:
00495   cp932, euckr, gb2312, sjis, eucjpms, ujis.
00496 */
00497 size_t
00498 my_strnxfrm_mb(const CHARSET_INFO * const cs,
00499                unsigned char *dst, size_t dstlen, uint32_t nweights,
00500                const unsigned char *src, size_t srclen, uint32_t flags)
00501 {
00502   unsigned char *d0= dst;
00503   unsigned char *de= dst + dstlen;
00504   const unsigned char *se= src + srclen;
00505   const unsigned char *sort_order= cs->sort_order;
00506 
00507   assert(cs->mbmaxlen <= 4);
00508 
00509   /*
00510     If "srclen" is smaller than both "dstlen" and "nweights"
00511     then we can run a simplified loop -
00512     without checking "nweights" and "de".
00513   */
00514   if (dstlen >= srclen && nweights >= srclen)
00515   {
00516     if (sort_order)
00517     {
00518       /* Optimized version for a case insensitive collation */
00519       for (; src < se; nweights--)
00520       {
00521         if (*src < 128) /* quickly catch ASCII characters */
00522           *dst++= sort_order[*src++];
00523         else
00524           my_strnxfrm_mb_non_ascii_char(cs, dst, src, se);
00525       }
00526     }
00527     else
00528     {
00529       /* Optimized version for a case sensitive collation (no sort_order) */
00530       for (; src < se; nweights--)
00531       {
00532         if (*src < 128) /* quickly catch ASCII characters */
00533           *dst++= *src++;
00534         else
00535           my_strnxfrm_mb_non_ascii_char(cs, dst, src, se);
00536       }
00537     }
00538     goto pad;
00539   }
00540 
00541   /*
00542     A thourough loop, checking all possible limits:
00543     "se", "nweights" and "de".
00544   */
00545   for (; src < se && nweights; nweights--)
00546   {
00547     int chlen;
00548     if (*src < 128 ||
00549         !(chlen= cs->cset->ismbchar(cs, (const char*) src, (const char*) se)))
00550     {
00551       /* Single byte character */
00552       if (dst >= de)
00553         break;
00554       *dst++= sort_order ? sort_order[*src++] : *src++;
00555     }
00556     else
00557     {
00558       /* Multi-byte character */
00559       if (dst + chlen > de)
00560         break;
00561       *dst++= *src++;
00562       *dst++= *src++;
00563       if (chlen == 3)
00564         *dst++= *src++;
00565     }
00566   }
00567 
00568 pad:
00569   return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0);
00570 }
00571 
00572 
00573 int my_strcasecmp_mb_bin(const CHARSET_INFO * const,
00574                          const char *s, const char *t)
00575 {
00576   return strcmp(s,t);
00577 }
00578 
00579 
00580 void my_hash_sort_mb_bin(const CHARSET_INFO * const,
00581                          const unsigned char *key, size_t len,
00582                          uint32_t *nr1, uint32_t *nr2)
00583 {
00584   const unsigned char *pos = key;
00585 
00586   /*
00587      Remove trailing spaces. We have to do this to be able to compare
00588     'A ' and 'A' as identical
00589   */
00590   key= internal::skip_trailing_space(key, len);
00591 
00592   for (; pos < (const unsigned char*) key ; pos++)
00593   {
00594     nr1[0]^=(ulong) ((((uint32_t) nr1[0] & 63)+nr2[0]) *
00595        ((uint32_t)*pos)) + (nr1[0] << 8);
00596     nr2[0]+=3;
00597   }
00598 }
00599 
00600 
00601 /*
00602   Fill the given buffer with 'maximum character' for given charset
00603   SYNOPSIS
00604       pad_max_char()
00605       cs   Character set
00606       str  Start of buffer to fill
00607       end  End of buffer to fill
00608 
00609   DESCRIPTION
00610       Write max key:
00611       - for non-Unicode character sets:
00612         just set to 255.
00613       - for Unicode character set (utf-8):
00614         create a buffer with multibyte representation of the max_sort_char
00615         character, and copy it into max_str in a loop.
00616 */
00617 static void pad_max_char(const CHARSET_INFO * const cs, char *str, char *end)
00618 {
00619   char buf[10];
00620   char buflen;
00621 
00622   if (!(cs->state & MY_CS_UNICODE))
00623   {
00624     memset(str, 255, end - str);
00625     return;
00626   }
00627 
00628   buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (unsigned char*) buf,
00629                           (unsigned char*) buf + sizeof(buf));
00630 
00631   assert(buflen > 0);
00632   do
00633   {
00634     if ((str + buflen) < end)
00635     {
00636       /* Enough space for the characer */
00637       memcpy(str, buf, buflen);
00638       str+= buflen;
00639     }
00640     else
00641     {
00642       /*
00643         There is no space for whole multibyte
00644         character, then add trailing spaces.
00645       */
00646       *str++= ' ';
00647     }
00648   } while (str < end);
00649 }
00650 
00651 /*
00652 ** Calculate min_str and max_str that ranges a LIKE string.
00653 ** Arguments:
00654 ** ptr    Pointer to LIKE string.
00655 ** ptr_length Length of LIKE string.
00656 ** escape Escape character in LIKE.  (Normally '\').
00657 **    All escape characters should be removed from
00658 **              min_str and max_str
00659 ** w_one        Single char matching char in LIKE (Normally '_')
00660 ** w_many       Multiple char matching char in LIKE (Normally '%')
00661 ** res_length Length of min_str and max_str.
00662 ** min_str  Smallest case sensitive string that ranges LIKE.
00663 **    Should be space padded to res_length.
00664 ** max_str  Largest case sensitive string that ranges LIKE.
00665 **    Normally padded with the biggest character sort value.
00666 **
00667 ** The function should return 0 if ok and 1 if the LIKE string can't be
00668 ** optimized !
00669 */
00670 
00671 bool my_like_range_mb(const CHARSET_INFO * const cs,
00672                          const char *ptr,size_t ptr_length,
00673                          char escape, char w_one, char w_many,
00674                          size_t res_length,
00675                          char *min_str,char *max_str,
00676                          size_t *min_length,size_t *max_length)
00677 {
00678   uint32_t mb_len;
00679   const char *end= ptr + ptr_length;
00680   char *min_org= min_str;
00681   char *min_end= min_str + res_length;
00682   char *max_end= max_str + res_length;
00683   size_t maxcharlen= res_length / cs->mbmaxlen;
00684   const char *contraction_flags= cs->contractions ?
00685               ((const char*) cs->contractions) + 0x40*0x40 : NULL;
00686 
00687   for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--)
00688   {
00689     /* We assume here that escape, w_any, w_namy are one-byte characters */
00690     if (*ptr == escape && ptr+1 != end)
00691       ptr++;                                    /* Skip escape */
00692     else if (*ptr == w_one || *ptr == w_many)   /* '_' and '%' in SQL */
00693     {
00694 fill_max_and_min:
00695       /*
00696         Calculate length of keys:
00697         'a\0\0... is the smallest possible string when we have space expand
00698         a\ff\ff... is the biggest possible string
00699       */
00700       *min_length= ((cs->state & MY_CS_BINSORT) ? (size_t) (min_str - min_org) :
00701                     res_length);
00702       *max_length= res_length;
00703       /* Create min key  */
00704       do
00705       {
00706   *min_str++= (char) cs->min_sort_char;
00707       } while (min_str != min_end);
00708 
00709       /*
00710         Write max key: create a buffer with multibyte
00711         representation of the max_sort_char character,
00712         and copy it into max_str in a loop.
00713       */
00714       *max_length= res_length;
00715       pad_max_char(cs, max_str, max_end);
00716       return 0;
00717     }
00718     if ((mb_len= my_ismbchar(cs, ptr, end)) > 1)
00719     {
00720       if (ptr+mb_len > end || min_str+mb_len > min_end)
00721         break;
00722       while (mb_len--)
00723        *min_str++= *max_str++= *ptr++;
00724     }
00725     else
00726     {
00727       /*
00728         Special case for collations with contractions.
00729         For example, in Chezh, 'ch' is a separate letter
00730         which is sorted between 'h' and 'i'.
00731         If the pattern 'abc%', 'c' at the end can mean:
00732         - letter 'c' itself,
00733         - beginning of the contraction 'ch'.
00734 
00735         If we simply return this LIKE range:
00736 
00737          'abc\min\min\min' and 'abc\max\max\max'
00738 
00739         then this query: SELECT * FROM t1 WHERE a LIKE 'abc%'
00740         will only find values starting from 'abc[^h]',
00741         but won't find values starting from 'abch'.
00742 
00743         We must ignore contraction heads followed by w_one or w_many.
00744         ('Contraction head' means any letter which can be the first
00745         letter in a contraction)
00746 
00747         For example, for Czech 'abc%', we will return LIKE range,
00748         which is equal to LIKE range for 'ab%':
00749 
00750         'ab\min\min\min\min' and 'ab\max\max\max\max'.
00751 
00752       */
00753       if (contraction_flags && ptr + 1 < end &&
00754           contraction_flags[(unsigned char) *ptr])
00755       {
00756         /* Ptr[0] is a contraction head. */
00757 
00758         if (ptr[1] == w_one || ptr[1] == w_many)
00759         {
00760           /* Contraction head followed by a wildcard, quit. */
00761           goto fill_max_and_min;
00762         }
00763 
00764         /*
00765           Some letters can be both contraction heads and contraction tails.
00766           For example, in Danish 'aa' is a separate single letter which
00767           is sorted after 'z'. So 'a' can be both head and tail.
00768 
00769           If ptr[0]+ptr[1] is a contraction,
00770           then put both letters together.
00771 
00772           If ptr[1] can be a contraction part, but ptr[0]+ptr[1]
00773           is not a contraction, then we put only ptr[0],
00774           and continue with ptr[1] on the next loop.
00775         */
00776         if (contraction_flags[(unsigned char) ptr[1]] &&
00777             cs->contractions[(*ptr-0x40)*0x40 + ptr[1] - 0x40])
00778         {
00779           /* Contraction found */
00780           if (maxcharlen == 1 || min_str + 1 >= min_end)
00781           {
00782             /* Both contraction parts don't fit, quit */
00783             goto fill_max_and_min;
00784           }
00785 
00786           /* Put contraction head */
00787           *min_str++= *max_str++= *ptr++;
00788           maxcharlen--;
00789         }
00790       }
00791       /* Put contraction tail, or a single character */
00792       *min_str++= *max_str++= *ptr++;
00793     }
00794   }
00795 
00796   *min_length= *max_length = (size_t) (min_str - min_org);
00797   while (min_str != min_end)
00798     *min_str++= *max_str++= ' ';           /* Because if key compression */
00799   return 0;
00800 }
00801 
00802 
00803 int my_wildcmp_mb_bin(const CHARSET_INFO * const cs,
00804                       const char *str,const char *str_end,
00805                       const char *wildstr,const char *wildend,
00806                       int escape, int w_one, int w_many)
00807 {
00808   int result= -1;       /* Not found, using wildcards */
00809 
00810   while (wildstr != wildend)
00811   {
00812     while (*wildstr != w_many && *wildstr != w_one)
00813     {
00814       int l;
00815       if (*wildstr == escape && wildstr+1 != wildend)
00816   wildstr++;
00817       if ((l = my_ismbchar(cs, wildstr, wildend)))
00818       {
00819     if (str+l > str_end || memcmp(str, wildstr, l) != 0)
00820         return 1;
00821     str += l;
00822     wildstr += l;
00823       }
00824       else
00825       if (str == str_end || *wildstr++ != *str++)
00826   return(1);        /* No match */
00827       if (wildstr == wildend)
00828   return (str != str_end);    /* Match if both are at end */
00829       result=1;         /* Found an anchor char */
00830     }
00831     if (*wildstr == w_one)
00832     {
00833       do
00834       {
00835   if (str == str_end)     /* Skip one char if possible */
00836     return (result);
00837   inc_ptr(cs,str,str_end);
00838       } while (++wildstr < wildend && *wildstr == w_one);
00839       if (wildstr == wildend)
00840   break;
00841     }
00842     if (*wildstr == w_many)
00843     {           /* Found w_many */
00844       unsigned char cmp;
00845       const char* mb = wildstr;
00846       int mb_len=0;
00847 
00848       wildstr++;
00849       /* Remove any '%' and '_' from the wild search string */
00850       for (; wildstr != wildend ; wildstr++)
00851       {
00852   if (*wildstr == w_many)
00853     continue;
00854   if (*wildstr == w_one)
00855   {
00856     if (str == str_end)
00857       return (-1);
00858     inc_ptr(cs,str,str_end);
00859     continue;
00860   }
00861   break;          /* Not a wild character */
00862       }
00863       if (wildstr == wildend)
00864   return(0);        /* Ok if w_many is last */
00865       if (str == str_end)
00866   return -1;
00867 
00868       if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
00869   cmp= *++wildstr;
00870 
00871       mb=wildstr;
00872       mb_len= my_ismbchar(cs, wildstr, wildend);
00873       inc_ptr(cs,wildstr,wildend);    /* This is compared trough cmp */
00874       do
00875       {
00876         for (;;)
00877         {
00878           if (str >= str_end)
00879             return -1;
00880           if (mb_len)
00881           {
00882             if (str+mb_len <= str_end && memcmp(str, mb, mb_len) == 0)
00883             {
00884               str += mb_len;
00885               break;
00886             }
00887           }
00888           else if (!my_ismbchar(cs, str, str_end) && *str == cmp)
00889           {
00890             str++;
00891             break;
00892           }
00893           inc_ptr(cs,str, str_end);
00894         }
00895   {
00896     int tmp=my_wildcmp_mb_bin(cs,str,str_end,wildstr,wildend,escape,w_one,w_many);
00897     if (tmp <= 0)
00898       return (tmp);
00899   }
00900       } while (str != str_end && wildstr[0] != w_many);
00901       return(-1);
00902     }
00903   }
00904   return (str != str_end ? 1 : 0);
00905 }
00906 
00907 
/*
  The pgXX tables were produced from EastAsianWidth.txt
  using the utt11-dump utility.

  NOTE(review): each table looks like one 0/1 flag per code point of a
  256-entry page, with the page number in the table name - confirm against
  the code that indexes these tables.
*/
static char pg11[256] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 00-1F */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 20-3F */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,1, /* 40-5F */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 60-7F */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 80-9F */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* A0-BF */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* C0-DF */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0  /* E0-FF */
};
00923 
/* Flag table pg23: only entries 0x29 and 0x2A are set. */
static char pg23[256] = {
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 00-1F */
  0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 20-3F */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 40-5F */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 60-7F */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 80-9F */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* A0-BF */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* C0-DF */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0  /* E0-FF */
};
00935 
/* Flag table pg2E: entries 0x80-0xF3 are set, except 0x9A. */
static char pg2E[256] = {
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 00-1F */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 20-3F */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 40-5F */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 60-7F */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1, /* 80-9F */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* A0-BF */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* C0-DF */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0  /* E0-FF */
};
00947 
/* Flag table pg2F: entries 0x00-0xD5 and 0xF0-0xFB are set. */
static char pg2F[256] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 00-1F */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 20-3F */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 40-5F */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 60-7F */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 80-9F */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* A0-BF */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0, /* C0-DF */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0  /* E0-FF */
};
00959 
/*
  Per-code-point extra-cell flags for Unicode page 0x30 (U+3000..U+30FF).
  Indexed by the low byte of the code point; 1 means my_numcells_mb()
  counts the character as two cells.
  Every entry is flagged except U+303F, U+3040, U+3097 and U+3098.
*/
00960 static char pg30[256]=
00961 {
00962 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
00963 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
00964 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
00965 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
00966 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,
00967 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
00968 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
00969 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
00970 };
00971 
/*
  Per-code-point extra-cell flags for Unicode page 0x31 (U+3100..U+31FF).
  Indexed by the low byte of the code point; 1 means my_numcells_mb()
  counts the character as two cells.
  Flagged: U+3105..U+312C, U+3131..U+318E, U+3190..U+31B7 and
  U+31F0..U+31FF.
*/
00972 static char pg31[256]=
00973 {
00974 0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
00975 1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
00976 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
00977 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
00978 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
00979 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
00980 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
00981 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
00982 };
00983 
/*
  Per-code-point extra-cell flags for Unicode page 0x32 (U+3200..U+32FF).
  Indexed by the low byte of the code point; 1 means my_numcells_mb()
  counts the character as two cells.
  Flagged: U+3200..U+321E, U+3220..U+3243, U+3250..U+327D and
  U+327F..U+32FE.
*/
00984 static char pg32[256]=
00985 {
00986 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
00987 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
00988 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
00989 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,
00990 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
00991 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
00992 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
00993 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0
00994 };
00995 
/*
  Per-code-point extra-cell flags for Unicode page 0x4D (U+4D00..U+4DFF).
  Indexed by the low byte of the code point; 1 means my_numcells_mb()
  counts the character as two cells.
  Flagged: U+4D00..U+4DB5; the rest of the page is single-cell.
*/
00996 static char pg4D[256]=
00997 {
00998 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
00999 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01000 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01001 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01002 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01003 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
01004 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
01005 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
01006 };
01007 
/*
  Per-code-point extra-cell flags for Unicode page 0x9F (U+9F00..U+9FFF).
  Indexed by the low byte of the code point; 1 means my_numcells_mb()
  counts the character as two cells.
  Flagged: U+9F00..U+9FA5; the rest of the page is single-cell.
*/
01008 static char pg9F[256]=
01009 {
01010 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01011 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01012 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01013 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01014 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01015 1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
01016 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
01017 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
01018 };
01019 
/*
  Per-code-point extra-cell flags for Unicode page 0xA4 (U+A400..U+A4FF).
  Indexed by the low byte of the code point; 1 means my_numcells_mb()
  counts the character as two cells.
  Flagged: U+A400..U+A48C and U+A490..U+A4C6.
*/
01020 static char pgA4[256]=
01021 {
01022 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01023 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01024 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01025 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01026 1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01027 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01028 1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
01029 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
01030 };
01031 
/*
  Per-code-point extra-cell flags for Unicode page 0xD7 (U+D700..U+D7FF).
  Indexed by the low byte of the code point; 1 means my_numcells_mb()
  counts the character as two cells.
  Flagged: U+D700..U+D7A3; the rest of the page is single-cell.
*/
01032 static char pgD7[256]=
01033 {
01034 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01035 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01036 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01037 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01038 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01039 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
01040 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
01041 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
01042 };
01043 
/*
  Per-code-point extra-cell flags for Unicode page 0xFA (U+FA00..U+FAFF).
  Indexed by the low byte of the code point; 1 means my_numcells_mb()
  counts the character as two cells.
  Flagged: U+FA00..U+FA2D and U+FA30..U+FA6A.
*/
01044 static char pgFA[256]=
01045 {
01046 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01047 1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01048 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01049 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
01050 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
01051 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
01052 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
01053 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
01054 };
01055 
/*
  Per-code-point extra-cell flags for Unicode page 0xFE (U+FE00..U+FEFF).
  Indexed by the low byte of the code point; 1 means my_numcells_mb()
  counts the character as two cells.
  Flagged: U+FE30..U+FE52, U+FE54..U+FE66 and U+FE68..U+FE6B.
*/
01056 static char pgFE[256]=
01057 {
01058 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
01059 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01060 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,
01061 1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
01062 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
01063 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
01064 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
01065 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
01066 };
01067 
/*
  Per-code-point extra-cell flags for Unicode page 0xFF (U+FF00..U+FFFF).
  Indexed by the low byte of the code point; 1 means my_numcells_mb()
  counts the character as two cells.
  Flagged: U+FF01..U+FF60 and U+FFE0..U+FFE6.
*/
01068 static char pgFF[256]=
01069 {
01070 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01071 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01072 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
01073 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
01074 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
01075 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
01076 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
01077 1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
01078 };
01079 
/*
  Page directory used by my_numcells_mb() to look up the display width of a
  BMP code point.  The array is indexed by the high byte of the code point,
  (wc >> 8) & 0xFF, so entry N describes U+NN00..U+NNFF.

    page  Extra cells (0 or 1) applied to every code point of the page when
          'p' is NULL.
    p     Optional 256-entry table with a per-code-point extra-cell value,
          indexed by wc & 0xFF; when non-NULL it is used instead of 'page'.

  The extra-cell value is added to a base width of one cell.
  NOTE(review): the name suggests the data was derived from Unicode UTR #11
  (East Asian Width) - confirm which revision before updating the tables.
*/
01080 static class {
01081 public:
01082   int page; 
01083   char *p;
01084 } 
01085   utr11_data[256]=
01086 {
01087 {0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
01088 {0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
01089 {0,NULL},{0,pg11},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
01090 {0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
01091 {0,NULL},{0,NULL},{0,NULL},{0,pg23},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
01092 {0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,pg2E},{0,pg2F},
01093 {0,pg30},{0,pg31},{0,pg32},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
01094 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
01095 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
01096 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pg4D},{1,NULL},{1,NULL},
01097 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
01098 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
01099 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
01100 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
01101 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
01102 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
01103 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
01104 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
01105 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
01106 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pg9F},
01107 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pgA4},{0,NULL},{0,NULL},{0,NULL},
01108 {0,NULL},{0,NULL},{0,NULL},{0,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
01109 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
01110 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
01111 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
01112 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
01113 {1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pgD7},
01114 {0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
01115 {0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
01116 {0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
01117 {0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
01118 {0,NULL},{1,NULL},{0,pgFA},{0,NULL},{0,NULL},{0,NULL},{0,pgFE},{0,pgFF}
01119 };
01120 
01121 
01122 size_t my_numcells_mb(const CHARSET_INFO * const cs, const char *b, const char *e)
01123 {
01124   my_wc_t wc;
01125   size_t clen= 0;
01126 
01127   while (b < e)
01128   {
01129     int mb_len;
01130     uint32_t pg;
01131     if ((mb_len= cs->cset->mb_wc(cs, &wc, (unsigned char*) b, (unsigned char*) e)) <= 0 ||
01132         wc > 0xFFFF)
01133     {
01134       /*
01135         Let's think a wrong sequence takes 1 dysplay cell.
01136         Also, consider supplementary characters as taking one cell.
01137       */
01138       mb_len= 1;
01139       b++;
01140       continue;
01141     }
01142     b+= mb_len;
01143     pg= (wc >> 8) & 0xFF;
01144     clen+= utr11_data[pg].p ? utr11_data[pg].p[wc & 0xFF] : utr11_data[pg].page;
01145     clen++;
01146   }
01147   return clen;
01148 }
01149 
01150 
01151 
01152 int my_mb_ctype_mb(const CHARSET_INFO * const cs, int *ctype,
01153                    const unsigned char *s, const unsigned char *e)
01154 {
01155   my_wc_t wc;
01156   int res= cs->cset->mb_wc(cs, &wc, s, e);
01157   if (res <= 0 || wc > 0xFFFF)
01158     *ctype= 0;
01159   else
01160     *ctype= my_uni_ctype[wc>>8].ctype ?
01161             my_uni_ctype[wc>>8].ctype[wc&0xFF] :
01162             my_uni_ctype[wc>>8].pctype;
01163   return res;
01164 }
01165 
01166 
/*
  Collation handler table wiring up the binary (*_mb_bin) and generic
  multi-byte (*_mb) routines.  The initializer is positional, so the entries
  must stay in the same order as the members of MY_COLLATION_HANDLER.
  NOTE(review): member names are not visible in this file - check the
  MY_COLLATION_HANDLER declaration before adding or reordering entries.
*/
01167 MY_COLLATION_HANDLER my_collation_mb_bin_handler =
01168 {
01169     NULL,              /* init */
01170     my_strnncoll_mb_bin,
01171     my_strnncollsp_mb_bin,
01172     my_strnxfrm_mb,
01173     my_strnxfrmlen_simple,
01174     my_like_range_mb,
01175     my_wildcmp_mb_bin,
01176     my_strcasecmp_mb_bin,
01177     my_instr_mb,
01178     my_hash_sort_mb_bin,
01179     my_propagate_simple
01180 };
01181 
01182 } /* namespace drizzled */