Drizzled Public API Documentation

utf8.h

00001 /* -*- mode: c; c-basic-offset: 2; indent-tabs-mode: nil; -*-
00002    vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
00003 
00004   Copyright (C) 2010 Monty Taylor
00005 
00006   This program is free software; you can redistribute it and/or modify
00007   it under the terms of the GNU General Public License as published by
00008   the Free Software Foundation; version 2 of the License.
00009  
00010   This program is distributed in the hope that it will be useful,
00011   but WITHOUT ANY WARRANTY; without even the implied warranty of
00012   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013   GNU General Public License for more details.
00014 
00015   You should have received a copy of the GNU General Public License
00016   along with this program; if not, write to the Free Software
00017   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00018 
00019   Copyright (C) 1995-2006 International Business Machines Corporation and others
00020 
00021   All rights reserved.
00022 
00023   Permission is hereby granted, free of charge, to any person obtaining a copy
00024   of this software and associated documentation files (the "Software"),
00025   to deal in the Software without restriction, including without limitation
00026   the rights to use, copy, modify, merge, publish, distribute, and/or sell
00027   copies of the Software, and to permit persons
00028   to whom the Software is furnished to do so, provided that the above
00029   copyright notice(s) and this permission notice appear in all copies
00030   of the Software and that both the above copyright notice(s) and this
00031   permission notice appear in supporting documentation.
00032 
00033   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00034   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00035   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
00036   IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE
00037   LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR
00038   ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
00039   IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
00040   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
00041 
00042   Except as contained in this notice, the name of a copyright holder shall not
00043   be used in advertising or otherwise to promote the sale, use or other dealings
00044   in this Software without prior written authorization of the copyright holder.
00045 */
00046 
00047 #pragma once
00048 
00049 #include <string>
00050 
00051 #include <drizzled/utf8/checked.h>
00052 #include <drizzled/utf8/unchecked.h>
00053 
00054 namespace drizzled
00055 {
00056 namespace utf8
00057 {
00058 
/**
 * Length limit, in bytes, for one encoded sequence.
 *
 * This equals the largest value that codepoint_length() and
 * sequence_length() in this header ever return.
 */
static const int MAX_LENGTH = 4;
00063 
/**
 * Tell whether a code unit stands alone as a one-byte character.
 *
 * @param c  code unit to inspect; it is narrowed to 8 bits before testing,
 *           so only its low byte matters
 * @return true when the high bit (0x80) of that byte is clear
 */
template <class T>
bool is_single(T c)
{
  const uint8_t byte= static_cast<uint8_t>(c);
  /* Any value below 0x80 has the high bit clear. */
  return byte < 0x80;
}
00074 
/**
 * Number of bytes needed to encode a code point.
 *
 * @param c  code point value
 * @return 1 to 4 for an encodable value, or 0 when c lies in the
 *         range 0xd800..0xdfff or is greater than 0x10ffff
 */
static inline int codepoint_length(uint32_t c)
{
  if (c <= 0x7f)
    return 1;

  if (c <= 0x7ff)
    return 2;

  /* 0xd800..0xdfff is rejected rather than given a length. */
  if (c >= 0xd800 && c <= 0xdfff)
    return 0;

  if (c <= 0xffff)
    return 3;

  if (c <= 0x10ffff)
    return 4;

  /* Everything above 0x10ffff is rejected as well. */
  return 0;
}
00089 
/**
 * Length of the byte sequence announced by a lead byte.
 *
 * Only the bit pattern of the given byte is examined; following bytes
 * are not looked at.
 *
 * @param c  first code unit of a sequence; narrowed to 8 bits before testing
 * @return 1 for a byte below 0x80, 2 for 110xxxxx, 3 for 1110xxxx,
 *         4 for 11110xxx, and 0 for any other byte
 */
template <class T>
int sequence_length(T c)
{
  const uint8_t lead= static_cast<uint8_t>(c);

  if (lead < 0x80)
    return 1;
  if ((lead >> 5) == 0x6)
    return 2;
  if ((lead >> 4) == 0xe)
    return 3;
  if ((lead >> 3) == 0x1e)
    return 4;
  return 0;
}


/**
 * Count the characters in a string by walking its lead bytes.
 *
 * Every step counts one character and skips the number of bytes reported
 * by sequence_length() for the current byte.  A byte for which
 * sequence_length() returns 0 is counted as one character and skipped on
 * its own.  Trailing bytes of a sequence are skipped without inspection.
 *
 * @param in_string  string to measure
 * @return number of characters counted
 */
static inline uint32_t char_length(const std::string &in_string)
{
  uint32_t length= 0;
  std::string::const_iterator iter= in_string.begin();
  const std::string::const_iterator end= in_string.end();

  while (iter != end)
  {
    ++length;

    const int seq_length= sequence_length(*iter);
    std::string::difference_type step= (seq_length > 0) ? seq_length : 1;

    /*
      A sequence cut short at the end of the string announces more bytes
      than are left.  Moving an iterator beyond end() is undefined, so
      stop exactly at end() instead.
    */
    const std::string::difference_type remaining= end - iter;
    if (step > remaining)
      step= remaining;

    iter += step;
  }

  return length;
}

/**
 * Count the characters in a NUL-terminated C string.
 *
 * The text is copied into a std::string and measured by the
 * std::string overload of char_length().
 *
 * @param in_string  NUL-terminated string, or a null pointer
 * @return number of characters counted; 0 for a null pointer
 */
static inline uint32_t char_length(const char *in_string)
{
  /* Building a std::string from a null pointer is undefined behaviour. */
  if (!in_string)
    return 0;

  const std::string process_string(in_string);
  return char_length(process_string);
}
00137 
00138 
00139 } /* namespace utf8 */
00140 } /* namespace drizzled */
00141