sofia-sip/utf8.h

Go to the documentation of this file.
00001 /*
00002  * This file is part of the Sofia-SIP package
00003  *
00004  * Copyright (C) 2005 Nokia Corporation.
00005  *
00006  * Contact: Pekka Pessi <pekka.pessi@nokia-email.address.hidden>
00007  *
00008  * This library is free software; you can redistribute it and/or
00009  * modify it under the terms of the GNU Lesser General Public License
00010  * as published by the Free Software Foundation; either version 2.1 of
00011  * the License, or (at your option) any later version.
00012  *
00013  * This library is distributed in the hope that it will be useful, but
00014  * WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
00016  * Lesser General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU Lesser General Public
00019  * License along with this library; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
00021  * 02110-1301 USA
00022  *
00023  */
00024 
00041 #ifndef UTF8_H
00042 
00043 #define UTF8_H
00044 
00045 #ifndef SU_TYPES_H
00046 #include <sofia-sip/su_types.h>
00047 #endif
00048 
00049 SOFIA_BEGIN_DECLS
00050 
/* Byte-sized units: a UTF-8 code unit and a UCS-1 (Latin-1) character. */
typedef unsigned char utf8, ucs1;

/* unsigned short units: a UTF-16 code unit and a UCS-2 (BMP) character. */
typedef unsigned short utf16, ucs2;

/* A UCS-4 character. */
typedef unsigned int ucs4;
00056 
00057 SOFIAPUBFUN size_t utf8_width(const utf8 *);
00058 
00059 /* Latin-1 encoding/decoding */
00060 SOFIAPUBFUN size_t ucs18decode(char *dst, size_t dst_size, const utf8 *s);
00061 SOFIAPUBFUN size_t ucs1encode(utf8 *dst, const ucs1 *s, size_t n,
00062                               const char quote[128]);
00063 SOFIAPUBFUN size_t ucs1declen(const utf8 *s);
00064 SOFIAPUBFUN size_t ucs1enclen(const ucs1 *s, size_t n, const char quote[128]);
00065 
00066 /* UCS2 (BMP) encoding/decoding */
00067 size_t ucs2decode(ucs2 *dst, size_t dst_size, const utf8 *s);
00068 size_t ucs2encode(utf8 *dst, const ucs2 *s, size_t n, const char quote[128]);
00069 size_t ucs2declen(const utf8 *s);
00070 size_t ucs2enclen(const ucs2 *s, size_t n, const char quote[128]);
00071 
00072 size_t ucs4decode(ucs4 *dst, size_t dst_size, const utf8 *s);
00073 size_t ucs4encode(utf8 *dst, const ucs4 *s, size_t n, const char quote[128]);
00074 size_t ucs4declen(const utf8 *s);
00075 size_t ucs4enclen(const ucs4 *s, size_t n, const char quote[128]);
00076 
00077 size_t ucs2len(ucs2 const *s);
00078 int ucs2cmp(ucs2 const *s1, ucs2 const *s2);
00079 int ucs2ncmp(ucs2 const *s1, ucs2 const *s2, size_t n);
00080 
00081 size_t ucs4len(ucs4 const *s);
00082 int ucs4cmp(ucs4 const *s1, ucs4 const *s2);
00083 int ucs4ncmp(ucs4 const *s1, ucs4 const *s2, size_t n);
00084 
/*
 * IS_UCS4_n(x) is true when the UCS4 character x is represented by an
 * n-byte UTF-8 string.
 *
 * Ranges with a non-zero lower bound are tested as
 *     (ucs4)(x) - low <= high - low
 * Unsigned wrap-around pushes any value below `low` far above the upper
 * bound, so a single comparison covers both ends of the range and the
 * argument x is evaluated exactly once (safe for arguments with side
 * effects such as *p++).
 */
#define IS_UCS4_1(x) ((ucs4)(x) <= 0x7fu)
#define IS_UCS4_2(x) ((ucs4)(x) - 0x80u <= 0x7ffu - 0x80u)
#define IS_UCS4_3(x) ((ucs4)(x) - 0x800u <= 0xffffu - 0x800u)
#define IS_UCS4_4(x) ((ucs4)(x) - 0x10000u <= 0x1fFFFFu - 0x10000u)
#define IS_UCS4_5(x) ((ucs4)(x) - 0x200000u <= 0x3ffFFFFu - 0x200000u)
#define IS_UCS4_6(x) ((ucs4)(x) - 0x4000000u <= 0x7fffFFFFu - 0x4000000u)
00095 
/*
 * Special test for ISO-8859-1 characters: true for 0x80..0xff.
 *
 * Written as a single unsigned comparison (values below 0x80 wrap around
 * to a large number) so that x is evaluated only once.
 */
#define IS_UCS4_I(x) ((ucs4)(x) - 0x80u <= 0xffu - 0x80u)
00098 
/*
 * Length of an UCS4 character in UTF8 encoding.
 *
 * Evaluates to 1..6, or to 0 when x is above 0x7fffFFFF and therefore has
 * no UTF-8 representation.  The thresholds are the upper bounds used by
 * IS_UCS4_1 .. IS_UCS4_6.
 *
 * (The previous "cond && n || ..." form collapsed to a boolean and could
 * only ever yield 0 or 1.)
 *
 * Note: x may be evaluated several times; do not pass an expression with
 * side effects.
 */
#define UTF8_LEN4(x) \
  ((ucs4)(x) <= 0x7fu       ? 1 : \
   (ucs4)(x) <= 0x7ffu      ? 2 : \
   (ucs4)(x) <= 0xffffu     ? 3 : \
   (ucs4)(x) <= 0x1fFFFFu   ? 4 : \
   (ucs4)(x) <= 0x3ffFFFFu  ? 5 : \
   (ucs4)(x) <= 0x7fffFFFFu ? 6 : 0)
00103 
/*
 * Length of an UCS2 character in UTF8 encoding.
 *
 * Evaluates to 1..3, or to 0 when x is above 0xffff (not a UCS2 value).
 *
 * (The previous "cond && n || ..." form collapsed to a boolean and could
 * only ever yield 0 or 1.)
 *
 * Note: x may be evaluated several times; do not pass an expression with
 * side effects.
 */
#define UTF8_LEN2(x) \
  ((ucs4)(x) <= 0x7fu   ? 1 : \
   (ucs4)(x) <= 0x7ffu  ? 2 : \
   (ucs4)(x) <= 0xffffu ? 3 : 0)
00106 
/*
 * IS_UTF8_n(c) is true when byte c has the lead-byte bit pattern that
 * announces an n-byte wide character:
 *
 *   n = 1: 0xxxxxxx      n = 4: 11110xxx
 *   n = 2: 110xxxxx      n = 5: 111110xx
 *   n = 3: 1110xxxx      n = 6: 1111110x
 */
#define IS_UTF8_1(c) (((c) & 0x80) == 0x00)
#define IS_UTF8_2(c) (((c) & 0xe0) == 0xc0)
#define IS_UTF8_3(c) (((c) & 0xf0) == 0xe0)
#define IS_UTF8_4(c) (((c) & 0xf8) == 0xf0)
#define IS_UTF8_5(c) (((c) & 0xfc) == 0xf8)
#define IS_UTF8_6(c) (((c) & 0xfe) == 0xfc)
00116 
/* Extension (continuation) byte, bit pattern 10xxxxxx? */
#define IS_UTF8_X(c) (((c) & 0xc0) == 0x80)
/*
 * ISO-8859-1 character?  True for lead bytes of the form 110000xx
 * (0xc0..0xc3), i.e. two-byte sequences whose value fits in 8 bits.
 */
#define IS_UTF8_I(c) (((c) & 0xfc) == 0xc0)
00121 
/*
 * IS_UTF8_Sn(s) is true when the bytes at s form an n-byte sequence: a lead
 * byte accepted by IS_UTF8_n followed by n-1 extension bytes (IS_UTF8_X).
 * IS_UTF8_SI(s) is the two-byte variant using the IS_UTF8_I lead-byte test.
 *
 * Only the bit patterns are checked; overlong encodings are not rejected.
 * Bytes are examined left to right and testing stops at the first mismatch,
 * so a terminating NUL ends the test before any byte beyond it is read.
 *
 * The argument is fully parenthesized, so pointer expressions such as
 * IS_UTF8_S2(p + 1) are accepted.  s is evaluated more than once; do not
 * pass an expression with side effects.
 */
#define IS_UTF8_S1(s) \
  (IS_UTF8_1((s)[0]))
#define IS_UTF8_S2(s) \
  (IS_UTF8_2((s)[0]) && IS_UTF8_X((s)[1]))
#define IS_UTF8_SI(s) \
  (IS_UTF8_I((s)[0]) && IS_UTF8_X((s)[1]))
#define IS_UTF8_S3(s) \
  (IS_UTF8_3((s)[0]) && IS_UTF8_X((s)[1]) && IS_UTF8_X((s)[2]))
#define IS_UTF8_S4(s) \
  (IS_UTF8_4((s)[0]) && IS_UTF8_X((s)[1]) && IS_UTF8_X((s)[2]) && \
   IS_UTF8_X((s)[3]))
#define IS_UTF8_S5(s) \
  (IS_UTF8_5((s)[0]) && IS_UTF8_X((s)[1]) && IS_UTF8_X((s)[2]) && \
   IS_UTF8_X((s)[3]) && IS_UTF8_X((s)[4]))
#define IS_UTF8_S6(s) \
  (IS_UTF8_6((s)[0]) && IS_UTF8_X((s)[1]) && IS_UTF8_X((s)[2]) && \
   IS_UTF8_X((s)[3]) && IS_UTF8_X((s)[4]) && IS_UTF8_X((s)[5]))
00138 
/*
 * UCS4_Sn(s) decodes the n-byte UTF-8 sequence at s into a ucs4 value.
 *
 * Each byte is masked down to its payload bits (5/4/3/2/1 bits for the lead
 * byte of a 2/3/4/5/6-byte sequence, 6 bits for every following byte) and
 * the pieces are shifted into place.  No validation is done here; check the
 * sequence with the matching IS_UTF8_Sn first.
 *
 * The argument is fully parenthesized, so pointer expressions such as
 * UCS4_S2(p + 1) are accepted, and every piece is converted to ucs4 before
 * shifting so the whole computation is unsigned.  s is evaluated more than
 * once; do not pass an expression with side effects.
 *
 * NOTE(review): UCS4_S1 converts the byte as-is, so s is expected to point
 * to unsigned bytes (utf8) -- confirm callers never pass plain char.
 */
#define UCS4_S1(s) ((ucs4)((s)[0]))
#define UCS4_S2(s) \
  (((ucs4)((s)[0] & 31) << 6) | \
    (ucs4)((s)[1] & 63))
#define UCS4_S3(s) \
  (((ucs4)((s)[0] & 15) << 12) | \
   ((ucs4)((s)[1] & 63) << 6) | \
    (ucs4)((s)[2] & 63))
#define UCS4_S4(s) \
  (((ucs4)((s)[0] & 7) << 18) | \
   ((ucs4)((s)[1] & 63) << 12) | \
   ((ucs4)((s)[2] & 63) << 6) | \
    (ucs4)((s)[3] & 63))
#define UCS4_S5(s) \
  (((ucs4)((s)[0] & 3) << 24) | \
   ((ucs4)((s)[1] & 63) << 18) | \
   ((ucs4)((s)[2] & 63) << 12) | \
   ((ucs4)((s)[3] & 63) << 6) | \
    (ucs4)((s)[4] & 63))
#define UCS4_S6(s) \
  (((ucs4)((s)[0] & 1) << 30) | \
   ((ucs4)((s)[1] & 63) << 24) | \
   ((ucs4)((s)[2] & 63) << 18) | \
   ((ucs4)((s)[3] & 63) << 12) | \
   ((ucs4)((s)[4] & 63) << 6) | \
    (ucs4)((s)[5] & 63))
00153 
/*
 * UTF8_Sn(s, c) stores character c at s as an n-byte UTF-8 sequence.
 *
 * The lead byte carries the n-byte marker (0xc0, 0xe0, 0xf0, 0xf8, 0xfc)
 * plus the topmost payload bits of c; each following byte carries the next
 * 6 bits with the 10xxxxxx extension marker.  The macro does not check that
 * c actually fits in n bytes, nor that s has room for n bytes; it does not
 * write a terminating NUL.  The value of the expression is that of the last
 * byte stored.
 *
 * Both arguments are fully parenthesized, so expressions such as
 * UTF8_S3(p + 1, base + 1) are encoded correctly.  s and c are evaluated
 * more than once; do not pass expressions with side effects.
 */
#define UTF8_S1(s,c) ((s)[0]=(c))
#define UTF8_S2(s,c) ((s)[0]=(((c)>>6)&31)|0xc0,\
                      (s)[1]=((c)&63)|128)
#define UTF8_S3(s,c) ((s)[0]=(((c)>>12)&15)|0xe0,\
                      (s)[1]=(((c)>>6)&63)|128,\
                      (s)[2]=((c)&63)|128)
#define UTF8_S4(s,c) ((s)[0]=(((c)>>18)&7)|0xf0,\
                      (s)[1]=(((c)>>12)&63)|128,\
                      (s)[2]=(((c)>>6)&63)|128,\
                      (s)[3]=((c)&63)|128)
#define UTF8_S5(s,c) ((s)[0]=(((c)>>24)&3)|0xf8,\
                      (s)[1]=(((c)>>18)&63)|128,\
                      (s)[2]=(((c)>>12)&63)|128,\
                      (s)[3]=(((c)>>6)&63)|128,\
                      (s)[4]=((c)&63)|128)
#define UTF8_S6(s,c) ((s)[0]=(((c)>>30)&1)|0xfc,\
                      (s)[1]=(((c)>>24)&63)|128,\
                      (s)[2]=(((c)>>18)&63)|128,\
                      (s)[3]=(((c)>>12)&63)|128,\
                      (s)[4]=(((c)>>6)&63)|128,\
                      (s)[5]=((c)&63)|128)
00175      
00176 SOFIA_END_DECLS
00177 
00178 #endif /* UTF8_H */

Sofia-SIP 1.12.1 - Copyright (C) 2006 Nokia Corporation. All rights reserved. Licensed under the terms of the GNU Lesser General Public License.