00001 /** @file scim_utility.h 00002 * @brief various utility functions. 00003 */ 00004 00005 /* 00006 * Smart Common Input Method 00007 * 00008 * Copyright (c) 2004 James Su <suzhe@turbolinux.com.cn> 00009 * Copyright (c) 2003 James Su <suzhe@turbolinux.com.cn> 00010 * Copyright (c) 2002 James Su <suzhe@turbolinux.com.cn> 00011 * 00012 * 00013 * This library is free software; you can redistribute it and/or 00014 * modify it under the terms of the GNU Lesser General Public 00015 * License as published by the Free Software Foundation; either 00016 * version 2 of the License, or (at your option) any later version. 00017 * 00018 * This library is distributed in the hope that it will be useful, 00019 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00020 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00021 * GNU Lesser General Public License for more details. 00022 * 00023 * You should have received a copy of the GNU Lesser General Public 00024 * License along with this program; if not, write to the 00025 * Free Software Foundation, Inc., 59 Temple Place, Suite 330, 00026 * Boston, MA 02111-1307 USA 00027 * 00028 * $Id: scim_utility.h,v 1.18 2004/04/22 10:19:38 suzhe Exp $ 00029 */ 00030 00031 #ifndef __SCIM_UTILITY_H 00032 #define __SCIM_UTILITY_H 00033 00034 namespace scim { 00035 /** 00036 * @addtogroup Helper 00037 * @{ 00038 */ 00039 00040 #define SCIM_PATH_DELIM_STRING "/" 00041 #define SCIM_PATH_DELIM '/' 00042 00043 // UTF-8 <-> ucs4_t convert 00044 00045 /* Return code if invalid. (xxx_mbtowc, xxx_wctomb) */ 00046 #define RET_ILSEQ 0 00047 /* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc) */ 00048 #define RET_TOOFEW(n) (-1-(n)) 00049 /* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */ 00050 #define RET_TOOSMALL -1 00051 /* Replacement character for invalid multibyte sequence or wide character. */ 00052 #define BAD_WCHAR ((ucs4_t) 0xfffd) 00053 #define BAD_CHAR '?' 00054 00055 /** 00056 * @brief Convert an utf8 char sequence to ucs4. 00057 * 00058 * @param pwc destination buffer to store the ucs4 code. 00059 * @param src source buffer contains the utf8 char sequence. 00060 * @param src_len the size of source buffer. 00061 * 00062 * @return number of chars in s actually converted. 00063 */ 00064 int utf8_mbtowc (ucs4_t *pwc, const unsigned char *src, int src_len); 00065 00066 /** 00067 * @brief Convert an ucs4 code to utf8 char sequence. 00068 * 00069 * @param dest destination buffer to store utf8 char sequence. 00070 * @param wc the ucs4 code to be converted. 00071 * @param dest_size the size of destination buffer. 00072 * 00073 * @return the number of bytes actually written into dest. 00074 */ 00075 int utf8_wctomb (unsigned char *dest, ucs4_t wc, int dest_size); 00076 00077 /** 00078 * @brief Convert an utf8 string to an ucs4 string. 00079 * 00080 * @param str source utf8 string. 00081 * @return the destination widestring. 00082 */ 00083 WideString utf8_mbstowcs (const String & str); 00084 00085 /** 00086 * @brief Convert an utf8 string to an ucs4 string. 00087 * 00088 * @param str source utf8 string. 00089 * @param len length of the source string. 00090 * @return the destination widestring. 00091 */ 00092 WideString utf8_mbstowcs (const char *str, int len = -1); 00093 00094 /** 00095 * @brief Convert an ucs4 string to an utf8 string. 00096 * 00097 * @param wstr source ucs4 string. 00098 * 00099 * @return the destination utf8 string. 00100 */ 00101 String utf8_wcstombs (const WideString & wstr); 00102 00103 /** 00104 * @brief Convert an ucs4 string to an utf8 string. 00105 * 00106 * @param wstr source ucs4 string. 00107 * @param len length of the source string. 00108 * 00109 * @return the destination utf8 string. 00110 */ 00111 String utf8_wcstombs (const ucs4_t *wstr, int len = -1); 00112 00113 /** 00114 * @brief Read a wide char from istream. 00115 * 00116 * The content in the istream are actually in utf-8 encoding. 00117 * 00118 * @param is the stream to be read. 00119 * 00120 * @return if equal to 0 then got the end of the stream or error occurred. 00121 */ 00122 ucs4_t utf8_read_wchar (std::istream &is); 00123 00124 /** 00125 * @brief Write a wide char to ostream. 00126 * 00127 * The content written into the ostream will be converted into utf-8 encoding. 00128 * 00129 * @param os the stream to be written. 00130 * @param wc the wide char to be written to the stream. 00131 * @return the same stream object reference. 00132 */ 00133 std::ostream & utf8_write_wchar (std::ostream &os, ucs4_t wc); 00134 00135 /** 00136 * @brief Read a wide string from istream. 00137 * 00138 * The content in the istream are actually in utf-8 encoding. 00139 * 00140 * @param is the stream to be read. 00141 * @param delim the delimiter of the string. 00142 * @param rm_delim if the delim should be removed from the destination string. 00143 * @return the wide string read from the given stream. 00144 */ 00145 WideString utf8_read_wstring (std::istream &is, ucs4_t delim = (ucs4_t) '\n', bool rm_delim = true); 00146 00147 /** 00148 * @brief Write a wide string to ostream. 00149 * 00150 * The content written into the ostream will be converted into utf-8 encoding. 00151 * 00152 * @param os the stream to be written. 00153 * @param wstr the wide string to be written into the stream. 00154 * @return the same stream object reference. 00155 */ 00156 std::ostream & utf8_write_wstring (std::ostream &os, const WideString & wstr); 00157 00158 /** 00159 * @brief Convert an uint32 variable into a sequence of bytes. 00160 * 00161 * @param bytes the buffer to store the result. 00162 * @param n the variable to be converted. 00163 */ 00164 inline 00165 void scim_uint32tobytes (unsigned char *bytes, uint32 n) 00166 { 00167 bytes [0] = (unsigned char) ((n & 0xFF)); 00168 bytes [1] = (unsigned char) ((n >> 8) & 0xFF); 00169 bytes [2] = (unsigned char) ((n >> 16) & 0xFF); 00170 bytes [3] = (unsigned char) ((n >> 24) & 0xFF); 00171 } 00172 00173 /** 00174 * @brief Convert a sequence of bytes into an uint32 value. 00175 * 00176 * @param bytes the buffer contains the bytes to be converted. 00177 * @return the result uint32 value. 00178 */ 00179 inline 00180 uint32 scim_bytestouint32 (const unsigned char *bytes) 00181 { 00182 return ((uint32) bytes [0]) 00183 | (((uint32) bytes [1]) << 8) 00184 | (((uint32) bytes [2]) << 16) 00185 | (((uint32) bytes [3]) << 24); 00186 } 00187 00188 /** 00189 * @brief Convert an uint16 variable into a sequence of bytes. 00190 * 00191 * @param bytes the buffer to store the result. 00192 * @param n the variable to be converted. 00193 */ 00194 inline 00195 void scim_uint16tobytes (unsigned char *bytes, uint16 n) 00196 { 00197 bytes [0] = (unsigned char) ((n & 0xFF)); 00198 bytes [1] = (unsigned char) ((n >> 8) & 0xFF); 00199 } 00200 00201 /** 00202 * @brief Convert a sequence of bytes into an uint16 value. 00203 * 00204 * @param bytes the buffer contains the bytes to be converted. 00205 * @return the result uint16 value. 00206 */ 00207 inline 00208 uint16 scim_bytestouint16 (const unsigned char *bytes) 00209 { 00210 return ((uint16) bytes [0]) | (((uint16) bytes [1]) << 8); 00211 } 00212 00213 /** 00214 * @brief Test if the locale is valid, and return the good locale name. 00215 * 00216 * @param locale the locale to be tested. 00217 * @return If the locale is valid, it's the good locale name, otherwise empty. 00218 */ 00219 String scim_validate_locale (const String& locale); 00220 00221 /** 00222 * @brief Get the encoding for a locale. 00223 * 00224 * @param locale the name of the locale. 00225 * @return The encoding used by the given locale. 00226 */ 00227 String scim_get_locale_encoding (const String& locale); 00228 00229 /** 00230 * @brief Get current system locale. 00231 * @return The current system locale. 00232 */ 00233 String scim_get_current_locale (); 00234 00235 /** 00236 * @brief Get the max length of the multibyte char of a locale. 00237 * 00238 * @param locale the name of the locale. 00239 * @return the maxlen of this locale. 00240 */ 00241 int scim_get_locale_maxlen (const String& locale); 00242 00243 /** 00244 * @brief Split string list into a string vector according to the delim char. 00245 * 00246 * @param vec the string vector to store the result. 00247 * @param str the string to be splitted. 00248 * @param delim the delimiter to split the strings. 00249 * @return the number of the strings in the result list. 00250 */ 00251 int scim_split_string_list (std::vector<String>& vec, const String& str, char delim = ','); 00252 00253 /** 00254 * @brief Combine a string vector into one string list, separated by char delim. 00255 * 00256 * @param vec the string vector which contains the strings to be combined. 00257 * @param delim the delimiter which should be put between two strings. 00258 * @return the result string. 00259 */ 00260 String scim_combine_string_list (const std::vector<String>& vec, char delim = ','); 00261 00262 /** 00263 * @brief Get machine endian type 00264 * @return 1 little endian, 0 big endian 00265 */ 00266 bool scim_is_little_endian (); 00267 00268 /** 00269 * @brief Test if wchar_t is using UCS4 encoding. 00270 */ 00271 bool scim_if_wchar_ucs4_equal (); 00272 00273 /** 00274 * @brief Convert a half width unicode char to its full width counterpart. 00275 */ 00276 ucs4_t scim_wchar_to_full_width (ucs4_t code); 00277 00278 /** 00279 * @brief Convert a full width unicode char to its half width counterpart. 00280 */ 00281 ucs4_t scim_wchar_to_half_width (ucs4_t code); 00282 00283 /** 00284 * @brief Get the home dir of current user. 00285 */ 00286 String scim_get_home_dir (); 00287 00288 /** 00289 * @brief Get the name of current user. 00290 */ 00291 String scim_get_user_name (); 00292 00293 /** 00294 * @brief Load a file into memory. 00295 * 00296 * @param filename the name of the file to be loaded. 00297 * @param bufptr the place to store the newly allocated buffer pointer, 00298 * if bufptr == NULL then the file is not actually loaded. 00299 * the pointer *bufptr must be deleted afterwards. 00300 * @return the size of the data actually loaded (mostly, it's the file size), 00301 * zero means load failed. 00302 */ 00303 size_t scim_load_file (const String &filename, char **bufptr); 00304 00305 /** 00306 * @brief Make a directory. 00307 * 00308 * @param dir the dir path to be created. 00309 * 00310 * @return true if sucess. 00311 */ 00312 bool scim_make_dir (const String &dir); 00313 00314 /** @} */ 00315 00316 } // namespace scim 00317 00318 #endif //__SCIM_UTILITY_H 00319 /* 00320 vi:ts=4:nowrap:ai:expandtab 00321 */