filters

ustring.cpp

00001 // -*- c-basic-offset: 2 -*-
00002 /*
00003  *  This file is part of the KDE libraries
00004  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
00005  *
00006  *  This library is free software; you can redistribute it and/or
00007  *  modify it under the terms of the GNU Library General Public
00008  *  License as published by the Free Software Foundation; either
00009  *  version 2 of the License, or (at your option) any later version.
00010  *
00011  *  This library is distributed in the hope that it will be useful,
00012  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  *  Library General Public License for more details.
00015  *
00016  *  You should have received a copy of the GNU Library General Public License
00017  *  along with this library; see the file COPYING.LIB.  If not, write to
00018  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00019  * Boston, MA 02110-1301, USA.
00020  */
00021 
00022 #include "ustring.h"
00023 
00024 #ifdef HAVE_CONFIG_H
00025 #include <config.h>
00026 #endif
00027 
00028 
00029 #define USTRING_SIMPLIFIED
00030 
00031 #ifdef USTRING_SIMPLIFIED
00032 #include <string.h>
00033 #include <ctype.h>
00034 #include <stdlib.h>
00035 #include <stdio.h>
00036 #include <math.h>
00037 #endif
00038 
00039 
00040 #ifndef USTRING_SIMPLIFIED
00041 
00042 #include <stdlib.h>
00043 #include <stdio.h>
00044 #include <ctype.h>
00045 #ifdef HAVE_STRING_H
00046 #include <string.h>
00047 #endif
00048 #ifdef HAVE_STRINGS_H
00049 #include <strings.h>
00050 #endif
00051 
00052 #ifdef HAVE_MATH_H
00053 #include <math.h>
00054 #endif
00055 #ifdef HAVE_FLOAT_H
00056 #include <float.h>
00057 #endif
00058 #ifdef HAVE_IEEEFP_H
00059 #include <ieeefp.h>
00060 #endif
00061 
00062 #endif
00063 
namespace Swinder {
  // Raw bit patterns of an IEEE 754 double-precision quiet NaN and of
  // +Infinity, stored in the byte order of the target platform.
#ifdef WORDS_BIGENDIAN
  unsigned char NaN_Bytes[] = { 0x7f, 0xf8, 0, 0, 0, 0, 0, 0 };
  unsigned char Inf_Bytes[] = { 0x7f, 0xf0, 0, 0, 0, 0, 0, 0 };
#elif defined(arm)
  // layout with the two 32-bit halves swapped, used when `arm` is defined
  unsigned char NaN_Bytes[] = { 0, 0, 0xf8, 0x7f, 0, 0, 0, 0 };
  unsigned char Inf_Bytes[] = { 0, 0, 0xf0, 0x7f, 0, 0, 0, 0 };
#else
  unsigned char NaN_Bytes[] = { 0, 0, 0, 0, 0, 0, 0xf8, 0x7f };
  unsigned char Inf_Bytes[] = { 0, 0, 0, 0, 0, 0, 0xf0, 0x7f };
#endif

  // Assemble a double from its object representation. The bytes are copied
  // with memcpy rather than read through a double* cast: an unsigned char
  // array is not guaranteed to be aligned for double, and reading it through
  // a double lvalue is not permitted by the aliasing rules.
  static inline double doubleFromBytes(const unsigned char *bytes)
  {
    double value;
    memcpy(&value, bytes, sizeof(value));
    return value;
  }

  const double NaN = doubleFromBytes( NaN_Bytes );
  const double Inf = doubleFromBytes( Inf_Bytes );
}
00079 
00080 using namespace Swinder;
00081 
00082 #ifdef USTRING_SIMPLIFIED
00083 bool Swinder::isNaN(double)
00084 {
00085   return false;
00086 }
00087 #else
00088 bool Swinder::isNaN(double d)
00089 {
00090 #ifdef HAVE_FUNC_ISNAN
00091   return isnan(d);
00092 #elif defined HAVE_FLOAT_H
00093   return _isnan(d) != 0;
00094 #else
00095   return !(d == d);
00096 #endif
00097 }
00098 #endif
00099 
00100 #ifdef USTRING_SIMPLIFIED
00101 bool Swinder::isPosInf(double)
00102 {
00103   return false;
00104 }
00105 #else
00106 bool Swinder::isPosInf(double d)
00107 {
00108 #if defined(HAVE_FUNC_ISINF)
00109   return (isinf(d) == 1);
00110 #elif HAVE_FUNC_FINITE
00111   return finite(d) == 0 && d == d; // ### can we distinguish between + and - ?
00112 #elif HAVE_FUNC__FINITE
00113   return _finite(d) == 0 && d == d; // ###
00114 #else
00115   return false;
00116 #endif
00117 }
00118 #endif
00119 
00120 #ifdef USTRING_SIMPLIFIED
00121 bool Swinder::isNegInf(double)
00122 {
00123   return false;
00124 }
00125 #else
00126 bool Swinder::isNegInf(double d)
00127 {
00128 #if defined(HAVE_FUNC_ISINF)
00129   return (isinf(d) == -1);
00130 #elif HAVE_FUNC_FINITE
00131   return finite(d) == 0 && d == d; // ###
00132 #elif HAVE_FUNC__FINITE
00133   return _finite(d) == 0 && d == d; // ###
00134 #else
00135   return false;
00136 #endif
00137 }
00138 #endif
00139 
00140 CString::CString(const char *c)
00141 {
00142   data = new char[strlen(c)+1];
00143   strcpy(data, c);
00144 }
00145 
00146 CString::CString(const CString &b)
00147 {
00148   data = new char[b.length()+1];
00149   strcpy(data, b.c_str());
00150 }
00151 
00152 CString::~CString()
00153 {
00154   delete [] data;
00155 }
00156 
00157 CString &CString::append(const CString &t)
00158 {
00159   char *n;
00160   if (data) {
00161     n = new char[strlen(data)+t.length()+1];
00162     strcpy(n, data);
00163   } else {
00164     n = new char[t.length()+1];
00165     n[0] = '\0';
00166   }
00167   strcat(n, t.c_str());
00168 
00169   delete [] data;
00170   data = n;
00171 
00172   return *this;
00173 }
00174 
00175 CString &CString::operator=(const char *c)
00176 {
00177   if (data)
00178     delete [] data;
00179   data = new char[strlen(c)+1];
00180   strcpy(data, c);
00181 
00182   return *this;
00183 }
00184 
00185 CString &CString::operator=(const CString &str)
00186 {
00187   if (this == &str)
00188     return *this;
00189 
00190   if (data)
00191     delete [] data;
00192   data = new char[str.length()+1];
00193   strcpy(data, str.c_str());
00194 
00195   return *this;
00196 }
00197 
00198 CString &CString::operator+=(const CString &str)
00199 {
00200   return append(CString(str.c_str()));
00201 }
00202 
00203 int CString::length() const
00204 {
00205   return strlen(data);
00206 }
00207 
00208 bool Swinder::operator==(const Swinder::CString& c1, const Swinder::CString& c2)
00209 {
00210   return (strcmp(c1.c_str(), c2.c_str()) == 0);
00211 }
00212 
00213 UChar UChar::null;
00214 UString::Rep UString::Rep::null = { 0, 0, 1 };
00215 UString UString::null;
00216 static char *statBuffer = 0L;
00217 
00218 UChar::UChar(const UCharReference &c)
00219     : uc( c.unicode() )
00220 {
00221 }
00222 
00223 UChar UChar::toLower() const
00224 {
00225   // ### properly support unicode tolower
00226   if (uc >= 256 || islower(uc))
00227     return *this;
00228 
00229   return UChar(tolower(uc));
00230 }
00231 
00232 UChar UChar::toUpper() const
00233 {
00234   if (uc >= 256 || isupper(uc))
00235     return *this;
00236 
00237   return UChar(toupper(uc));
00238 }
00239 
00240 UCharReference& UCharReference::operator=(UChar c)
00241 {
00242   str->detach();
00243   if (offset < str->rep->len)
00244     *(str->rep->dat + offset) = c;
00245   /* TODO: lengthen string ? */
00246   return *this;
00247 }
00248 
00249 UChar& UCharReference::ref() const
00250 {
00251   if (offset < str->rep->len)
00252     return *(str->rep->dat + offset);
00253   else
00254     return UChar::null;
00255 }
00256 
00257 namespace {
00258   // return an uninitialized UChar array of size s
00259   static inline UChar* allocateChars(int s)
00260   {
00261     // work around default UChar constructor code
00262     return reinterpret_cast<UChar*>(new short[s]);
00263   }
00264 }
00265 
00266 UString::Rep *UString::Rep::create(UChar *d, int l)
00267 {
00268   Rep *r = new Rep;
00269   r->dat = d;
00270   r->len = l;
00271   r->rc = 1;
00272 
00273   return r;
00274 }
00275 
00276 UString::UString()
00277 {
00278   null.rep = &Rep::null;
00279   attach(&Rep::null);
00280 }
00281 
00282 UString::UString(char c)
00283 {
00284   UChar *d = allocateChars( 1 );
00285   d[0] = UChar(0, c);
00286   rep = Rep::create(d, 1);
00287 }
00288 
00289 UString::UString(UChar c)
00290 {
00291   UChar *d = allocateChars( 1 );
00292   d[0] = c;
00293   rep = Rep::create(d, 1);
00294 }
00295 
00296 UString::UString(const char *c)
00297 {
00298   attach(&Rep::null);
00299   operator=(c);
00300 }
00301 
00302 UString::UString(const UChar *c, int length)
00303 {
00304   UChar *d = allocateChars( length );
00305   memcpy(d, c, length * sizeof(UChar));
00306   rep = Rep::create(d, length);
00307 }
00308 
00309 UString::UString(UChar *c, int length, bool copy)
00310 {
00311   UChar *d;
00312   if (copy) {
00313     d = allocateChars( length );
00314     memcpy(d, c, length * sizeof(UChar));
00315   } else
00316     d = c;
00317   rep = Rep::create(d, length);
00318 }
00319 
00320 UString::UString(const UString &b)
00321 {
00322   attach(b.rep);
00323 }
00324 
00325 UString::~UString()
00326 {
00327   release();
00328 }
00329 
00330 UString UString::from(int i)
00331 {
00332   char buf[40];
00333   sprintf(buf, "%d", i);
00334 
00335   return UString(buf);
00336 }
00337 
00338 UString UString::from(unsigned int u)
00339 {
00340   char buf[40];
00341   sprintf(buf, "%u", u);
00342 
00343   return UString(buf);
00344 }
00345 
00346 UString UString::from(double d)
00347 {
00348   char buf[40];
00349 
00350   if (d == -0)
00351     strcpy(buf,"0");
00352   else if (isNaN(d))
00353     strcpy(buf,"NaN");
00354   else if (isPosInf(d))
00355     strcpy(buf,"Infinity");
00356   else if (isNegInf(d))
00357     strcpy(buf,"-Infinity");
00358   else
00359     sprintf(buf, "%.16g", d);   // does the right thing
00360 
00361   // ECMA 3rd ed. 9.8.1 9 e: "with no leading zeros"
00362   int buflen = strlen(buf);
00363   if (buflen >= 4 && buf[buflen-4] == 'e' && buf[buflen-2] == '0') {
00364     buf[buflen-2] = buf[buflen-1];
00365     buf[buflen-1] = 0;
00366   }
00367 
00368   return UString(buf);
00369 }
00370 
00371 UString &UString::append(const UString &t)
00372 {
00373   int l = length();
00374   UChar *n = allocateChars( l+t.length() );
00375   memcpy(n, data(), l * sizeof(UChar));
00376   memcpy(n+l, t.data(), t.length() * sizeof(UChar));
00377   release();
00378   rep = Rep::create(n, l + t.length());
00379 
00380   return *this;
00381 }
00382 
00383 CString UString::cstring() const
00384 {
00385   return CString(ascii());
00386 }
00387 
00388 char *UString::ascii() const
00389 {
00390   if (statBuffer)
00391     delete [] statBuffer;
00392 
00393   statBuffer = new char[length()+1];
00394   for(int i = 0; i < length(); i++)
00395     statBuffer[i] = data()[i].low();
00396   statBuffer[length()] = '\0';
00397 
00398   return statBuffer;
00399 }
00400 
00401 UString &UString::operator=(const char *c)
00402 {
00403   release();
00404   int l = c ? strlen(c) : 0;
00405   UChar *d = allocateChars( l );
00406   for (int i = 0; i < l; i++)
00407     d[i].uc = static_cast<unsigned char>( c[i] );
00408   rep = Rep::create(d, l);
00409 
00410   return *this;
00411 }
00412 
00413 UString &UString::operator=(const UString &str)
00414 {
00415   str.rep->ref();
00416   release();
00417   rep=str.rep;
00418 
00419   return *this;
00420 }
00421 
00422 UString &UString::operator+=(const UString &s)
00423 {
00424   return append(s);
00425 }
00426 
00427 bool UString::is8Bit() const
00428 {
00429   const UChar *u = data();
00430   for(int i = 0; i < length(); i++, u++)
00431     if (u->uc > 0xFF)
00432       return false;
00433 
00434   return true;
00435 }
00436 
00437 UChar UString::operator[](int pos) const
00438 {
00439   if (pos >= length())
00440     return UChar::null;
00441 
00442   return static_cast<const UChar *>( data() )[pos];
00443 }
00444 
00445 UCharReference UString::operator[](int pos)
00446 {
00447   /* TODO: boundary check */
00448   return UCharReference(this, pos);
00449 }
00450 
00451 double UString::toDouble( bool tolerant ) const
00452 {
00453   double d;
00454 
00455   if (!is8Bit())
00456     return NaN;
00457 
00458   CString str = cstring();
00459   const char *c = str.c_str();
00460 
00461   // skip leading white space
00462   while (isspace(*c))
00463     c++;
00464 
00465   // empty string ?
00466   if (*c == '\0')
00467     return tolerant ? NaN : 0.0;
00468 
00469   // hex number ?
00470   if (*c == '0' && (*(c+1) == 'x' || *(c+1) == 'X')) {
00471     c++;
00472     d = 0.0;
00473     while (*(++c)) {
00474       if (*c >= '0' && *c <= '9')
00475     d = d * 16.0 + *c - '0';
00476       else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f'))
00477     d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
00478       else
00479     break;
00480     }
00481   } else {
00482     // regular number ?
00483     char *end;
00484     d = strtod(c, &end);
00485     if ((d != 0.0 || end != c) && d != HUGE_VAL && d != -HUGE_VAL) {
00486       c = end;
00487     } else {
00488       // infinity ?
00489       d = 1.0;
00490       if (*c == '+')
00491     c++;
00492       else if (*c == '-') {
00493     d = -1.0;
00494     c++;
00495       }
00496       if (strncmp(c, "Infinity", 8) != 0)
00497     return NaN;
00498       d = d * Inf;
00499       c += 8;
00500     }
00501   }
00502 
00503   // allow trailing white space
00504   while (isspace(*c))
00505     c++;
00506   // don't allow anything after - unless tolerant=true
00507   if ( !tolerant && *c != '\0')
00508     d = NaN;
00509 
00510   return d;
00511 }
00512 
00513 unsigned long UString::toULong(bool *ok) const
00514 {
00515   double d = toDouble();
00516   bool b = true;
00517 
00518   if (isNaN(d) || d != static_cast<unsigned long>(d)) {
00519     b = false;
00520     d = 0;
00521   }
00522 
00523   if (ok)
00524     *ok = b;
00525 
00526   return static_cast<unsigned long>(d);
00527 }
00528 
00529 int UString::find(const UString &f, int pos) const
00530 {
00531   if (isNull())
00532     return -1;
00533   long fsize = f.length() * sizeof(UChar);
00534   if (pos < 0)
00535     pos = 0;
00536   const UChar *end = data() + length() - f.length();
00537   for (const UChar *c = data() + pos; c <= end; c++)
00538     if (!memcmp(c, f.data(), fsize))
00539       return (c-data());
00540 
00541   return -1;
00542 }
00543 
00544 int UString::rfind(const UString &f, int pos) const
00545 {
00546   if (isNull())
00547     return -1;
00548   if (pos + f.length() >= length())
00549     pos = length() - f.length();
00550   long fsize = f.length() * sizeof(UChar);
00551   for (const UChar *c = data() + pos; c >= data(); c--) {
00552     if (!memcmp(c, f.data(), fsize))
00553       return (c-data());
00554   }
00555 
00556   return -1;
00557 }
00558 
00559 UString UString::substr(int pos, int len) const
00560 {
00561   if (isNull())
00562     return UString();
00563   if (pos < 0)
00564     pos = 0;
00565   else if (pos >= static_cast<int>( length() ))
00566     pos = length();
00567   if (len < 0)
00568     len = length();
00569   if (pos + len >= static_cast<int>( length() ))
00570     len = length() - pos;
00571 
00572   UChar *tmp = allocateChars( len );
00573   memcpy(tmp, data()+pos, len * sizeof(UChar));
00574   UString result(tmp, len);
00575   delete [] tmp;
00576 
00577   return result;
00578 }
00579 
00580 void UString::attach(Rep *r)
00581 {
00582   rep = r;
00583   rep->ref();
00584 }
00585 
00586 void UString::detach()
00587 {
00588   if (rep->rc > 1) {
00589     int l = length();
00590     UChar *n = allocateChars( l );
00591     memcpy(n, data(), l * sizeof(UChar));
00592     release();
00593     rep = Rep::create(n, l);
00594   }
00595 }
00596 
00597 void UString::release()
00598 {
00599   if (!rep->deref()) {
00600     delete [] rep->dat;
00601     delete rep;
00602   }
00603 }
00604 
00605 bool Swinder::operator==(const UString& s1, const UString& s2)
00606 {
00607   if (s1.rep->len != s2.rep->len)
00608     return false;
00609 
00610   return (memcmp(s1.rep->dat, s2.rep->dat,
00611          s1.rep->len * sizeof(UChar)) == 0);
00612 }
00613 
00614 bool Swinder::operator==(const UString& s1, const char *s2)
00615 {
00616   if (s2 == 0L && s1.isNull())
00617     return true;
00618 
00619   if (s1.length() != static_cast<int>( strlen(s2) ))
00620     return false;
00621 
00622   const UChar *u = s1.data();
00623   while (*s2) {
00624     if (u->uc != *s2 )
00625       return false;
00626     s2++;
00627     u++;
00628   }
00629 
00630   return true;
00631 }
00632 
00633 bool Swinder::operator<(const UString& s1, const UString& s2)
00634 {
00635   const int l1 = s1.length();
00636   const int l2 = s2.length();
00637   const int lmin = l1 < l2 ? l1 : l2;
00638   const UChar *c1 = s1.data();
00639   const UChar *c2 = s2.data();
00640   int l = 0;
00641   while (l < lmin && *c1 == *c2) {
00642     c1++;
00643     c2++;
00644     l++;
00645   }
00646   if (l < lmin)
00647     return (c1->unicode() < c2->unicode());
00648 
00649   return (l1 < l2);
00650 }
00651 
00652 UString Swinder::operator+(const UString& s1, const UString& s2)
00653 {
00654   UString tmp(s1);
00655   tmp.append(s2);
00656 
00657   return tmp;
00658 }
00659 
00660 
00661 UConstString::UConstString( UChar* data, unsigned int length ) : UString( data, length, false )
00662 {
00663 }
00664 
00665 UConstString::~UConstString()
00666 {
00667   if ( rep->rc > 1 ) {
00668     int l = length();
00669     UChar* n = allocateChars( l );
00670     memcpy( n, data(), l * sizeof( UChar ) );
00671     rep->dat = n;
00672   }
00673   else
00674     rep->dat = 0;
00675 }
KDE Home | KDE Accessibility Home | Description of Access Keys