Blender  V2.59
string.c
Go to the documentation of this file.
00001 /* string.c
00002  *
00003  * various string, file, list operations.
00004  *
00005  *
00006  * $Id: string.c 36933 2011-05-26 21:04:01Z campbellbarton $
00007  *
00008  * ***** BEGIN GPL LICENSE BLOCK *****
00009  *
00010  * This program is free software; you can redistribute it and/or
00011  * modify it under the terms of the GNU General Public License
00012  * as published by the Free Software Foundation; either version 2
00013  * of the License, or (at your option) any later version.
00014  *
00015  * This program is distributed in the hope that it will be useful,
00016  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00017  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018  * GNU General Public License for more details.
00019  *
00020  * You should have received a copy of the GNU General Public License
00021  * along with this program; if not, write to the Free Software Foundation,
00022  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00023  *
00024  * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
00025  * All rights reserved.
00026  *
00027  * The Original Code is: all of this file.
00028  *
00029  * Contributor(s): none yet.
00030  *
00031  * ***** END GPL LICENSE BLOCK *****
00032  * 
00033  */
00034 
#include <ctype.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
00044 
00045 #include "MEM_guardedalloc.h"
00046 
00047 #include "BLI_dynstr.h"
00048 #include "BLI_string.h"
00049 
/* Copies the first 'len' bytes of 'str' into a new buffer of len+1 bytes
 * obtained from MEM_mallocN and NUL-terminates it.
 *
 *	- str: source bytes, at least 'len' of them must be readable
 *	- len: number of bytes to copy (terminator not included)
 *
 * Returns the newly allocated copy.
 */
char *BLI_strdupn(const char *str, const size_t len)
{
	char *copy= MEM_mallocN(len + 1, "strdup");

	/* terminator first, payload second: the two writes never overlap */
	copy[len]= '\0';
	memcpy(copy, str, len);

	return copy;
}
/* Duplicates the whole NUL-terminated string 'str' by handing its length
 * to BLI_strdupn. Returns the newly allocated copy.
 */
char *BLI_strdup(const char *str)
{
	const size_t len= strlen(str);

	return BLI_strdupn(str, len);
}
00060 
/* Returns a new string (allocated with MEM_mallocN) holding 'str1'
 * immediately followed by 'str2'. Both inputs must be NUL-terminated.
 */
char *BLI_strdupcat(const char *str1, const char *str2)
{
	const size_t len1= strlen(str1);
	const size_t len2= strlen(str2);
	char *joined= MEM_mallocN(len1 + len2 + 1, "strdupcat");

	memcpy(joined, str1, len1);
	/* len2+1 so the terminator of str2 closes the joined string */
	memcpy(joined + len1, str2, len2 + 1);

	return joined;
}
00073 
/* Bounded string copy that always NUL-terminates the destination.
 *
 *	- dst: destination buffer with room for at least 'maxncpy' bytes
 *	- src: NUL-terminated source string
 *	- maxncpy: size of 'dst' in bytes, terminator included
 *
 * At most maxncpy-1 bytes of 'src' are copied, followed by a terminator.
 * When maxncpy is 0 there is no room even for the terminator, so 'dst' is
 * left untouched (previously maxncpy-1 wrapped around and the whole source
 * was copied).
 *
 * Returns 'dst'.
 */
char *BLI_strncpy(char *dst, const char *src, const size_t maxncpy)
{
	size_t srclen, cpylen;

	if (maxncpy == 0)
		return dst;

	srclen= strlen(src);
	cpylen= (srclen > (maxncpy - 1)) ? (maxncpy - 1) : srclen;

	memcpy(dst, src, cpylen);
	dst[cpylen]= '\0';

	return dst;
}
00083 
/* printf-style formatting into a fixed-size buffer that is always left
 * NUL-terminated when it has any room at all.
 *
 *	- buffer: destination, at least 'count' bytes
 *	- count: size of 'buffer' in bytes, terminator included
 *	- format, ...: as for printf
 *
 * Returns the result of vsnprintf converted to size_t, so a negative
 * vsnprintf result shows up as a very large value.
 *
 * With count == 0 nothing is written; previously the terminator was stored
 * at buffer[count-1], i.e. one byte in front of the buffer.
 */
size_t BLI_snprintf(char *buffer, size_t count, const char *format, ...)
{
	int written;
	va_list arg;

	va_start(arg, format);
	written= vsnprintf(buffer, count, format, arg);
	va_end(arg);

	if (count > 0) {
		if (written >= 0 && (size_t)written < count)
			buffer[written]= '\0';
		else
			buffer[count - 1]= '\0';	/* truncated or failed: terminate at the very end */
	}

	return (size_t)written;
}
00101 
00102 char *BLI_sprintfN(const char *format, ...)
00103 {
00104         DynStr *ds;
00105         va_list arg;
00106         char *n;
00107 
00108         va_start(arg, format);
00109 
00110         ds= BLI_dynstr_new();
00111         BLI_dynstr_vappendf(ds, format, arg);
00112         n= BLI_dynstr_get_cstring(ds);
00113         BLI_dynstr_free(ds);
00114 
00115         va_end(arg);
00116 
00117         return n;
00118 }
00119 
/* Makes a copy of the text within the "" that appear after some text 'blahblah'
 * i.e. for string 'pose["apples"]' with prefix 'pose[', it should grab "apples"
 *
 *	- str: is the entire string to chop
 *	- prefix: is the part of the string to leave out
 *
 * The character directly after the prefix is skipped (it is expected to be the
 * opening quote) and everything up to the next '"' is copied.
 *
 * The returned string is always newly allocated and must be freed afterwards.
 * If the prefix is not found, nothing follows it, or there is no closing quote,
 * an empty string is returned instead of doing arithmetic on a NULL pointer.
 */
char *BLI_getQuotedStr (const char *str, const char *prefix)
{
	const size_t prefixLen= strlen(prefix);
	const char *prefixMatch, *startMatch, *endMatch;

	prefixMatch= strstr(str, prefix);

	/* there must be at least one character (the opening quote) after the prefix */
	if (prefixMatch && prefixMatch[prefixLen] != '\0') {
		/* step over the prefix and the opening quote */
		startMatch= prefixMatch + prefixLen + 1;

		/* the next quote after the starting point closes the slice */
		endMatch= strchr(startMatch, '"');

		if (endMatch)
			return BLI_strdupn(startMatch, (size_t)(endMatch - startMatch));
	}

	return BLI_strdupn("", 0);
}
00143 
00144 /* Replaces all occurances of oldText with newText in str, returning a new string that doesn't 
00145  * contain the 'replaced' occurances.
00146  */
00147 // A rather wasteful string-replacement utility, though this shall do for now...
00148 // Feel free to replace this with an even safe + nicer alternative 
00149 char *BLI_replacestr(char *str, const char *oldText, const char *newText)
00150 {
00151         DynStr *ds= NULL;
00152         size_t lenOld= strlen(oldText);
00153         char *match;
00154         
00155         /* sanity checks */
00156         if ((str == NULL) || (str[0]==0))
00157                 return NULL;
00158         else if ((oldText == NULL) || (newText == NULL) || (oldText[0]==0))
00159                 return BLI_strdup(str);
00160         
00161         /* while we can still find a match for the old substring that we're searching for, 
00162          * keep dicing and replacing
00163          */
00164         while ( (match = strstr(str, oldText)) ) {
00165                 /* the assembly buffer only gets created when we actually need to rebuild the string */
00166                 if (ds == NULL)
00167                         ds= BLI_dynstr_new();
00168                         
00169                 /* if the match position does not match the current position in the string, 
00170                  * copy the text up to this position and advance the current position in the string
00171                  */
00172                 if (str != match) {
00173                         /* replace the token at the 'match' position with \0 so that the copied string will be ok,
00174                          * add the segment of the string from str to match to the buffer, then restore the value at match
00175                          */
00176                         match[0]= 0;
00177                         BLI_dynstr_append(ds, str);
00178                         match[0]= oldText[0];
00179                         
00180                         /* now our current position should be set on the start of the match */
00181                         str= match;
00182                 }
00183                 
00184                 /* add the replacement text to the accumulation buffer */
00185                 BLI_dynstr_append(ds, newText);
00186                 
00187                 /* advance the current position of the string up to the end of the replaced segment */
00188                 str += lenOld;
00189         }
00190         
00191         /* finish off and return a new string that has had all occurances of */
00192         if (ds) {
00193                 char *newStr;
00194                 
00195                 /* add what's left of the string to the assembly buffer 
00196                  *      - we've been adjusting str to point at the end of the replaced segments
00197                  */
00198                 if (str != NULL)
00199                         BLI_dynstr_append(ds, str);
00200                 
00201                 /* convert to new c-string (MEM_malloc'd), and free the buffer */
00202                 newStr= BLI_dynstr_get_cstring(ds);
00203                 BLI_dynstr_free(ds);
00204                 
00205                 return newStr;
00206         }
00207         else {
00208                 /* just create a new copy of the entire string - we avoid going through the assembly buffer 
00209                  * for what should be a bit more efficiency...
00210                  */
00211                 return BLI_strdup(str);
00212         }
00213 } 
00214 
/* Returns 1 when 'a' and 'b' compare equal under BLI_strcasecmp, 0 otherwise. */
int BLI_strcaseeq(const char *a, const char *b)
{
	const int order= BLI_strcasecmp(a, b);

	return (order == 0);
}
00219 
/* strcasestr not available in MSVC
 *
 * Case-insensitive search for 'find' inside 's'.
 * Returns a pointer to the first match in 's', 's' itself when 'find' is
 * empty, or NULL when there is no match.
 *
 * Characters are converted to unsigned char before being given to tolower():
 * passing a negative plain char is undefined behavior.
 */
char *BLI_strcasestr(const char *s, const char *find)
{
	char c, sc;
	size_t len;

	if ((c = *find++) != 0) {
		c= (char)tolower((unsigned char)c);
		/* 'find' now points past its first character; len covers the rest */
		len = strlen(find);
		do {
			/* scan for the next character matching the first one of 'find' */
			do {
				if ((sc = *s++) == 0)
					return (NULL);
				sc= (char)tolower((unsigned char)sc);
			} while (sc != c);
		} while (BLI_strncasecmp(s, find, len) != 0);
		/* s was advanced one past the first matching character */
		s--;
	}
	return ((char *) s);
}
00240 
00241 
/* Case-insensitive comparison of two NUL-terminated strings.
 * Returns -1, 0 or 1 when s1 sorts before, equal to, or after s2.
 *
 * Each byte is converted to unsigned char before tolower() (a negative plain
 * char is undefined behavior there); the lowered values are then compared
 * as plain char, as before. The index is a size_t so that it cannot overflow
 * on very long strings.
 */
int BLI_strcasecmp(const char *s1, const char *s2)
{
	size_t i;

	for (i=0; ; i++) {
		const char c1 = (char)tolower((unsigned char)s1[i]);
		const char c2 = (char)tolower((unsigned char)s2[i]);

		if (c1<c2) {
			return -1;
		} else if (c1>c2) {
			return 1;
		} else if (c1==0) {
			/* both strings ended at the same position */
			break;
		}
	}

	return 0;
}
00260 
/* Case-insensitive comparison of at most 'len' bytes of two strings.
 * Returns -1, 0 or 1; comparison stops early when both strings end.
 *
 * Each byte is converted to unsigned char before tolower() (a negative plain
 * char is undefined behavior there), and the index has the same type as
 * 'len' so the loop bound is compared without a signed/unsigned mix.
 */
int BLI_strncasecmp(const char *s1, const char *s2, size_t len)
{
	size_t i;

	for (i=0; i<len; i++) {
		const char c1 = (char)tolower((unsigned char)s1[i]);
		const char c2 = (char)tolower((unsigned char)s2[i]);

		if (c1<c2) {
			return -1;
		} else if (c1>c2) {
			return 1;
		} else if (c1==0) {
			/* both strings ended before 'len' bytes were compared */
			break;
		}
	}

	return 0;
}
00279 
/* natural string compare, keeping numbers in order
 *
 * Compares case-insensitively, except that where both strings have a run of
 * digits at the current position the runs are compared by numeric value, so
 * "file2" sorts before "file10". Returns -1, 0 or 1.
 *
 * Digit runs are compared as text (leading zeros skipped, then by length,
 * then digit by digit) rather than through strtol() and a cast to int, so
 * numbers too large for an int, e.g. timestamps in file names, still order
 * correctly.
 */
int BLI_natstrcmp(const char *s1, const char *s2)
{
	size_t d1= 0, d2= 0;

	while(1) {
		char c1 = (char)tolower((unsigned char)s1[d1]);
		char c2 = (char)tolower((unsigned char)s2[d2]);

		if( isdigit((unsigned char)c1) && isdigit((unsigned char)c2) ) {
			size_t start1= d1, start2= d2;
			size_t digits1, digits2;
			int cmp;

			/* leading zeros do not change the value */
			while( s1[start1] == '0' )
				start1++;
			while( s2[start2] == '0' )
				start2++;

			/* move both positions to the end of their digit run */
			d1= start1;
			while( isdigit((unsigned char)s1[d1]) )
				d1++;
			d2= start2;
			while( isdigit((unsigned char)s2[d2]) )
				d2++;

			/* more significant digits means a larger number */
			digits1= d1 - start1;
			digits2= d2 - start2;
			if (digits1 != digits2)
				return (digits1 < digits2) ? -1 : 1;

			/* same length: the first differing digit decides */
			cmp= memcmp(s1 + start1, s2 + start2, digits1);
			if (cmp != 0)
				return (cmp < 0) ? -1 : 1;

			/* equal numbers, carry on with whatever follows them */
			c1 = (char)tolower((unsigned char)s1[d1]);
			c2 = (char)tolower((unsigned char)s2[d2]);
		}

		/* first check for '.' so "foo.bar" comes before "foo 1.bar" */
		if(c1=='.' && c2!='.')
			return -1;
		if(c1!='.' && c2=='.')
			return 1;
		else if (c1<c2) {
			return -1;
		} else if (c1>c2) {
			return 1;
		} else if (c1==0) {
			break;
		}
		d1++;
		d2++;
	}
	return 0;
}
00332 
/* Formats a time in seconds as "HH:MM:SS.hh", or "MM:SS.hh" when the hour
 * part is zero (hh = hundredths of a second).
 *
 *	- _time: time in seconds
 *	- str: destination buffer, has to be at least 12 bytes long
 *
 * The output is bounded to those 12 bytes: a value that does not fit (e.g.
 * 100 hours or more) is truncated instead of being written past the end of
 * the buffer.
 */
void BLI_timestr(double _time, char *str)
{
	/* format 00:00:00.00 (hr:min:sec) string has to be 12 long */
	enum { TIMESTR_SIZE = 12 };

	const int total_sec= (int)_time;
	const int  hr= total_sec / (60*60);
	const int min= (total_sec / 60) % 60;
	const int sec= total_sec % 60;
	const int hun= ((int)(_time * 100.0)) % 100;

	if (hr) {
		snprintf(str, TIMESTR_SIZE, "%.2d:%.2d:%.2d.%.2d", hr, min, sec, hun);
	} else {
		snprintf(str, TIMESTR_SIZE, "%.2d:%.2d.%.2d", min, sec, hun);
	}

	/* kept so the last byte of the buffer is always a terminator */
	str[TIMESTR_SIZE - 1]= 0;
}
00349 
/* Length of a string stored in a fixed-size buffer: the number of bytes
 * before the first NUL, or 'maxlen' when none of the first 'maxlen' bytes
 * is a NUL.
 */
size_t BLI_strnlen(const char *str, size_t maxlen)
{
	size_t len= 0;

	while (len < maxlen && str[len] != '\0')
		len++;

	return len;
}
00356 
/* from libswish3, originally called u8_isvalid(),
 * modified to return the index of the bad character (byte index not utf).
 * http://svn.swish-e.org/libswish3/trunk/src/libswish3/utf8.c r3044 - campbell */

/* based on the valid_utf8 routine from the PCRE library by Philip Hazel

   length is in bytes, since without knowing whether the string is valid
   it's hard to know how many characters there are! */

/* Number of bytes that follow a given lead byte in a UTF-8 sequence,
 * indexed by the lead byte value. */
static const char trailingBytesForUTF8[256] = {
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};

/* Finds the first invalid UTF-8 sequence in the first 'length' bytes of 'str'.
 *
 *	- str: bytes to check (does not have to be NUL-terminated)
 *	- length: number of bytes to check
 *
 * Returns -1 when all bytes are valid, otherwise the byte index of the first
 * byte of the offending sequence (a stray continuation byte, a lead byte whose
 * sequence is cut off by 'length', an overlong encoding, or a sequence with a
 * bad continuation byte). No byte at or beyond 'length' is read.
 */
int BLI_utf8_invalid_byte(const char *str, int length)
{
	const unsigned char *start= (const unsigned char *)str;
	const unsigned char *p, *pend;

	if (length <= 0)
		return -1;

	pend= start + length;
	p= start;

	while (p < pend) {
		const unsigned char c= *p;
		int ab, i;

		/* plain ascii */
		if (c < 128) {
			p++;
			continue;
		}

		/* a continuation byte (10xx xxxx) cannot start a sequence */
		if ((c & 0xc0) != 0xc0)
			goto utf8_error;

		/* the whole sequence (lead byte + ab trailing bytes) has to fit */
		ab= trailingBytesForUTF8[c];
		if ((pend - p) <= ab)
			goto utf8_error;

		/* Check top bits in the second byte */
		if ((p[1] & 0xc0) != 0x80)
			goto utf8_error;

		/* Check for overlong sequences for each different length */
		switch (ab) {
			/* Check for xx00 000x */
		case 1:
			if ((c & 0x3e) == 0) goto utf8_error;
			break;

			/* Check for 1110 0000, xx0x xxxx */
		case 2:
			if (c == 0xe0 && (p[1] & 0x20) == 0) goto utf8_error;
			break;

			/* Check for 1111 0000, xx00 xxxx */
		case 3:
			if (c == 0xf0 && (p[1] & 0x30) == 0) goto utf8_error;
			break;

			/* Check for 1111 1000, xx00 0xxx */
		case 4:
			if (c == 0xf8 && (p[1] & 0x38) == 0) goto utf8_error;
			break;

			/* Check for leading 0xfe or 0xff,
			   and then for 1111 1100, xx00 00xx */
		case 5:
			if (c == 0xfe || c == 0xff ||
				(c == 0xfc && (p[1] & 0x3c) == 0)) goto utf8_error;
			break;

		default:
			break;
		}

		/* Check for valid bytes after the 2nd, if any; all must start 10 */
		for (i= 2; i <= ab; i++) {
			if ((p[i] & 0xc0) != 0x80) goto utf8_error;
		}

		/* sequence is fine, move on to the next lead byte */
		p += ab + 1;
	}

	return -1;

utf8_error:

	/* p still points at the first byte of the sequence being checked */
	return (int)(p - start);
}
00442 
/* Removes, in place, every byte that BLI_utf8_invalid_byte reports as bad
 * within the first 'length' bytes of 'str'.
 *
 *	- str: buffer to clean up, modified in place
 *	- length: number of bytes to check
 *
 * Each removal shifts the remaining bytes down by one and puts a terminator
 * where the last byte used to be, so the checked range shrinks by one per
 * removed byte and nothing at or beyond str[length] is read or written.
 *
 * Returns the number of bytes removed.
 */
int BLI_utf8_invalid_strip(char *str, int length)
{
	int bad_char, tot= 0;

	while((bad_char= BLI_utf8_invalid_byte(str, length)) != -1) {
		/* continue from the bad byte, everything before it was accepted */
		str += bad_char;
		length -= bad_char;

		/* reported position is outside of the range being checked, nothing to strip */
		if(length <= 0)
			break;

		/* drop the bad byte; when it was the last one this only writes the terminator */
		length--;
		memmove(str, str + 1, (size_t)length);
		str[length]= '\0';
		tot++;
	}

	return tot;
}
00466 
/* Converts the ASCII letters 'A'-'Z' in the first 'len' bytes of 'str' to
 * lower case, in place. All other bytes are left alone; exactly 'len' bytes
 * are visited whether or not a NUL is among them.
 */
void BLI_ascii_strtolower(char *str, int len)
{
	int remaining;

	for(remaining= len; remaining > 0; remaining--, str++) {
		const char ch= *str;

		if(ch >= 'A' && ch <= 'Z')
			*str= ch + ('a' - 'A');
	}
}
00475 
/* Converts the ASCII letters 'a'-'z' in the first 'len' bytes of 'str' to
 * upper case, in place. All other bytes are left alone; exactly 'len' bytes
 * are visited whether or not a NUL is among them.
 */
void BLI_ascii_strtoupper(char *str, int len)
{
	int remaining;

	for(remaining= len; remaining > 0; remaining--, str++) {
		const char ch= *str;

		if(ch >= 'a' && ch <= 'z')
			*str= ch - ('a' - 'A');
	}
}
00484