filters

UnicodeMap.cc

00001 //========================================================================
00002 //
00003 // UnicodeMap.cc
00004 //
00005 // Copyright 2001-2002 Glyph & Cog, LLC
00006 //
00007 //========================================================================
00008 
00009 #include <aconf.h>
00010 
00011 #ifdef USE_GCC_PRAGMAS
00012 #pragma implementation
00013 #endif
00014 
00015 #include <stdio.h>
00016 #include <string.h>
00017 #include "gmem.h"
00018 #include "gfile.h"
00019 #include "GString.h"
00020 #include "GList.h"
00021 #include "Error.h"
00022 #include "GlobalParams.h"
00023 #include "UnicodeMap.h"
00024 
00025 //------------------------------------------------------------------------
00026 
00027 #define maxExtCode 16
00028 
00029 struct UnicodeMapExt {
00030   Unicode u;            // Unicode char
00031   char code[maxExtCode];
00032   Guint nBytes;
00033 };
00034 
00035 //------------------------------------------------------------------------
00036 
00037 UnicodeMap *UnicodeMap::parse(GString *encodingNameA) {
00038   FILE *f;
00039   UnicodeMap *map;
00040   UnicodeMapRange *range;
00041   UnicodeMapExt *eMap;
00042   int size, eMapsSize;
00043   char buf[256];
00044   unsigned int x;
00045   int line, nBytes, i;
00046   char *tok1, *tok2, *tok3;
00047 
00048   if (!(f = globalParams->getUnicodeMapFile(encodingNameA))) {
00049     error(-1, "Couldn't find unicodeMap file for the '%s' encoding",
00050       encodingNameA->getCString());
00051     return NULL;
00052   }
00053 
00054   map = new UnicodeMap(encodingNameA->copy());
00055 
00056   size = 8;
00057   map->ranges = (UnicodeMapRange *)gmalloc(size * sizeof(UnicodeMapRange));
00058   eMapsSize = 0;
00059 
00060   line = 1;
00061   while (getLine(buf, sizeof(buf), f)) {
00062     if ((tok1 = strtok(buf, " \t\r\n")) &&
00063     (tok2 = strtok(NULL, " \t\r\n"))) {
00064       if (!(tok3 = strtok(NULL, " \t\r\n"))) {
00065     tok3 = tok2;
00066     tok2 = tok1;
00067       }
00068       nBytes = strlen(tok3) / 2;
00069       if (nBytes <= 4) {
00070     if (map->len == size) {
00071       size *= 2;
00072       map->ranges = (UnicodeMapRange *)
00073         grealloc(map->ranges, size * sizeof(UnicodeMapRange));
00074     }
00075     range = &map->ranges[map->len];
00076     sscanf(tok1, "%x", &range->start);
00077     sscanf(tok2, "%x", &range->end);
00078     sscanf(tok3, "%x", &range->code);
00079     range->nBytes = nBytes;
00080     ++map->len;
00081       } else if (tok2 == tok1) {
00082     if (map->eMapsLen == eMapsSize) {
00083       eMapsSize += 16;
00084       map->eMaps = (UnicodeMapExt *)
00085         grealloc(map->eMaps, eMapsSize * sizeof(UnicodeMapExt));
00086     }
00087     eMap = &map->eMaps[map->eMapsLen];
00088     sscanf(tok1, "%x", &eMap->u);
00089     for (i = 0; i < nBytes; ++i) {
00090       sscanf(tok3 + i*2, "%2x", &x);
00091       eMap->code[i] = (char)x;
00092     }
00093     eMap->nBytes = nBytes;
00094     ++map->eMapsLen;
00095       } else {
00096     error(-1, "Bad line (%d) in unicodeMap file for the '%s' encoding",
00097           line, encodingNameA->getCString());
00098       }
00099     } else {
00100       error(-1, "Bad line (%d) in unicodeMap file for the '%s' encoding",
00101         line, encodingNameA->getCString());
00102     }
00103     ++line;
00104   }
00105 
00106   fclose(f);
00107 
00108   return map;
00109 }
00110 
00111 UnicodeMap::UnicodeMap(GString *encodingNameA) {
00112   encodingName = encodingNameA;
00113   unicodeOut = gFalse;
00114   kind = unicodeMapUser;
00115   ranges = NULL;
00116   len = 0;
00117   eMaps = NULL;
00118   eMapsLen = 0;
00119   refCnt = 1;
00120 }
00121 
00122 UnicodeMap::UnicodeMap(const char *encodingNameA, GBool unicodeOutA,
00123                UnicodeMapRange *rangesA, int lenA) {
00124   encodingName = new GString(encodingNameA);
00125   unicodeOut = unicodeOutA;
00126   kind = unicodeMapResident;
00127   ranges = rangesA;
00128   len = lenA;
00129   eMaps = NULL;
00130   eMapsLen = 0;
00131   refCnt = 1;
00132 }
00133 
00134 UnicodeMap::UnicodeMap(const char *encodingNameA, GBool unicodeOutA,
00135                UnicodeMapFunc funcA) {
00136   encodingName = new GString(encodingNameA);
00137   unicodeOut = unicodeOutA;
00138   kind = unicodeMapFunc;
00139   func = funcA;
00140   eMaps = NULL;
00141   eMapsLen = 0;
00142   refCnt = 1;
00143 }
00144 
00145 UnicodeMap::~UnicodeMap() {
00146   delete encodingName;
00147   if (kind == unicodeMapUser && ranges) {
00148     gfree(ranges);
00149   }
00150   if (eMaps) {
00151     gfree(eMaps);
00152   }
00153 }
00154 
00155 void UnicodeMap::incRefCnt() {
00156   ++refCnt;
00157 }
00158 
00159 void UnicodeMap::decRefCnt() {
00160   if (--refCnt == 0) {
00161     delete this;
00162   }
00163 }
00164 
00165 GBool UnicodeMap::match(GString *encodingNameA) {
00166   return !encodingName->cmp(encodingNameA);
00167 }
00168 
00169 int UnicodeMap::mapUnicode(Unicode u, char *buf, int bufSize) {
00170   int a, b, m, n, i, j;
00171   Guint code;
00172 
00173   if (kind == unicodeMapFunc) {
00174     return (*func)(u, buf, bufSize);
00175   }
00176 
00177   a = 0;
00178   b = len;
00179   if (u < ranges[a].start) {
00180     return 0;
00181   }
00182   // invariant: ranges[a].start <= u < ranges[b].start
00183   while (b - a > 1) {
00184     m = (a + b) / 2;
00185     if (u >= ranges[m].start) {
00186       a = m;
00187     } else if (u < ranges[m].start) {
00188       b = m;
00189     }
00190   }
00191   if (u <= ranges[a].end) {
00192     n = ranges[a].nBytes;
00193     if (n > bufSize) {
00194       return 0;
00195     }
00196     code = ranges[a].code + (u - ranges[a].start);
00197     for (i = n - 1; i >= 0; --i) {
00198       buf[i] = (char)(code & 0xff);
00199       code >>= 8;
00200     }
00201     return n;
00202   }
00203 
00204   for (i = 0; i < eMapsLen; ++i) {
00205     if (eMaps[i].u == u) {
00206       n = eMaps[i].nBytes;
00207       for (j = 0; j < n; ++j) {
00208     buf[j] = eMaps[i].code[j];
00209       }
00210       return n;
00211     }
00212   }
00213 
00214   return 0;
00215 }
00216 
00217 //------------------------------------------------------------------------
00218 
00219 UnicodeMapCache::UnicodeMapCache() {
00220   int i;
00221 
00222   for (i = 0; i < unicodeMapCacheSize; ++i) {
00223     cache[i] = NULL;
00224   }
00225 }
00226 
00227 UnicodeMapCache::~UnicodeMapCache() {
00228   int i;
00229 
00230   for (i = 0; i < unicodeMapCacheSize; ++i) {
00231     if (cache[i]) {
00232       cache[i]->decRefCnt();
00233     }
00234   }
00235 }
00236 
00237 UnicodeMap *UnicodeMapCache::getUnicodeMap(GString *encodingName) {
00238   UnicodeMap *map;
00239   int i, j;
00240 
00241   if (cache[0] && cache[0]->match(encodingName)) {
00242     cache[0]->incRefCnt();
00243     return cache[0];
00244   }
00245   for (i = 1; i < unicodeMapCacheSize; ++i) {
00246     if (cache[i] && cache[i]->match(encodingName)) {
00247       map = cache[i];
00248       for (j = i; j >= 1; --j) {
00249     cache[j] = cache[j - 1];
00250       }
00251       cache[0] = map;
00252       map->incRefCnt();
00253       return map;
00254     }
00255   }
00256   if ((map = UnicodeMap::parse(encodingName))) {
00257     if (cache[unicodeMapCacheSize - 1]) {
00258       cache[unicodeMapCacheSize - 1]->decRefCnt();
00259     }
00260     for (j = unicodeMapCacheSize - 1; j >= 1; --j) {
00261       cache[j] = cache[j - 1];
00262     }
00263     cache[0] = map;
00264     map->incRefCnt();
00265     return map;
00266   }
00267   return NULL;
00268 }
KDE Home | KDE Accessibility Home | Description of Access Keys