00001
00002
00003
00004
00005
00006
00007
00008
00009 #include <aconf.h>
00010
00011 #ifdef USE_GCC_PRAGMAS
00012 #pragma implementation
00013 #endif
00014
00015 #include <stdio.h>
00016 #include <string.h>
00017 #include "gmem.h"
00018 #include "gfile.h"
00019 #include "GString.h"
00020 #include "GList.h"
00021 #include "Error.h"
00022 #include "GlobalParams.h"
00023 #include "UnicodeMap.h"
00024
00025
00026
00027 #define maxExtCode 16
00028
00029 struct UnicodeMapExt {
00030 Unicode u;
00031 char code[maxExtCode];
00032 Guint nBytes;
00033 };
00034
00035
00036
00037 UnicodeMap *UnicodeMap::parse(GString *encodingNameA) {
00038 FILE *f;
00039 UnicodeMap *map;
00040 UnicodeMapRange *range;
00041 UnicodeMapExt *eMap;
00042 int size, eMapsSize;
00043 char buf[256];
00044 unsigned int x;
00045 int line, nBytes, i;
00046 char *tok1, *tok2, *tok3;
00047
00048 if (!(f = globalParams->getUnicodeMapFile(encodingNameA))) {
00049 error(-1, "Couldn't find unicodeMap file for the '%s' encoding",
00050 encodingNameA->getCString());
00051 return NULL;
00052 }
00053
00054 map = new UnicodeMap(encodingNameA->copy());
00055
00056 size = 8;
00057 map->ranges = (UnicodeMapRange *)gmalloc(size * sizeof(UnicodeMapRange));
00058 eMapsSize = 0;
00059
00060 line = 1;
00061 while (getLine(buf, sizeof(buf), f)) {
00062 if ((tok1 = strtok(buf, " \t\r\n")) &&
00063 (tok2 = strtok(NULL, " \t\r\n"))) {
00064 if (!(tok3 = strtok(NULL, " \t\r\n"))) {
00065 tok3 = tok2;
00066 tok2 = tok1;
00067 }
00068 nBytes = strlen(tok3) / 2;
00069 if (nBytes <= 4) {
00070 if (map->len == size) {
00071 size *= 2;
00072 map->ranges = (UnicodeMapRange *)
00073 grealloc(map->ranges, size * sizeof(UnicodeMapRange));
00074 }
00075 range = &map->ranges[map->len];
00076 sscanf(tok1, "%x", &range->start);
00077 sscanf(tok2, "%x", &range->end);
00078 sscanf(tok3, "%x", &range->code);
00079 range->nBytes = nBytes;
00080 ++map->len;
00081 } else if (tok2 == tok1) {
00082 if (map->eMapsLen == eMapsSize) {
00083 eMapsSize += 16;
00084 map->eMaps = (UnicodeMapExt *)
00085 grealloc(map->eMaps, eMapsSize * sizeof(UnicodeMapExt));
00086 }
00087 eMap = &map->eMaps[map->eMapsLen];
00088 sscanf(tok1, "%x", &eMap->u);
00089 for (i = 0; i < nBytes; ++i) {
00090 sscanf(tok3 + i*2, "%2x", &x);
00091 eMap->code[i] = (char)x;
00092 }
00093 eMap->nBytes = nBytes;
00094 ++map->eMapsLen;
00095 } else {
00096 error(-1, "Bad line (%d) in unicodeMap file for the '%s' encoding",
00097 line, encodingNameA->getCString());
00098 }
00099 } else {
00100 error(-1, "Bad line (%d) in unicodeMap file for the '%s' encoding",
00101 line, encodingNameA->getCString());
00102 }
00103 ++line;
00104 }
00105
00106 fclose(f);
00107
00108 return map;
00109 }
00110
00111 UnicodeMap::UnicodeMap(GString *encodingNameA) {
00112 encodingName = encodingNameA;
00113 unicodeOut = gFalse;
00114 kind = unicodeMapUser;
00115 ranges = NULL;
00116 len = 0;
00117 eMaps = NULL;
00118 eMapsLen = 0;
00119 refCnt = 1;
00120 }
00121
00122 UnicodeMap::UnicodeMap(const char *encodingNameA, GBool unicodeOutA,
00123 UnicodeMapRange *rangesA, int lenA) {
00124 encodingName = new GString(encodingNameA);
00125 unicodeOut = unicodeOutA;
00126 kind = unicodeMapResident;
00127 ranges = rangesA;
00128 len = lenA;
00129 eMaps = NULL;
00130 eMapsLen = 0;
00131 refCnt = 1;
00132 }
00133
00134 UnicodeMap::UnicodeMap(const char *encodingNameA, GBool unicodeOutA,
00135 UnicodeMapFunc funcA) {
00136 encodingName = new GString(encodingNameA);
00137 unicodeOut = unicodeOutA;
00138 kind = unicodeMapFunc;
00139 func = funcA;
00140 eMaps = NULL;
00141 eMapsLen = 0;
00142 refCnt = 1;
00143 }
00144
00145 UnicodeMap::~UnicodeMap() {
00146 delete encodingName;
00147 if (kind == unicodeMapUser && ranges) {
00148 gfree(ranges);
00149 }
00150 if (eMaps) {
00151 gfree(eMaps);
00152 }
00153 }
00154
00155 void UnicodeMap::incRefCnt() {
00156 ++refCnt;
00157 }
00158
00159 void UnicodeMap::decRefCnt() {
00160 if (--refCnt == 0) {
00161 delete this;
00162 }
00163 }
00164
00165 GBool UnicodeMap::match(GString *encodingNameA) {
00166 return !encodingName->cmp(encodingNameA);
00167 }
00168
00169 int UnicodeMap::mapUnicode(Unicode u, char *buf, int bufSize) {
00170 int a, b, m, n, i, j;
00171 Guint code;
00172
00173 if (kind == unicodeMapFunc) {
00174 return (*func)(u, buf, bufSize);
00175 }
00176
00177 a = 0;
00178 b = len;
00179 if (u < ranges[a].start) {
00180 return 0;
00181 }
00182
00183 while (b - a > 1) {
00184 m = (a + b) / 2;
00185 if (u >= ranges[m].start) {
00186 a = m;
00187 } else if (u < ranges[m].start) {
00188 b = m;
00189 }
00190 }
00191 if (u <= ranges[a].end) {
00192 n = ranges[a].nBytes;
00193 if (n > bufSize) {
00194 return 0;
00195 }
00196 code = ranges[a].code + (u - ranges[a].start);
00197 for (i = n - 1; i >= 0; --i) {
00198 buf[i] = (char)(code & 0xff);
00199 code >>= 8;
00200 }
00201 return n;
00202 }
00203
00204 for (i = 0; i < eMapsLen; ++i) {
00205 if (eMaps[i].u == u) {
00206 n = eMaps[i].nBytes;
00207 for (j = 0; j < n; ++j) {
00208 buf[j] = eMaps[i].code[j];
00209 }
00210 return n;
00211 }
00212 }
00213
00214 return 0;
00215 }
00216
00217
00218
00219 UnicodeMapCache::UnicodeMapCache() {
00220 int i;
00221
00222 for (i = 0; i < unicodeMapCacheSize; ++i) {
00223 cache[i] = NULL;
00224 }
00225 }
00226
00227 UnicodeMapCache::~UnicodeMapCache() {
00228 int i;
00229
00230 for (i = 0; i < unicodeMapCacheSize; ++i) {
00231 if (cache[i]) {
00232 cache[i]->decRefCnt();
00233 }
00234 }
00235 }
00236
00237 UnicodeMap *UnicodeMapCache::getUnicodeMap(GString *encodingName) {
00238 UnicodeMap *map;
00239 int i, j;
00240
00241 if (cache[0] && cache[0]->match(encodingName)) {
00242 cache[0]->incRefCnt();
00243 return cache[0];
00244 }
00245 for (i = 1; i < unicodeMapCacheSize; ++i) {
00246 if (cache[i] && cache[i]->match(encodingName)) {
00247 map = cache[i];
00248 for (j = i; j >= 1; --j) {
00249 cache[j] = cache[j - 1];
00250 }
00251 cache[0] = map;
00252 map->incRefCnt();
00253 return map;
00254 }
00255 }
00256 if ((map = UnicodeMap::parse(encodingName))) {
00257 if (cache[unicodeMapCacheSize - 1]) {
00258 cache[unicodeMapCacheSize - 1]->decRefCnt();
00259 }
00260 for (j = unicodeMapCacheSize - 1; j >= 1; --j) {
00261 cache[j] = cache[j - 1];
00262 }
00263 cache[0] = map;
00264 map->incRefCnt();
00265 return map;
00266 }
00267 return NULL;
00268 }