00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 #include "CSConfig.h"
00029 #include <inttypes.h>
00030
00031
00032 #include <string.h>
00033 #include <stdlib.h>
00034 #include <ctype.h>
00035 #include <stdio.h>
00036 #include <errno.h>
00037
00038 #ifdef DRIZZLED
00039 #include <boost/algorithm/string.hpp>
00040 #define STRCASESTR(s1, s2) boost::ifind_first(s1, s2)
00041 #else
00042 #define STRCASESTR(s1, s2) strcasestr(s1, s2)
00043 #endif
00044
00045 #include "CSXML.h"
00046
/*
 * Character classification helpers used by the tokenizer.
 * The argument is evaluated several times, so it must be free of side
 * effects. It is parenthesised so that any expression (for example a
 * conditional expression) can be passed safely.
 */
#define ISSPACE(ch)			((ch) == ' ' || (ch) == '\t' || (ch) == '\n' || (ch) == '\r')
#define ISSINGLE(ch)		((ch) == '*' || (ch) == '+' || (ch) == '(' || (ch) == ')' || (ch) == ',' || (ch) == '|' || (ch) == '[' || (ch) == ']' || (ch) == '?' || (ch) == '/')

/*
 * Buffer helpers. 'x' is a pointer to an object with 'buffer' and 'count'
 * members. They are wrapped in do { } while (0) so that each behaves as a
 * single statement (safe in an unbraced if/else).
 *
 * SET_CHAR: make 'ch' the only character in the buffer.
 * ADD_CHAR: append 'ch'; once PARSE_BUFFER_SIZE characters are held, the
 *           last slot is overwritten instead of growing the buffer.
 */
#define SET_CHAR(x, ch)		do { (x)->buffer[0] = (ch); (x)->count = 1; } while (0)
#define ADD_CHAR(x, ch)		do { if ((x)->count < PARSE_BUFFER_SIZE) { (x)->buffer[(x)->count] = (ch); (x)->count++; } else (x)->buffer[PARSE_BUFFER_SIZE-1] = (ch); } while (0)
00052
00053 bool CSXMLParser::match_string(const char *ch)
00054 {
00055 int32_t i;
00056
00057 for (i=0; i<this->count; i++) {
00058 if (this->buffer[i] != *ch)
00059 return false;
00060 ch++;
00061 }
00062 if (*ch)
00063 return false;
00064 return(i == this->count);
00065 }
00066
00067 void CSXMLParser::increment_nesting(wchar_t ch)
00068 {
00069 if (this->nesting < PARSE_STACK_SIZE) {
00070 switch (ch) {
00071 case '/':
00072 this->end_type[this->nesting] = XML_OP_1_END_CLOSE_TAG;
00073 break;
00074 case '?':
00075 this->end_type[this->nesting] = XML_OP_1_END_PI_TAG;
00076 break;
00077 case '!':
00078 this->end_type[this->nesting] = XML_OP_1_END_ENTITY_TAG;
00079 break;
00080 case '[':
00081 this->end_type[this->nesting] = XML_OP_1_END_BRACKET_TAG;
00082 break;
00083 default:
00084 if (ISSPACE(ch))
00085 this->end_type[this->nesting] = XML_OP_1_END_UNKNOWN_TAG;
00086 else
00087 this->end_type[this->nesting] = XML_OP_1_END_TAG;
00088 break;
00089 }
00090 }
00091 this->nesting++;
00092 }
00093
// Feed one character to the XML tokenizer state machine.
//
// The function dispatches on this->state and, for the given character,
// updates this->state, this->step, this->nesting, this->quote and the
// character buffer (through SET_CHAR / ADD_CHAR). The operation produced
// for this character is stored in this->type.
//
// Parameter ch: the next input character.
// Returns:      this->type as set for this character; XML_noop is stored
//               when the character produces no operation.
//
// In most branches this->step selects which variant of an operation is
// produced:
//   XML_STEP_TAG    -> the "start_tag_" variant
//   XML_STEP_NESTED -> the plain variant
//   XML_STEP_NONE   -> the "end_cdata_" variant (where the branch has one)
//   anything else   -> the "add_attr_" variant
00094 int32_t CSXMLParser::parseChar(wchar_t ch)
00095
00096
00097
00098
00099
00100
00101
00102 {
00103 switch (this->state) {
// Resets the nesting depth, then treats the character like XML_IN_CDATA
// does (and moves to XML_IN_CDATA for anything other than '<').
00104 case XML_BEFORE_CDATA:
00105 this->nesting = 0;
00106
00107 if (ch == '<') {
00108 this->state = XML_LT;
00109 this->type = XML_noop;
00110 }
00111 else {
00112 this->state = XML_IN_CDATA;
00113 this->type = XML_CDATA_CH;
00114 }
00115 SET_CHAR(this, ch);
00116 break;
// Character data: '<' may open a tag, every other character is reported
// as XML_CDATA_CH. The character becomes the sole buffer content.
00117 case XML_IN_CDATA:
00118 if (ch == '<') {
00119 this->state = XML_LT;
00120 this->type = XML_noop;
00121 }
00122 else
00123 this->type = XML_CDATA_CH;
00124 SET_CHAR(this, ch);
00125 break;
// A '<' has just been seen.
00126 case XML_LT:
00127 if (ISSPACE(ch)) {
// '<' followed by whitespace: with a non-zero nesting depth this opens a
// new level with an empty buffer; otherwise the character is appended to
// the buffer and reported as character data.
00128 if (this->nesting) {
00129 this->state = XML_BEFORE_ATTR;
00130 if (this->step == XML_STEP_TAG)
00131 this->type = XML_start_tag_TAG_CH;
00132 else if (this->step == XML_STEP_NESTED)
00133 this->type = XML_TAG_CH;
00134 else if (this->step == XML_STEP_NONE)
00135 this->type = XML_end_cdata_TAG_CH;
00136 else
00137 this->type = XML_add_attr_TAG_CH;
00138 this->step = XML_STEP_TAG;
00139 increment_nesting(ch);
00140 this->count = 0;
00141 }
00142 else {
00143 this->state = XML_IN_CDATA;
00144 this->type = XML_CDATA_CH;
00145 ADD_CHAR(this, ch);
00146 }
00147 }
// "<!" - the decision is deferred to the next character.
00148 else if (ch == '!') {
00149 this->state = XML_LT_BANG;
00150 this->type = XML_noop;
00151 ADD_CHAR(this, ch);
00152 }
// Any other character is the first character of a tag name; the same
// character is passed to increment_nesting to select the end type.
00153 else {
00154 this->state = XML_IN_TAG_NAME;
00155 if (this->step == XML_STEP_TAG)
00156 this->type = XML_start_tag_TAG_CH;
00157 else if (this->step == XML_STEP_NESTED)
00158 this->type = XML_TAG_CH;
00159 else if (this->step == XML_STEP_NONE)
00160 this->type = XML_end_cdata_TAG_CH;
00161 else
00162 this->type = XML_add_attr_TAG_CH;
00163 this->step = XML_STEP_TAG;
00164 increment_nesting(ch);
00165 SET_CHAR(this, ch);
00166 }
00167 break;
// "<!" seen: '-' may start a comment, '[' may start a "<![" section,
// anything else starts a tag whose name begins with '!'.
00168 case XML_LT_BANG:
00169 if (ch == '-') {
00170 this->state = XML_LT_BANG_DASH;
00171 this->type = XML_noop;
00172 }
00173 else if (ch == '[') {
00174 this->state = XML_LT_BANG_SQR;
00175 this->type = XML_noop;
00176 }
00177 else {
00178 this->state = XML_IN_TAG_NAME;
00179 if (this->step == XML_STEP_TAG)
00180 this->type = XML_start_tag_TAG_CH;
00181 else if (this->step == XML_STEP_NESTED)
00182 this->type = XML_TAG_CH;
00183 else if (this->step == XML_STEP_NONE)
00184 this->type = XML_end_cdata_TAG_CH;
00185 else
00186 this->type = XML_add_attr_TAG_CH;
00187 this->step = XML_STEP_TAG;
00188 increment_nesting('!');
00189 SET_CHAR(this, '!');
00190 }
// In all three branches the current character is appended to the buffer.
00191 ADD_CHAR(this, ch);
00192 break;
// "<!-" seen: a second '-' starts a comment (a nesting level is pushed
// with ' ' as the opening character); anything else is appended to the
// buffer and reported as character data.
00193 case XML_LT_BANG_DASH:
00194 if (ch == '-') {
00195 this->state = XML_IN_COMMENT;
00196 if (this->step == XML_STEP_TAG)
00197 this->type = XML_start_tag_start_comment;
00198 else if (this->step == XML_STEP_NESTED)
00199 this->type = XML_start_comment;
00200 else if (this->step == XML_STEP_NONE)
00201 this->type = XML_end_cdata_start_comment;
00202 else
00203 this->type = XML_add_attr_start_comment;
00204 increment_nesting(' ');
00205 }
00206 else {
00207 this->state = XML_IN_CDATA;
00208 this->type = XML_CDATA_CH;
00209 ADD_CHAR(this, ch);
00210 }
00211 break;
// "<![" seen: whitespace is skipped; a second '[' yields a tag named
// "![" ; any other character starts a name collected after "![".
00212 case XML_LT_BANG_SQR:
00213 if (ISSPACE(ch))
00214 this->type = XML_noop;
00215 else if (ch == '[') {
00216 this->state = XML_BEFORE_ATTR;
00217 if (this->step == XML_STEP_TAG)
00218 this->type = XML_start_tag_TAG_CH;
00219 else if (this->step == XML_STEP_NESTED)
00220 this->type = XML_TAG_CH;
00221 else if (this->step == XML_STEP_NONE)
00222 this->type = XML_end_cdata_TAG_CH;
00223 else
00224 this->type = XML_add_attr_TAG_CH;
00225 this->step = XML_STEP_TAG;
00226 increment_nesting('[');
00227 SET_CHAR(this, '!');
00228 ADD_CHAR(this, '[');
00229 }
00230 else {
00231 this->state = XML_LT_BANG_SQR_IN_NAME;
00232 this->type = XML_noop;
00233 SET_CHAR(this, '!');
00234 ADD_CHAR(this, '[');
00235 ADD_CHAR(this, ch);
00236 }
00237 break;
// Collecting the name that follows "<![". Whitespace ends the name.
// On '[' the buffer is compared with "![CDATA": a match starts a CDATA
// section, anything else is reported as a bracket tag. Other characters
// are appended to the buffer without producing an operation.
00238 case XML_LT_BANG_SQR_IN_NAME:
00239 if (ISSPACE(ch)) {
00240 this->state = XML_LT_BANG_SQR_AFTER_NAME;
00241 this->type = XML_noop;
00242 }
00243 else if (ch == '[') {
00244 if (match_string("![CDATA")) {
00245 this->state = XML_IN_CDATA_TAG;
00246 if (this->step == XML_STEP_TAG)
00247 this->type = XML_start_tag_start_cdata_tag;
00248 else if (this->step == XML_STEP_NESTED)
00249 this->type = XML_start_cdata_tag;
00250 else if (this->step == XML_STEP_NONE)
00251 this->type = XML_end_cdata_start_cdata_tag;
00252 else
00253 this->type = XML_add_attr_start_cdata_tag;
00254 this->step = XML_STEP_TAG;
00255 increment_nesting('[');
00256 }
00257 else {
00258 this->state = XML_BEFORE_ATTR;
00259 if (this->step == XML_STEP_TAG)
00260 this->type = XML_start_tag_TAG_CH;
00261 else if (this->step == XML_STEP_NESTED)
00262 this->type = XML_TAG_CH;
00263 else if (this->step == XML_STEP_NONE)
00264 this->type = XML_end_cdata_TAG_CH;
00265 else
00266 this->type = XML_add_attr_TAG_CH;
00267 this->step = XML_STEP_TAG;
00268 increment_nesting('[');
00269 }
00270 }
00271 else {
00272 this->type = XML_noop;
00273 ADD_CHAR(this, ch);
00274 }
00275 break;
// Whitespace followed the name after "<![": wait for '[' and then decide
// exactly as in XML_LT_BANG_SQR_IN_NAME; all other characters are ignored.
// NOTE(review): unlike the XML_LT_BANG_SQR_IN_NAME case, the CDATA branch
// here leaves this->step unchanged - confirm this is intended.
00276 case XML_LT_BANG_SQR_AFTER_NAME:
00277 if (ch == '[') {
00278 if (match_string("![CDATA")) {
00279 this->state = XML_IN_CDATA_TAG;
00280 if (this->step == XML_STEP_TAG)
00281 this->type = XML_start_tag_start_cdata_tag;
00282 else if (this->step == XML_STEP_NESTED)
00283 this->type = XML_start_cdata_tag;
00284 else if (this->step == XML_STEP_NONE)
00285 this->type = XML_end_cdata_start_cdata_tag;
00286 else
00287 this->type = XML_add_attr_start_cdata_tag;
00288 increment_nesting('[');
00289 }
00290 else {
00291 this->state = XML_BEFORE_ATTR;
00292 if (this->step == XML_STEP_TAG)
00293 this->type = XML_start_tag_TAG_CH;
00294 else if (this->step == XML_STEP_NESTED)
00295 this->type = XML_TAG_CH;
00296 else if (this->step == XML_STEP_NONE)
00297 this->type = XML_end_cdata_TAG_CH;
00298 else
00299 this->type = XML_add_attr_TAG_CH;
00300 this->step = XML_STEP_TAG;
00301 increment_nesting('[');
00302 }
00303 }
00304 else
00305
00306 this->type = XML_noop;
00307 break;
// Inside a tag name. The branch structure below ('<', '>', quotes,
// '/', '?', ']', single-character tokens) recurs in the following
// in-tag states with small differences in the final else branch.
00308 case XML_IN_TAG_NAME:
00309 if (ISSPACE(ch)) {
00310 this->state = XML_BEFORE_ATTR;
00311 this->type = XML_noop;
00312 }
00313 else if (ch == '<') {
00314 this->state = XML_LT;
00315 this->type = XML_noop;
00316 }
// '>' ends the current tag: the operation is built from
// END_TAG_TYPE(this), one nesting level is popped, and parsing continues
// in XML_BEFORE_ATTR if a level remains or in XML_IN_CDATA if none does.
00317 else if (ch == '>') {
00318 if (this->step == XML_STEP_TAG)
00319 this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
00320 else if (this->step == XML_STEP_NESTED)
00321 this->type = XML_end_tag(END_TAG_TYPE(this));
00322 else
00323 this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
00324 this->nesting--;
00325 if (this->nesting) {
00326 this->step = XML_STEP_NESTED;
00327 this->state = XML_BEFORE_ATTR;
00328 }
00329 else {
00330 this->step = XML_STEP_NONE;
00331 this->state = XML_IN_CDATA;
00332 }
00333 }
// A quote opens a quoted value; the quote character is remembered so the
// matching one can close it.
00334 else if (ch == '"' || ch == '\'') {
00335 this->state = XML_QUOTE_BEFORE_VALUE;
00336 this->quote = ch;
00337 this->type = XML_noop;
00338 }
// '/', '?' and ']' are held back (no operation yet) only when
// END_TAG_TYPE(this) is the corresponding end type; the XML_SLASH /
// XML_QMARK / XML_SQR states decide on the next character.
00339 else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
00340 this->state = XML_SLASH;
00341 this->type = XML_noop;
00342 }
00343 else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
00344 this->state = XML_QMARK;
00345 this->type = XML_noop;
00346 }
00347 else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
00348 this->state = XML_SQR;
00349 this->type = XML_noop;
00350 }
// The remaining ISSINGLE characters are each reported as the start of an
// attribute, and the state returns to XML_BEFORE_ATTR.
00351 else if (ISSINGLE(ch)) {
00352 this->state = XML_BEFORE_ATTR;
00353 if (this->step == XML_STEP_TAG)
00354 this->type = XML_start_tag_ATTR_CH;
00355 else if (this->step == XML_STEP_NESTED)
00356 this->type = XML_ATTR_CH;
00357 else
00358 this->type = XML_add_attr_ATTR_CH;
00359 this->step = XML_STEP_ATTR;
00360 SET_CHAR(this, ch);
00361 }
// Ordinary character: one more character of the tag name.
00362 else {
00363 this->type = XML_TAG_CH;
00364 SET_CHAR(this, ch);
00365 }
00366 break;
// Between tokens inside a tag: whitespace is skipped, an ordinary
// character starts an attribute name (XML_IN_ATTR).
00367 case XML_BEFORE_ATTR:
00368 if (ISSPACE(ch))
00369 this->type = XML_noop;
00370 else if (ch == '<') {
00371 this->state = XML_LT;
00372 this->type = XML_noop;
00373 }
00374 else if (ch == '>') {
00375 if (this->step == XML_STEP_TAG)
00376 this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
00377 else if (this->step == XML_STEP_NESTED)
00378 this->type = XML_end_tag(END_TAG_TYPE(this));
00379 else
00380 this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
00381 this->nesting--;
00382 if (this->nesting) {
00383 this->step = XML_STEP_NESTED;
00384 this->state = XML_BEFORE_ATTR;
00385 }
00386 else {
00387 this->step = XML_STEP_NONE;
00388 this->state = XML_IN_CDATA;
00389 }
00390 }
00391 else if (ch == '"' || ch == '\'') {
00392 this->state = XML_QUOTE_BEFORE_VALUE;
00393 this->quote = ch;
00394 this->type = XML_noop;
00395 }
00396 else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
00397 this->state = XML_SLASH;
00398 this->type = XML_noop;
00399 }
00400 else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
00401 this->state = XML_QMARK;
00402 this->type = XML_noop;
00403 }
00404 else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
00405 this->state = XML_SQR;
00406 this->type = XML_noop;
00407 }
00408 else if (ISSINGLE(ch)) {
00409 if (this->step == XML_STEP_TAG)
00410 this->type = XML_start_tag_ATTR_CH;
00411 else if (this->step == XML_STEP_NESTED)
00412 this->type = XML_ATTR_CH;
00413 else
00414 this->type = XML_add_attr_ATTR_CH;
00415 this->step = XML_STEP_ATTR;
00416 SET_CHAR(this, ch);
00417 }
00418 else {
00419 this->state = XML_IN_ATTR;
00420 if (this->step == XML_STEP_TAG)
00421 this->type = XML_start_tag_ATTR_CH;
00422 else if (this->step == XML_STEP_NESTED)
00423 this->type = XML_ATTR_CH;
00424 else
00425 this->type = XML_add_attr_ATTR_CH;
00426 this->step = XML_STEP_ATTR;
00427 SET_CHAR(this, ch);
00428 }
00429 break;
// Inside an attribute name: whitespace moves to XML_BEFORE_EQUAL, '='
// moves to XML_AFTER_EQUAL, ordinary characters extend the name.
00430 case XML_IN_ATTR:
00431 if (ISSPACE(ch)) {
00432 this->state = XML_BEFORE_EQUAL;
00433 this->type = XML_noop;
00434 }
00435 else if (ch == '<') {
00436 this->state = XML_LT;
00437 this->type = XML_noop;
00438 }
00439 else if (ch == '>') {
00440 if (this->step == XML_STEP_TAG)
00441 this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
00442 else if (this->step == XML_STEP_NESTED)
00443 this->type = XML_end_tag(END_TAG_TYPE(this));
00444 else
00445 this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
00446 this->nesting--;
00447 if (this->nesting) {
00448 this->step = XML_STEP_NESTED;
00449 this->state = XML_BEFORE_ATTR;
00450 }
00451 else {
00452 this->step = XML_STEP_NONE;
00453 this->state = XML_IN_CDATA;
00454 }
00455 }
00456 else if (ch == '"' || ch == '\'') {
00457 this->state = XML_QUOTE_BEFORE_VALUE;
00458 this->quote = ch;
00459 this->type = XML_noop;
00460 }
00461 else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
00462 this->state = XML_SLASH;
00463 this->type = XML_noop;
00464 }
00465 else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
00466 this->state = XML_QMARK;
00467 this->type = XML_noop;
00468 }
00469 else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
00470 this->state = XML_SQR;
00471 this->type = XML_noop;
00472 }
00473 else if (ISSINGLE(ch)) {
00474 this->state = XML_BEFORE_ATTR;
00475 if (this->step == XML_STEP_TAG)
00476 this->type = XML_start_tag_ATTR_CH;
00477 else if (this->step == XML_STEP_NESTED)
00478 this->type = XML_ATTR_CH;
00479 else
00480 this->type = XML_add_attr_ATTR_CH;
00481 this->step = XML_STEP_ATTR;
00482 SET_CHAR(this, ch);
00483 }
00484 else if (ch == '=') {
00485 this->state = XML_AFTER_EQUAL;
00486 this->type = XML_noop;
00487 }
00488 else {
00489 this->type = XML_ATTR_CH;
00490 SET_CHAR(this, ch);
00491 }
00492 break;
// Whitespace followed an attribute name: '=' moves to XML_AFTER_EQUAL,
// an ordinary character starts a new attribute name.
00493 case XML_BEFORE_EQUAL:
00494 if (ISSPACE(ch))
00495 this->type = XML_noop;
00496 else if (ch == '<') {
00497 this->state = XML_LT;
00498 this->type = XML_noop;
00499 }
00500 else if (ch == '>') {
00501 if (this->step == XML_STEP_TAG)
00502 this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
00503 else if (this->step == XML_STEP_NESTED)
00504 this->type = XML_end_tag(END_TAG_TYPE(this));
00505 else
00506 this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
00507 this->nesting--;
00508 if (this->nesting) {
00509 this->step = XML_STEP_NESTED;
00510 this->state = XML_BEFORE_ATTR;
00511 }
00512 else {
00513 this->step = XML_STEP_NONE;
00514 this->state = XML_IN_CDATA;
00515 }
00516 }
00517 else if (ch == '"' || ch == '\'') {
00518 this->state = XML_QUOTE_BEFORE_VALUE;
00519 this->quote = ch;
00520 this->type = XML_noop;
00521 }
00522 else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
00523 this->state = XML_SLASH;
00524 this->type = XML_noop;
00525 }
00526 else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
00527 this->state = XML_QMARK;
00528 this->type = XML_noop;
00529 }
00530 else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
00531 this->state = XML_SQR;
00532 this->type = XML_noop;
00533 }
00534 else if (ISSINGLE(ch)) {
00535 this->state = XML_BEFORE_ATTR;
00536 if (this->step == XML_STEP_TAG)
00537 this->type = XML_start_tag_ATTR_CH;
00538 else if (this->step == XML_STEP_NESTED)
00539 this->type = XML_ATTR_CH;
00540 else
00541 this->type = XML_add_attr_ATTR_CH;
00542 this->step = XML_STEP_ATTR;
00543 SET_CHAR(this, ch);
00544 }
00545 else if (ch == '=') {
00546 this->state = XML_AFTER_EQUAL;
00547 this->type = XML_noop;
00548 }
00549 else {
00550 this->state = XML_IN_ATTR;
00551 if (this->step == XML_STEP_TAG)
00552 this->type = XML_start_tag_ATTR_CH;
00553 else if (this->step == XML_STEP_NESTED)
00554 this->type = XML_ATTR_CH;
00555 else
00556 this->type = XML_add_attr_ATTR_CH;
00557 this->step = XML_STEP_ATTR;
00558 SET_CHAR(this, ch);
00559 }
00560 break;
// After '=': whitespace is skipped; an ordinary character starts an
// unquoted value (this->quote is cleared).
00561 case XML_AFTER_EQUAL:
00562 if (ISSPACE(ch)) {
00563 this->state = XML_AFTER_EQUAL;
00564 this->type = XML_noop;
00565 }
00566 else if (ch == '<') {
00567 this->state = XML_LT;
00568 this->type = XML_noop;
00569 }
00570 else if (ch == '>') {
00571 if (this->step == XML_STEP_TAG)
00572 this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
00573 else if (this->step == XML_STEP_NESTED)
00574 this->type = XML_end_tag(END_TAG_TYPE(this));
00575 else
00576 this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
00577 this->nesting--;
00578 if (this->nesting) {
00579 this->step = XML_STEP_NESTED;
00580 this->state = XML_BEFORE_ATTR;
00581 }
00582 else {
00583 this->step = XML_STEP_NONE;
00584 this->state = XML_IN_CDATA;
00585 }
00586 }
00587 else if (ch == '"' || ch == '\'') {
00588 this->state = XML_QUOTE_BEFORE_VALUE;
00589 this->quote = ch;
00590 this->type = XML_noop;
00591 }
00592 else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
00593 this->state = XML_SLASH;
00594 this->type = XML_noop;
00595 }
00596 else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
00597 this->state = XML_QMARK;
00598 this->type = XML_noop;
00599 }
00600 else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
00601 this->state = XML_SQR;
00602 this->type = XML_noop;
00603 }
00604 else if (ISSINGLE(ch)) {
00605 this->state = XML_BEFORE_ATTR;
00606 if (this->step == XML_STEP_TAG)
00607 this->type = XML_start_tag_ATTR_CH;
00608 else if (this->step == XML_STEP_NESTED)
00609 this->type = XML_ATTR_CH;
00610 else
00611 this->type = XML_add_attr_ATTR_CH;
00612 this->step = XML_STEP_ATTR;
00613 SET_CHAR(this, ch);
00614 }
// Value operations use a different step mapping: XML_STEP_TAG ->
// start_tag_VALUE_CH, XML_STEP_VALUE -> add_attr_VALUE_CH, otherwise
// the plain XML_VALUE_CH.
00615 else {
00616 this->state = XML_IN_VALUE;
00617 this->quote = 0;
00618 if (this->step == XML_STEP_TAG)
00619 this->type = XML_start_tag_VALUE_CH;
00620 else if (this->step == XML_STEP_VALUE)
00621 this->type = XML_add_attr_VALUE_CH;
00622 else
00623 this->type = XML_VALUE_CH;
00624 this->step = XML_STEP_VALUE;
00625 SET_CHAR(this, ch);
00626 }
00627 break;
// An opening quote has just been seen. If the same quote follows
// immediately, a value operation is produced with an empty buffer
// (count = 0); otherwise the character is the first one of the value.
00628 case XML_QUOTE_BEFORE_VALUE:
00629 if (ch == this->quote) {
00630 this->state = XML_QUOTE_AFTER_VALUE;
00631
00632 if (this->step == XML_STEP_TAG)
00633 this->type = XML_start_tag_VALUE_CH;
00634 else if (this->step == XML_STEP_VALUE)
00635 this->type = XML_add_attr_VALUE_CH;
00636 else
00637 this->type = XML_VALUE_CH;
00638 this->step = XML_STEP_VALUE;
00639 this->count = 0;
00640 }
00641 else {
00642 this->state = XML_IN_VALUE;
00643 if (this->step == XML_STEP_TAG)
00644 this->type = XML_start_tag_VALUE_CH;
00645 else if (this->step == XML_STEP_VALUE)
00646 this->type = XML_add_attr_VALUE_CH;
00647 else
00648 this->type = XML_VALUE_CH;
00649 this->step = XML_STEP_VALUE;
00650 SET_CHAR(this, ch);
00651 }
00652 break;
// Inside a value. A quoted value (this->quote != 0) ends only at the
// matching quote; an unquoted value ends at whitespace, '<', '>' or a
// quote character.
00653 case XML_IN_VALUE:
00654 if (this->quote) {
00655 if (ch == this->quote) {
00656 this->state = XML_QUOTE_AFTER_VALUE;
00657 this->type = XML_noop;
00658 }
00659 else {
00660 this->type = XML_VALUE_CH;
00661 SET_CHAR(this, ch);
00662 }
00663 }
00664 else {
00665
00666 if (ISSPACE(ch)) {
00667 this->state = XML_BEFORE_ATTR;
00668 this->type = XML_noop;
00669 }
00670 else if (ch == '<') {
00671 this->state = XML_LT;
00672 this->type = XML_noop;
00673 }
00674 else if (ch == '>') {
00675 if (this->step == XML_STEP_TAG)
00676 this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
00677 else if (this->step == XML_STEP_NESTED)
00678 this->type = XML_end_tag(END_TAG_TYPE(this));
00679 else
00680 this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
00681 this->nesting--;
00682 if (this->nesting) {
00683 this->step = XML_STEP_NESTED;
00684 this->state = XML_BEFORE_ATTR;
00685 }
00686 else {
00687 this->step = XML_STEP_NONE;
00688 this->state = XML_IN_CDATA;
00689 }
00690 }
00691 else if (ch == '"' || ch == '\'') {
00692 this->state = XML_QUOTE_BEFORE_VALUE;
00693 this->quote = ch;
00694 this->type = XML_noop;
00695 }
00696 else {
00697 this->type = XML_VALUE_CH;
00698 SET_CHAR(this, ch);
00699 }
00700 }
00701 break;
// The closing quote of a value has been seen; the next token is handled
// like in XML_BEFORE_ATTR, except that whitespace is needed to get there.
00702 case XML_QUOTE_AFTER_VALUE:
00703 if (ISSPACE(ch)) {
00704 this->state = XML_BEFORE_ATTR;
00705 this->type = XML_noop;
00706 }
00707 else if (ch == '<') {
00708 this->state = XML_LT;
00709 this->type = XML_noop;
00710 }
00711 else if (ch == '>') {
00712 if (this->step == XML_STEP_TAG)
00713 this->type = XML_start_tag_end_tag(END_TAG_TYPE(this));
00714 else if (this->step == XML_STEP_NESTED)
00715 this->type = XML_end_tag(END_TAG_TYPE(this));
00716 else
00717 this->type = XML_add_attr_end_tag(END_TAG_TYPE(this));
00718 this->nesting--;
00719 if (this->nesting) {
00720 this->step = XML_STEP_NESTED;
00721 this->state = XML_BEFORE_ATTR;
00722 }
00723 else {
00724 this->step = XML_STEP_NONE;
00725 this->state = XML_IN_CDATA;
00726 }
00727 }
00728 else if (ch == '"' || ch == '\'') {
00729 this->state = XML_QUOTE_BEFORE_VALUE;
00730 this->quote = ch;
00731 this->type = XML_noop;
00732 }
00733 else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
00734 this->state = XML_SLASH;
00735 this->type = XML_noop;
00736 }
00737 else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
00738 this->state = XML_QMARK;
00739 this->type = XML_noop;
00740 }
00741 else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
00742 this->state = XML_SQR;
00743 this->type = XML_noop;
00744 }
00745 else if (ISSINGLE(ch)) {
00746 this->state = XML_BEFORE_ATTR;
00747 if (this->step == XML_STEP_TAG)
00748 this->type = XML_start_tag_ATTR_CH;
00749 else if (this->step == XML_STEP_NESTED)
00750 this->type = XML_ATTR_CH;
00751 else
00752 this->type = XML_add_attr_ATTR_CH;
00753 this->step = XML_STEP_ATTR;
00754 SET_CHAR(this, ch);
00755 }
00756 else {
00757 this->state = XML_IN_ATTR;
00758 if (this->step == XML_STEP_TAG)
00759 this->type = XML_start_tag_ATTR_CH;
00760 else if (this->step == XML_STEP_NESTED)
00761 this->type = XML_ATTR_CH;
00762 else
00763 this->type = XML_add_attr_ATTR_CH;
00764 this->step = XML_STEP_ATTR;
00765 SET_CHAR(this, ch);
00766 }
00767 break;
// A ']', '/' or '?' was held back on the previous call. The held
// character is first placed in the buffer, then all three states share
// the code at "cont". this->state still identifies the held character
// until one of the branches below reassigns it; the '>' branch relies on
// this to choose the kind of end operation.
00768 case XML_SQR:
00769 SET_CHAR(this, ']');
00770 goto cont;
00771 case XML_SLASH:
00772 SET_CHAR(this, '/');
00773 goto cont;
00774 case XML_QMARK:
00775 SET_CHAR(this, '?');
00776 cont:
// Not followed by '>': the held character is reported through a TAG_CH
// operation (the buffer holds it) and the current character selects the
// next state.
00777 if (ISSPACE(ch)) {
00778 this->state = XML_BEFORE_ATTR;
00779 if (this->step == XML_STEP_TAG)
00780 this->type = XML_start_tag_TAG_CH;
00781 else if (this->step == XML_STEP_NESTED)
00782 this->type = XML_TAG_CH;
00783 else if (this->step == XML_STEP_NONE)
00784 this->type = XML_end_cdata_TAG_CH;
00785 else
00786 this->type = XML_add_attr_TAG_CH;
00787 this->step = XML_STEP_ATTR;
00788 }
00789 else if (ch == '<') {
00790 this->state = XML_LT;
00791 if (this->step == XML_STEP_TAG)
00792 this->type = XML_start_tag_TAG_CH;
00793 else if (this->step == XML_STEP_NESTED)
00794 this->type = XML_TAG_CH;
00795 else if (this->step == XML_STEP_NONE)
00796 this->type = XML_end_cdata_TAG_CH;
00797 else
00798 this->type = XML_add_attr_TAG_CH;
00799 this->step = XML_STEP_TAG;
00800 }
// "/>" ends an empty tag, "]>" ends a bracket tag, "?>" ends a
// processing instruction; afterwards one nesting level is popped.
00801 else if (ch == '>') {
00802 if (this->state == XML_SLASH) {
00803 if (this->step == XML_STEP_TAG)
00804 this->type = XML_start_tag_end_empty_tag;
00805 else if (this->step == XML_STEP_NESTED)
00806 this->type = XML_end_empty_tag;
00807 else
00808 this->type = XML_add_attr_end_empty_tag;
00809 }
00810 else if (this->state == XML_SQR) {
00811 if (this->step == XML_STEP_TAG)
00812 this->type = XML_start_tag_end_tag(XML_OP_1_END_BRACKET_TAG);
00813 else if (this->step == XML_STEP_NESTED)
00814 this->type = XML_end_tag(XML_OP_1_END_BRACKET_TAG);
00815 else
00816 this->type = XML_add_attr_end_tag(XML_OP_1_END_BRACKET_TAG);
00817 }
00818 else {
00819 if (this->step == XML_STEP_TAG)
00820 this->type = XML_start_tag_end_pi_tag;
00821 else if (this->step == XML_STEP_NESTED)
00822 this->type = XML_end_pi_tag;
00823 else
00824 this->type = XML_add_attr_end_pi_tag;
00825 }
00826 this->nesting--;
00827 if (this->nesting) {
00828 this->step = XML_STEP_NESTED;
00829 this->state = XML_BEFORE_ATTR;
00830 }
00831 else {
00832 this->step = XML_STEP_NONE;
00833 this->state = XML_IN_CDATA;
00834 }
00835 }
00836 else if (ch == '"' || ch == '\'') {
00837 this->state = XML_QUOTE_BEFORE_VALUE;
00838 this->quote = ch;
00839 if (this->step == XML_STEP_TAG)
00840 this->type = XML_start_tag_TAG_CH;
00841 else if (this->step == XML_STEP_NESTED)
00842 this->type = XML_TAG_CH;
00843 else if (this->step == XML_STEP_NONE)
00844 this->type = XML_end_cdata_TAG_CH;
00845 else
00846 this->type = XML_add_attr_TAG_CH;
00847 this->step = XML_STEP_ATTR;
00848 }
// Another candidate terminator character: report the held one and hold
// back the new one.
00849 else if (ch == '/' && (END_TAG_TYPE(this) == XML_OP_1_END_TAG)) {
00850 this->state = XML_SLASH;
00851 if (this->step == XML_STEP_TAG)
00852 this->type = XML_start_tag_TAG_CH;
00853 else if (this->step == XML_STEP_NESTED)
00854 this->type = XML_TAG_CH;
00855 else if (this->step == XML_STEP_NONE)
00856 this->type = XML_end_cdata_TAG_CH;
00857 else
00858 this->type = XML_add_attr_TAG_CH;
00859 this->step = XML_STEP_ATTR;
00860 }
00861 else if (ch == '?' && (END_TAG_TYPE(this) == XML_OP_1_END_PI_TAG)) {
00862 this->state = XML_QMARK;
00863 if (this->step == XML_STEP_TAG)
00864 this->type = XML_start_tag_TAG_CH;
00865 else if (this->step == XML_STEP_NESTED)
00866 this->type = XML_TAG_CH;
00867 else if (this->step == XML_STEP_NONE)
00868 this->type = XML_end_cdata_TAG_CH;
00869 else
00870 this->type = XML_add_attr_TAG_CH;
00871 this->step = XML_STEP_ATTR;
00872 }
00873 else if (ch == ']' && (END_TAG_TYPE(this) == XML_OP_1_END_BRACKET_TAG)) {
00874 this->state = XML_SQR;
00875 if (this->step == XML_STEP_TAG)
00876 this->type = XML_start_tag_TAG_CH;
00877 else if (this->step == XML_STEP_NESTED)
00878 this->type = XML_TAG_CH;
00879 else if (this->step == XML_STEP_NONE)
00880 this->type = XML_end_cdata_TAG_CH;
00881 else
00882 this->type = XML_add_attr_TAG_CH;
00883 this->step = XML_STEP_ATTR;
00884 }
// In the last two branches the current character is appended after the
// held one, so both are in the buffer for the TAG_CH operation.
00885 else if (ISSINGLE(ch)) {
00886 this->state = XML_BEFORE_ATTR;
00887 if (this->step == XML_STEP_TAG)
00888 this->type = XML_start_tag_TAG_CH;
00889 else if (this->step == XML_STEP_NESTED)
00890 this->type = XML_TAG_CH;
00891 else if (this->step == XML_STEP_NONE)
00892 this->type = XML_end_cdata_TAG_CH;
00893 else
00894 this->type = XML_add_attr_TAG_CH;
00895 this->step = XML_STEP_ATTR;
00896 ADD_CHAR(this, ch);
00897 }
00898 else {
00899 this->state = XML_IN_ATTR;
00900 if (this->step == XML_STEP_TAG)
00901 this->type = XML_start_tag_TAG_CH;
00902 else if (this->step == XML_STEP_NESTED)
00903 this->type = XML_TAG_CH;
00904 else if (this->step == XML_STEP_NONE)
00905 this->type = XML_end_cdata_TAG_CH;
00906 else
00907 this->type = XML_add_attr_TAG_CH;
00908 this->step = XML_STEP_ATTR;
00909 ADD_CHAR(this, ch);
00910 }
00911 break;
// Inside a comment: a '-' is held back (no operation) because it may be
// the start of the closing "-->".
00912 case XML_IN_COMMENT:
00913 if (ch == '-') {
00914 this->state = XML_IN_COMMENT_DASH;
00915 this->type = XML_noop;
00916 }
00917 else
00918 this->type = XML_COMMENT_CH;
00919 SET_CHAR(this, ch);
00920 break;
// One '-' is held in the buffer; the current character is appended, so a
// non-'-' character is reported together with the held dash.
00921 case XML_IN_COMMENT_DASH:
00922 if (ch == '-') {
00923 this->state = XML_IN_COMMENT_DASH_DASH;
00924 this->type = XML_noop;
00925 }
00926 else {
00927 this->state = XML_IN_COMMENT;
00928 this->type = XML_COMMENT_CH;
00929 }
00930 ADD_CHAR(this, ch);
00931 break;
// "--" is held in the buffer: '>' ends the comment and pops a nesting
// level; a third '-' reports a single '-' and keeps waiting; any other
// character is appended to the held dashes and reported as comment text.
00932 case XML_IN_COMMENT_DASH_DASH:
00933 if (ch == '-') {
00934 this->state = XML_IN_COMMENT_3_DASH;
00935 this->type = XML_COMMENT_CH;
00936 SET_CHAR(this, ch);
00937 }
00938 else if (ch == '>') {
00939 this->type = XML_end_comment;
00940 this->nesting--;
00941 if (this->nesting) {
00942 this->step = XML_STEP_NESTED;
00943 this->state = XML_BEFORE_ATTR;
00944 }
00945 else {
00946 this->step = XML_STEP_NONE;
00947 this->state = XML_IN_CDATA;
00948 }
00949 }
00950 else {
00951 this->state = XML_IN_COMMENT;
00952 this->type = XML_COMMENT_CH;
00953 ADD_CHAR(this, ch);
00954 }
00955 break;
// Three or more dashes in a row: each further '-' is reported on its own;
// '>' ends the comment; any other character is reported with "--" put
// in front of it in the buffer.
00956 case XML_IN_COMMENT_3_DASH:
00957 if (ch == '-') {
00958 this->type = XML_COMMENT_CH;
00959 SET_CHAR(this, ch);
00960 }
00961 else if (ch == '>') {
00962 this->type = XML_end_comment;
00963 this->nesting--;
00964 if (this->nesting) {
00965 this->step = XML_STEP_NESTED;
00966 this->state = XML_BEFORE_ATTR;
00967 }
00968 else {
00969 this->step = XML_STEP_NONE;
00970 this->state = XML_IN_CDATA;
00971 }
00972 }
00973 else {
00974 this->state = XML_IN_COMMENT;
00975 this->type = XML_COMMENT_CH;
00976 SET_CHAR(this, '-');
00977 ADD_CHAR(this, '-');
00978 ADD_CHAR(this, ch);
00979 }
00980 break;
// Inside a CDATA section. The four states below mirror the comment
// states above, with ']' in place of '-' and XML_end_cdata_tag in place
// of XML_end_comment.
00981 case XML_IN_CDATA_TAG:
00982 if (ch == ']') {
00983 this->state = XML_IN_CDATA_TAG_SQR;
00984 this->type = XML_noop;
00985 }
00986 else
00987 this->type = XML_CDATA_TAG_CH;
00988 SET_CHAR(this, ch);
00989 break;
00990 case XML_IN_CDATA_TAG_SQR:
00991 if (ch == ']') {
00992 this->state = XML_IN_CDATA_TAG_SQR_SQR;
00993 this->type = XML_noop;
00994 }
00995 else {
00996 this->state = XML_IN_CDATA_TAG;
00997 this->type = XML_CDATA_TAG_CH;
00998 }
00999 ADD_CHAR(this, ch);
01000 break;
01001 case XML_IN_CDATA_TAG_SQR_SQR:
01002 if (ch == ']') {
01003 this->state = XML_IN_CDATA_TAG_3_SQR;
01004 this->type = XML_CDATA_TAG_CH;
01005 SET_CHAR(this, ch);
01006 }
01007 else if (ch == '>') {
01008 this->type = XML_end_cdata_tag;
01009 this->nesting--;
01010 if (this->nesting) {
01011 this->step = XML_STEP_NESTED;
01012 this->state = XML_BEFORE_ATTR;
01013 }
01014 else {
01015 this->step = XML_STEP_NONE;
01016 this->state = XML_IN_CDATA;
01017 }
01018 }
01019 else {
01020 this->state = XML_IN_CDATA_TAG;
01021 this->type = XML_CDATA_TAG_CH;
01022 ADD_CHAR(this, ch);
01023 }
01024 break;
01025 case XML_IN_CDATA_TAG_3_SQR:
01026 if (ch == ']') {
01027 this->type = XML_CDATA_TAG_CH;
01028 SET_CHAR(this, ch);
01029 }
01030 else if (ch == '>') {
01031 this->type = XML_end_cdata_tag;
01032 this->nesting--;
01033 if (this->nesting) {
01034 this->step = XML_STEP_NESTED;
01035 this->state = XML_BEFORE_ATTR;
01036 }
01037 else {
01038 this->step = XML_STEP_NONE;
01039 this->state = XML_IN_CDATA;
01040 }
01041 }
01042 else {
01043 this->state = XML_IN_CDATA_TAG;
01044 this->type = XML_CDATA_TAG_CH;
01045 SET_CHAR(this, ']');
01046 ADD_CHAR(this, ']');
01047 ADD_CHAR(this, ch);
01048 }
01049 break;
01050 }
// The operation chosen for this character (unchanged from the previous
// call if this->state matched none of the cases above).
01051 return(this->type);
01052 }
01053
01054
01055
01056
01057 bool CSXMLProcessor::buildConversionTable()
01058 {
01059 int32_t i;
01060
01061
01062
01063
01064 if (strcasecmp(charset, "ISO-8859-1") == 0) {
01065 for (i=0; i<128; i++)
01066 conversion_table[i] = (wchar_t) (i + 128);
01067 }
01068 else {
01069 for (i=0; i<128; i++)
01070 conversion_table[i] = '?';
01071 }
01072 return true;
01073 }
01074
01075
01076
01077 int32_t CSXMLProcessor::capture_initializer(wchar_t ch)
01078
01079
01080
01081
01082 {
01083 int32_t op;
01084
01085 op = parseChar(ch);
01086 switch (op & XML_OP_1_MASK) {
01087 case XML_OP_1_START_TAG:
01088 this->tlength = 0;
01089 break;
01090 case XML_OP_1_ADD_ATTR:
01091 this->nlength = 0;
01092 this->vlength = 0;
01093 break;
01094 }
01095 return(op);
01096 }
01097
01098 int32_t CSXMLProcessor::entity_translator(wchar_t ch)
01099
01100
01101
01102
01103
01104
01105
01106
01107 {
01108 int32_t op;
01109
01110 op = capture_initializer(ch);
01111 return(op);
01112 }
01113
01114
01115
01116
01117 int32_t CSXMLProcessor::charset_transformer(wchar_t ch)
01118 {
01119 int32_t op;
01120
01121
01122 switch (this->charset_type) {
01123 case CHARSET_UTF_8:
01124 if (ch > 127 && ch < 256) {
01125 uint32_t utf_value;
01126 uint8_t utf_ch = (uint8_t)ch;
01127
01128 if ((utf_ch & 0xC0) != 0x80)
01129 this->utf8_count = 0;
01130 if ((utf_ch & 0x80) == 0x00)
01131 this->utf8_length = 1;
01132 else if ((utf_ch & 0xE0) == 0xC0)
01133 this->utf8_length = 2;
01134 else if ((utf_ch & 0xF0) == 0xE0)
01135 this->utf8_length = 3;
01136 else if ((utf_ch & 0xF8) == 0xF0)
01137 this->utf8_length = 4;
01138 else if ((utf_ch & 0xFC) == 0xF8)
01139 this->utf8_length = 5;
01140 else if ((utf_ch & 0xFE) == 0xFC)
01141 this->utf8_length = 6;
01142 this->utf8_buffer[this->utf8_count] = (uint32_t) utf_ch;
01143 this->utf8_count++;
01144 if (this->utf8_count < this->utf8_length) {
01145
01146 setDataType(XML_noop);
01147 return(XML_noop);
01148 }
01149 utf_value = 0;
01150 switch (this->utf8_length) {
01151 case 1:
01152 utf_value = this->utf8_buffer[0] & 0x0000007F;
01153 break;
01154 case 2:
01155 utf_value = ((this->utf8_buffer[0] & 0x0000001F) << 6) |
01156 (this->utf8_buffer[1] & 0x0000003F);
01157 if (utf_value < 0x00000080)
01158 utf_value = '?';
01159 break;
01160 case 3:
01161 utf_value = ((this->utf8_buffer[0] & 0x0000000F) << 12) |
01162 ((this->utf8_buffer[1] & 0x0000003F) << 6) |
01163 (this->utf8_buffer[2] & 0x0000003F);
01164 if (utf_value < 0x000000800)
01165 utf_value = '?';
01166 break;
01167 case 4:
01168 utf_value = ((this->utf8_buffer[0] & 0x00000007) << 18) |
01169 ((this->utf8_buffer[1] & 0x0000003F) << 12) |
01170 ((this->utf8_buffer[2] & 0x0000003F) << 6) |
01171 (this->utf8_buffer[3] & 0x0000003F);
01172 if (utf_value < 0x00010000)
01173 utf_value = '?';
01174 break;
01175 case 5:
01176 utf_value = ((this->utf8_buffer[0] & 0x00000003) << 24) |
01177 ((this->utf8_buffer[1] & 0x0000003F) << 18) |
01178 ((this->utf8_buffer[2] & 0x0000003F) << 12) |
01179 ((this->utf8_buffer[3] & 0x0000003F) << 6) |
01180 (this->utf8_buffer[4] & 0x0000003F);
01181 if (utf_value < 0x00200000)
01182 utf_value = '?';
01183 break;
01184 case 6:
01185 utf_value = ((this->utf8_buffer[0] & 0x00000001) << 30) |
01186 ((this->utf8_buffer[1] & 0x0000003F) << 24) |
01187 ((this->utf8_buffer[2] & 0x0000003F) << 18) |
01188 ((this->utf8_buffer[3] & 0x0000003F) << 12) |
01189 ((this->utf8_buffer[4] & 0x0000003F) << 6) |
01190 (this->utf8_buffer[5] & 0x0000003F);
01191 if (utf_value < 0x04000000)
01192 utf_value = '?';
01193 break;
01194 }
01195 if (utf_value > 0x0000FFFF)
01196 ch = '?';
01197 else
01198 ch = utf_value;
01199 }
01200 break;
01201 case CHARSET_TO_CONVERT_8_BIT:
01202 if (ch > 127 && ch < 256)
01203 ch = this->conversion_table[((unsigned char) ch) - 128];
01204 break;
01205 }
01206
01207 op = entity_translator(ch);
01208
01209
01210 switch (op & XML_OP_1_MASK) {
01211 case XML_OP_1_START_TAG:
01212 if (strcmp(this->pr_tag, "?xml") == 0)
01213 this->ip = true;
01214 else
01215 this->ip = false;
01216 break;
01217 case XML_OP_1_ADD_ATTR:
01218 if (this->ip) {
01219 if (strcasecmp(this->pr_name, "encoding") == 0) {
01220 strcpy(this->charset, this->pr_value);
01221 if (STRCASESTR(this->charset, "utf-8"))
01222 this->charset_type = CHARSET_UTF_8;
01223 else if (STRCASESTR(this->charset, "ucs-2") ||
01224 STRCASESTR(this->charset, "ucs-4") ||
01225 STRCASESTR(this->charset, "unicode"))
01226 this->charset_type = CHARSET_STANDARD;
01227 else {
01228 this->charset_type = CHARSET_TO_CONVERT_8_BIT;
01229 buildConversionTable();
01230 }
01231 }
01232 }
01233 break;
01234 }
01235 return(op);
01236 }
01237
01238 void CSXMLProcessor::appendWCharToString(char *dstr, size_t *dlen, size_t dsize, wchar_t *schars, size_t slen)
01239 {
01240 for (size_t i=0; i < slen; i++) {
01241 if (*dlen < dsize-1) {
01242 if (*schars > 127)
01243 dstr[*dlen] = '~';
01244 else
01245 dstr[*dlen] = (char)*schars;
01246 (*dlen)++;
01247 schars++;
01248 dstr[*dlen] = 0;
01249 }
01250 }
01251 }
01252
01253 int32_t CSXMLProcessor::processChar(wchar_t ch)
01254 {
01255 int32_t op;
01256
01257 op = charset_transformer(ch);
01258
01259
01260
01261
01262
01263
01264 switch (op & XML_DATA_MASK) {
01265 case XML_DATA_TAG:
01266 appendWCharToString(this->pr_tag, &this->tlength, CS_MAX_XML_NAME_SIZE, this->getDataPtr(), this->getDataLen());
01267 break;
01268 case XML_DATA_ATTR:
01269 appendWCharToString(this->pr_name, &this->nlength, CS_MAX_XML_NAME_SIZE, this->getDataPtr(), this->getDataLen());
01270 break;
01271 case XML_DATA_VALUE:
01272 appendWCharToString(this->pr_value, &this->vlength, CS_MAX_XML_NAME_SIZE, this->getDataPtr(), this->getDataLen());
01273 break;
01274 }
01275 return(op);
01276 }
01277
01278 bool CSXMLProcessor::getError(int32_t *err, char **msg)
01279 {
01280 *err = err_no;
01281 *msg = err_message;
01282 return err_no != 0;
01283 }
01284
01285 void CSXMLProcessor::setError(int32_t err, char *msg)
01286 {
01287 err_no = err;
01288 if (msg) {
01289 strncpy(err_message, msg, CS_XML_ERR_MSG_SIZE);
01290 err_message[CS_XML_ERR_MSG_SIZE-1] = 0;
01291 return;
01292 }
01293
01294 switch (err) {
01295 case CS_XML_ERR_OUT_OF_MEMORY:
01296 snprintf(err_message, CS_XML_ERR_MSG_SIZE, "AES parse error- insufficient memory");
01297 break;
01298 case CS_XML_ERR_CHAR_TOO_LARGE:
01299 snprintf(err_message, CS_XML_ERR_MSG_SIZE, "AES parse error- UNICODE character too large to be encoded as UTF-8");
01300 break;
01301 default:
01302 snprintf(err_message, CS_XML_ERR_MSG_SIZE, "AES parse error- %s", strerror(err));
01303 break;
01304 }
01305 }
01306
01307 void CSXMLProcessor::printError(char *prefix)
01308 {
01309 printf("%s%s", prefix, err_message);
01310 }
01311
01312
01313
01314
01315 #ifdef DEBUG_ALL
01316 #define EXTRA_SIZE 2
01317 #else
01318 #define EXTRA_SIZE 100
01319 #endif
01320
01321 bool CSXMLString::addChar(char ch, CSXMLProcessor *xml)
01322 {
01323 char *ptr;
01324
01325 if (stringLen + 2 > stringSize) {
01326 if (!(ptr = (char *) realloc(stringPtr, stringLen + 2 + EXTRA_SIZE))) {
01327 xml->setError(CS_XML_ERR_OUT_OF_MEMORY, NULL);
01328 return false;
01329 }
01330 stringPtr = ptr;
01331 stringSize = stringLen + 2 + EXTRA_SIZE;
01332 }
01333 stringPtr[stringLen] = ch;
01334 stringPtr[stringLen+1] = 0;
01335 stringLen++;
01336 return true;
01337 }
01338
01339 bool CSXMLString::addChars(size_t size, wchar_t *buffer, bool to_lower, CSXMLProcessor *xml)
01340 {
01341 size_t i;
01342 uint32_t uni_char;
01343 int32_t shift;
01344
01345 for (i=0; i<size; i++) {
01346 uni_char = (uint32_t) buffer[i];
01347
01348
01349 if (to_lower && uni_char <= 127)
01350 uni_char = (uint32_t) tolower((int32_t) uni_char);
01351
01352
01353 if (uni_char <= 0x0000007F) {
01354 if (!addChar((char) uni_char, xml))
01355 return false;
01356 shift = -6;
01357 }
01358 else if (uni_char <= 0x000007FF) {
01359 if (!addChar((char) ((0x000000C0) | ((uni_char >> 6) & 0x0000001F)), xml))
01360 return false;
01361 shift = 0;
01362 }
01363 else if (uni_char <= 0x00000FFFF) {
01364 if (!addChar((char) ((0x000000E0) | ((uni_char >> 12) & 0x0000000F)), xml))
01365 return false;
01366 shift = 6;
01367 }
01368 else if (uni_char <= 0x001FFFFF) {
01369 if (!addChar((char) ((0x000000F0) | ((uni_char >> 18) & 0x00000007)), xml))
01370 return false;
01371 shift = 12;
01372 }
01373 else if (uni_char <= 0x003FFFFFF) {
01374 if (!addChar((char) ((0x000000F0) | ((uni_char >> 24) & 0x00000003)), xml))
01375 return false;
01376 shift = 18;
01377 }
01378 else if (uni_char <= 0x07FFFFFFF) {
01379 if (!addChar((char) ((0x000000F0) | ((uni_char >> 30) & 0x00000001)), xml))
01380 return false;
01381 shift = 24;
01382 }
01383 else {
01384 xml->setError(CS_XML_ERR_CHAR_TOO_LARGE, NULL);
01385 return false;
01386 }
01387
01388 while (shift >= 0) {
01389 if (!addChar((char) ((0x00000080) | ((uni_char >> shift) & 0x0000003F)), xml))
01390 return false;
01391 shift -= 6;
01392 }
01393 }
01394 return true;
01395 }
01396
01397 bool CSXMLString::addString(const char *string, CSXMLProcessor *xml)
01398 {
01399 bool ok = true;
01400
01401 while (*string && ok) {
01402 ok = addChar(*string, xml);
01403 string++;
01404 }
01405 return ok;
01406 }
01407
01408 void CSXMLString::setEmpty()
01409 {
01410 stringLen = 0;
01411 if (stringPtr)
01412 *stringPtr = 0;
01413 }
01414
01415 void CSXMLString::setNull()
01416 {
01417 if (stringPtr)
01418 free(stringPtr);
01419 stringPtr = NULL;
01420 stringLen = 0;
01421 stringSize = 0;
01422 }
01423
01424 char *CSXMLString::lastComponent()
01425 {
01426 char *ptr;
01427
01428 if (stringLen == 0)
01429 return NULL;
01430
01431 ptr = stringPtr + stringLen - 1;
01432 while (ptr > stringPtr && *ptr != '/')
01433 ptr--;
01434 return ptr;
01435 }
01436
01437
01438 char *CSXMLString::findTrailingComponent(const char *comp)
01439 {
01440 char *ptr, *last_slash;
01441
01442 if (stringLen == 0)
01443 return NULL;
01444
01445 ptr = stringPtr + stringLen - 1;
01446 last_slash = NULL;
01447
01448 do {
01449
01450 while (ptr > stringPtr && *ptr != '/')
01451 ptr--;
01452 if (last_slash)
01453 *last_slash = 0;
01454 if (strcmp(ptr, comp) == 0) {
01455 if (last_slash)
01456 *last_slash = '/';
01457 return ptr;
01458 }
01459 if (last_slash)
01460 *last_slash = '/';
01461 last_slash = ptr;
01462 ptr--;
01463 }
01464 while (ptr > stringPtr);
01465 return NULL;
01466 }
01467
01468 void CSXMLString::truncate(char *ptr)
01469 {
01470 *ptr = 0;
01471 stringLen = ptr - stringPtr;
01472 }
01473
01474
01475
01476
01477 #define IS_XML_CDATA 0
01478 #define IS_XML_CDATA_TAG 1
01479 #define IS_XML_TAG 2
01480 #define IS_XML_CLOSE_TAG 3
01481 #define IS_XML_COMMENT 4
01482 #define IS_XML_DTD 5
01483 #define IS_XML_PI 6
01484 #define IS_XML_PI_XML 7
01485 #define IS_XML_IN_EX 8
01486 #define IS_XML_OPEN_BRACKET 9
01487 #define IS_XML_CLOSE_BRACKET 10
01488
01489 int32_t CSXML::nodeType(char *name)
01490 {
01491 if (name) {
01492 switch (*name) {
01493 case 0:
01494 return IS_XML_CDATA;
01495 case '[':
01496 if (strlen(name) == 1)
01497 return IS_XML_OPEN_BRACKET;
01498 break;
01499 case ']':
01500 if (strlen(name) == 1)
01501 return IS_XML_CLOSE_BRACKET;
01502 break;
01503 case '/':
01504 return IS_XML_CLOSE_TAG;
01505 case '!':
01506 if (strlen(name) > 1) {
01507 if (strcasecmp(name, "!--") == 0)
01508 return IS_XML_COMMENT;
01509 if (name[1] == '[') {
01510 if (strcasecmp(name, "![CDATA[") == 0)
01511 return IS_XML_CDATA_TAG;
01512 return IS_XML_IN_EX;
01513 }
01514 }
01515 return IS_XML_DTD;
01516 case '?':
01517 if (strcasecmp(name, "?xml") == 0)
01518 return IS_XML_PI_XML;
01519 return IS_XML_PI;
01520 }
01521 return IS_XML_TAG;
01522 }
01523 return IS_XML_CDATA;
01524 }
01525
01526 bool CSXML::internalCloseNode(const char *name, bool single)
01527 {
01528 bool ok = true;
01529 char *ptr;
01530
01531 if (single) {
01532 if ((ptr = xml_path.lastComponent())) {
01533 ok = closeNode(xml_path.stringPtr);
01534 xml_path.truncate(ptr);
01535 }
01536 }
01537 else if ((ptr = xml_path.findTrailingComponent(name))) {
01538
01539
01540
01541
01542
01543
01544
01545
01546
01547 for (;;) {
01548 if (!(ptr = xml_path.lastComponent()))
01549 break;
01550 if (!(ok = closeNode(xml_path.stringPtr)))
01551 break;
01552 if (strcmp(ptr, name) == 0) {
01553 xml_path.truncate(ptr);
01554 break;
01555 }
01556 xml_path.truncate(ptr);
01557 }
01558 }
01559 return ok;
01560 }
01561
01562 bool CSXML::internalOpenNode(const char *name)
01563 {
01564 bool ok;
01565
01566 ok = xml_path.addString("/", this);
01567 if (!ok)
01568 return ok;
01569 ok = xml_path.addString(name, this);
01570 if (!ok)
01571 return ok;
01572 return openNode(this->xml_path.stringPtr, this->xml_value.stringPtr);
01573 }
01574
01575 bool CSXML::parseXML(int32_t my_flags)
01576 {
01577 wchar_t ch;
01578 bool ok = true;
01579 int32_t op;
01580 int32_t tagtype;
01581
01582 this->flags = my_flags;
01583 ok = xml_path.addChars(0, NULL, false, this);
01584 if (!ok)
01585 goto exit;
01586 ok = xml_name.addChars(0, NULL, false, this);
01587 if (!ok)
01588 goto exit;
01589 ok = xml_value.addChars(0, NULL, false, this);
01590 if (!ok)
01591 goto exit;
01592
01593 ok = getChar(&ch);
01594 while (ch != CS_XML_EOF_CHAR && ok) {
01595 op = processChar(ch);
01596 switch (op & XML_OP_1_MASK) {
01597 case XML_OP_1_NOOP:
01598 break;
01599 case XML_OP_1_END_TAG:
01600 break;
01601 case XML_OP_1_END_CLOSE_TAG:
01602 break;
01603 case XML_OP_1_END_EMPTY_TAG:
01604 ok = internalCloseNode("/>", true);
01605 break;
01606 case XML_OP_1_END_PI_TAG:
01607 ok = internalCloseNode("?>", true);
01608 break;
01609 case XML_OP_1_END_ENTITY_TAG:
01610 ok = internalCloseNode(">", true);
01611 break;
01612 case XML_OP_1_END_BRACKET_TAG:
01613 ok = internalCloseNode("]>", true);
01614 break;
01615 case XML_OP_1_END_UNKNOWN_TAG:
01616 ok = internalCloseNode(">", true);
01617 break;
01618 case XML_OP_1_START_CDATA_TAG:
01619 break;
01620 case XML_OP_1_START_COMMENT:
01621 break;
01622 case XML_OP_1_START_TAG:
01623 if (nodeType(xml_name.stringPtr) == IS_XML_CLOSE_TAG)
01624 ok = internalCloseNode(xml_name.stringPtr, false);
01625 else
01626 ok = internalOpenNode(xml_name.stringPtr);
01627 xml_name.setEmpty();
01628 xml_value.setEmpty();
01629 break;
01630 case XML_OP_1_ADD_ATTR:
01631 tagtype = nodeType(xml_name.stringPtr);
01632 if (tagtype != IS_XML_OPEN_BRACKET && tagtype != IS_XML_CLOSE_BRACKET)
01633 ok = addAttribute(xml_path.stringPtr, xml_name.stringPtr, xml_value.stringPtr);
01634 xml_name.setEmpty();
01635 xml_value.setEmpty();
01636 break;
01637 case XML_OP_1_END_CDATA:
01638 if (xml_value.stringLen || (my_flags & XML_KEEP_EMPTY_CDATA)) {
01639 ok = internalOpenNode("");
01640 xml_name.setEmpty();
01641 xml_value.setEmpty();
01642 ok = internalCloseNode("", true);
01643 }
01644 break;
01645 case XML_OP_1_END_CDATA_TAG:
01646 ok = internalOpenNode("![CDATA[");
01647 xml_name.setEmpty();
01648 xml_value.setEmpty();
01649 if (ok)
01650 ok = internalCloseNode("]]>", true);
01651 break;
01652 case XML_OP_1_END_COMMENT:
01653 ok = internalOpenNode("!--");
01654 xml_name.setEmpty();
01655 xml_value.setEmpty();
01656 if (ok)
01657 ok = internalCloseNode("-->", true);
01658 break;
01659 }
01660 if (!ok)
01661 break;
01662 switch (op & XML_DATA_MASK) {
01663 case XML_DATA_TAG:
01664 case XML_DATA_ATTR:
01665 ok = xml_name.addChars(getDataLen(), getDataPtr(), true, this);
01666 break;
01667 case XML_DATA_CDATA:
01668 case XML_DATA_CDATA_TAG:
01669 case XML_COMMENT:
01670 case XML_DATA_VALUE:
01671 ok = xml_value.addChars(getDataLen(), getDataPtr(), false, this);
01672 break;
01673 }
01674 if (!ok)
01675 break;
01676 switch (op & XML_OP_2_MASK) {
01677 case XML_OP_2_NOOP:
01678 break;
01679 case XML_OP_2_END_TAG:
01680 break;
01681 case XML_OP_2_END_CLOSE_TAG:
01682 break;
01683 case XML_OP_2_END_EMPTY_TAG:
01684 ok = internalCloseNode("/>", true);
01685 break;
01686 case XML_OP_2_END_PI_TAG:
01687 ok = internalCloseNode("?>", true);
01688 break;
01689 case XML_OP_2_END_ENTITY_TAG:
01690 ok = internalCloseNode(">", true);
01691 break;
01692 case XML_OP_2_END_BRACKET_TAG:
01693 ok = internalCloseNode("]>", true);
01694 break;
01695 case XML_OP_2_END_UNKNOWN_TAG:
01696 ok = internalCloseNode(">", true);
01697 break;
01698 case XML_OP_2_START_CDATA_TAG:
01699 break;
01700 case XML_OP_2_START_COMMENT:
01701 break;
01702 }
01703 ok = getChar(&ch);
01704 }
01705
01706 exit:
01707 xml_path.setNull();
01708 xml_name.setNull();
01709 xml_value.setNull();
01710 return ok;
01711 }
01712
01713
01714
01715
01716 bool CSXMLPrint::openNode(char *path, char *value)
01717 {
01718 printf("OPEN %s\n", path);
01719 if (value && *value)
01720 printf(" %s\n", value);
01721 return true;
01722 }
01723
01724 bool CSXMLPrint::closeNode(char *path)
01725 {
01726 printf("close %s\n", path);
01727 return true;
01728 }
01729
01730 bool CSXMLPrint::addAttribute(char *path, char *name, char *value)
01731 {
01732 if (value)
01733 printf("attr %s %s=%s\n", path, name, value);
01734 else
01735 printf("attr %s %s\n", path, name);
01736 return true;
01737 }
01738
01739
01740
01741
01742 bool CSXMLBuffer::parseString(const char *data, int32_t my_flags)
01743 {
01744 charData = data;
01745 dataLen = strlen(data);
01746 dataPos = 0;
01747 return parseXML(my_flags);
01748 }
01749
01750 bool CSXMLBuffer::parseData(const char *data, size_t len, int32_t my_flags)
01751 {
01752 charData = data;
01753 dataLen = len;
01754 dataPos = 0;
01755 return parseXML(my_flags);
01756 }
01757
01758 bool CSXMLBuffer::getChar(wchar_t *ch)
01759 {
01760 if (dataPos == dataLen)
01761 *ch = CS_XML_EOF_CHAR;
01762 else {
01763 *ch = (wchar_t) (unsigned char) charData[dataPos];
01764 dataPos++;
01765 }
01766 return true;
01767 }
01768
01769
01770
01771
01772 bool CSXMLFile::parseFile(char *file_name, int32_t my_flags)
01773 {
01774 bool ok;
01775
01776 if (!(this->file = fopen(file_name, "r"))) {
01777 setError(errno, NULL);
01778 return false;
01779 }
01780 ok = parseXML(my_flags);
01781 fclose(this->file);
01782 return ok;
01783 }
01784
01785 bool CSXMLFile::getChar(wchar_t *ch)
01786 {
01787 int32_t next_ch;
01788
01789 next_ch = fgetc(file);
01790 if (next_ch == EOF) {
01791 if (ferror(file)) {
01792 setError(errno, NULL);
01793 return false;
01794 }
01795 *ch = CS_XML_EOF_CHAR;
01796 }
01797 else
01798 *ch = (wchar_t) next_ch;
01799 return true;
01800 }
01801
01802