44 #include "EST_TDeque.h"
45 #include "EST_THash.h"
46 #include "EST_error.h"
48 #include "rxp/XML_Parser.h"
50 #include "ling_class_init.h"
52 #if defined(ESTLIBDIRC)
53 # define __STRINGIZE(X) #X
54 # define ESTLIBDIR __STRINGIZE(ESTLIBDIRC)
58 static EST_Regex simpleIDRegex(
"[^#]*#id(\\([-a-z0-9]+\\))");
59 static EST_Regex rangeIDRegex(
"[^#]*#id(\\([a-z]*\\)\\([0-9]*\\)\\(-\\([0-9]+\\)\\)*).*id(\\([a-z]*\\)\\([0-9]*\\)\\(-\\([0-9]+\\)\\)*)");
60 static EST_Regex featureDefRegex(
"\\([^:]*\\):\\(.*\\)");
69 #define MAX_FEATS (50)
73 class GenXML_Parse_State
94 GenXML_Parse_State() : contents(100) {}
134 const char *instruction);
145 void EST_GenXML::class_init(
void)
147 ling_class_init::use();
149 pclass =
new GenXML_Parser_Class();
151 printf(
"Register estlib in genxml %s\n", ESTLIBDIR
"/\\1.dtd");
154 pclass->
register_id(
"//CSTR EST//DTD \\(.*\\)//[A-Z]*",
155 ESTLIBDIR
"/\\1.dtd");
156 pclass->
register_id(
"//CSTR EST//ENTITIES \\(.*\\)//[A-Z]*",
157 ESTLIBDIR
"/\\1.ent");
160 void EST_GenXML::register_id(
const EST_String pattern,
171 InputSource EST_GenXML::try_and_open(Entity ent)
177 EST_read_status EST_GenXML::read_xml(FILE *file,
183 (void)print_attributes;
184 GenXML_Parse_State state;
194 return read_format_error;
203 static
void ensure_relation(GenXML_Parse_State *state,
EST_String name)
205 if (state->rel!=NULL && name == state->relName)
208 state->rel = state->utt->create_relation(state->relName=name);
218 state->contents.add_item(
id, c);
223 if (c->relations.
present(state->relName))
235 sprintf(buf,
"%s%d", root, ++count);
246 val = attributes.
val(
"id");
247 #if defined(EST_DEBUGGING)
248 fprintf(stderr,
"ID %s\n", (
const char *)val);
252 else if (attributes.
present(
"href"))
254 val = attributes.
val(
"href");
255 int starts[EST_Regex_max_subexpressions];
256 int ends[EST_Regex_max_subexpressions];
258 if (val.
matches(simpleIDRegex, 0, starts, ends))
261 #if defined(EST_DEBUGGING)
262 fprintf(stderr,
"SIMPLE %s\n", (
const char *)n);
266 else if (val.
matches(rangeIDRegex, 0, starts, ends))
268 EST_String prefix1 = val.
at(starts[1], ends[1]-starts[1]);
269 int n1 = atoi(val.
at(starts[2], ends[2]-starts[2]));
270 EST_String postfix1 = val.
at(starts[4], ends[4]-starts[4]);
271 EST_String prefix2 = val.
at(starts[5], ends[5]-starts[5]);
272 int n2 = atoi(val.
at(starts[6], ends[6]-starts[6]));
273 EST_String postfix2 = val.
at(starts[8], ends[8]-starts[8]);
275 #if defined(EST_DEBUGGING)
276 fprintf(stderr,
"RANGE '%s' %d - '%s' // '%s' %d - '%s'\n",
277 (
const char *)prefix1,
279 (
const char *)postfix1,
280 (
const char *)prefix2,
282 (
const char *)postfix2
286 if (prefix1==prefix2)
293 if (postfix1.length()==0)
295 sprintf(buf,
"%s%s%d",
296 (
const char *)prefix1,
297 (
const char *)prefix2,
306 if (postfix2.length()>0)
307 for (; c<=atoi(postfix2); c++)
309 sprintf(buf,
"%s%s%d-%d",
310 (
const char *)prefix1,
311 (
const char *)prefix2,
320 for(
int i=n1; i<=n2; i++)
323 && postfix2.length()>0)
325 sprintf(buf,
"%s%s%d",
326 (
const char *)prefix1,
327 (
const char *)prefix2,
331 for (
int c=1; c<=atoi(postfix2); c++)
333 sprintf(buf,
"%s%s%d-%d",
334 (
const char *)prefix1,
335 (
const char *)prefix2,
344 if ( postfix1.length()>0)
345 sprintf(buf,
"%s%s%d-%s",
346 (
const char *)prefix1,
347 (
const char *)prefix2,
349 (
const char *)postfix1
352 sprintf(buf, "%s%s%d",
353 (const
char *)prefix1,
354 (const
char *)prefix2,
366 EST_warning("element with bad ID or HREF '%s'", (const
char *)val);
369 ids.append(make_new_id("n"));
380 for(them.
begin(attributes); them ; them++)
382 (
const char *)them->k,
383 (
const char *)them->v);
394 GenXML_Parse_State *state = (GenXML_Parse_State *)data;
397 state->open_depth=-1;
398 state->rel_start_depth=-1;
399 state->depth_stack.clear();
410 (void)c; (void)p; (void)data;
413 static void proccess_features(
EST_String name,
419 int starts[EST_Regex_max_subexpressions];
420 int ends[EST_Regex_max_subexpressions];
422 int n = split(defs, names, MAX_FEATS, feat_sep);
423 for(
int i=0; i<n; i++)
429 if (def.
matches(featureDefRegex, 0, starts, ends))
431 feat = def.
at(starts[1], ends[1]-starts[1]);
432 attr = def.
at(starts[2], ends[2]-starts[2]);
443 printf(
"on %s got %s(%s)=%s\n", name,
459 (void)c; (void)p; (void)attributes; (void)name;
460 GenXML_Parse_State *state = (GenXML_Parse_State *)data;
467 if (state->utt != NULL
469 proccess_features(name, val, attributes, state->utt->f);
472 if (state->rel != NULL
474 proccess_features(name, val, attributes, state->rel->f);
485 EST_warning(
"%s\nNo feature '%s' to name relation\n", get_error(p), (
const char *)val);
488 EST_String relationType = attributes.
val(
"estRelationTypeAttr");
490 ensure_relation(state, relName);
491 state->rel_start_depth=state->depth;
492 state->linear=(attributes.
val(relationType) ==
"linear"||
493 attributes.
val(relationType) ==
"list");
495 printf(
"start of relation depth=%d name=%s type=%s\n", state->depth, (
const char *)relName, state->linear?
"linear":
"tree");
498 else if ((state->rel_start_depth >= 0 &&
504 printf(
"push depth=%d name=%s ig=%s\n", state->depth, name, (
const char *)ig);
507 ensure_relation(state, val);
509 state->depth_stack.push(state->open_depth);
510 state->open_depth=state->depth;
516 extract_ids(attributes, ids);
521 switch (ids.length())
539 for(them.
begin(attributes); them ; them++)
546 cont->
f.
set(
"id",
id);
551 if (state->current == NULL)
552 item = state->rel->append();
554 item = state->current->insert_after();
555 else if (state->current == NULL)
556 if (state->parent == NULL)
557 item = state->rel->append();
559 item = state->parent->append_daughter();
561 if (state->parent == NULL)
562 item = state->current->insert_after();
564 item = state->parent->append_daughter();
566 item->set_contents(cont);
575 bool embed = (attributes.
val(
"estExpansion") ==
"embed");
578 state->id=make_new_id(
"e");
579 element_open(c, p, data, name, attributes);
584 for(; idp!= NULL; idp = idp->next())
591 element_close(c, p, data, name);
596 element_open(c, p, data, name, attributes);
601 element_close(c, p, data, name);
607 if (state->parent!=NULL)
608 state->contentAttr = attributes.
val(
"estContentFeature");
611 printf(
"\t current=%s parent=%s contA=%s\n",
612 (
const char *)state->current->name(),
613 (
const char *)state->parent->name(),
614 (
const char *)state->contentAttr);
630 (void)c; (void)p; (void)attributes;
631 GenXML_Parse_State *state = (GenXML_Parse_State *)data;
634 element_open(c, p, data, name, attributes);
635 element_close(c, p, data, name);
644 (void)c; (void)p; (void)name;
645 GenXML_Parse_State *state = (GenXML_Parse_State *)data;
650 if (state->depth == state->rel_start_depth )
653 printf(
"end of relation depth=%d name=%s\n", state->depth, name);
655 state->rel_start_depth=-1;
659 state->depth == state->open_depth)
662 printf(
"pop depth=%d name=%s\n", state->depth, name);
664 state->current = state->parent;
665 state->parent=
parent(state->parent);
666 state->open_depth = state->depth_stack.pop();
668 printf(
"\t current=%s parent=%s\n",
669 (
const char *)state->current->name(),
670 (
const char *)state->parent->name());
686 GenXML_Parse_State *state = (GenXML_Parse_State *)data;
690 state->parent->set(state->contentAttr, chars);
693 printf(
"GEN XML Parser [pcdata[%s]] %d\n", chars, state->depth);
703 (void)c; (void)p; (void)data; (void)chars;
707 printf(
"GEN XML Parser [cdata[%s]] %d\n", chars, state->depth);
715 const char *instruction)
717 (void)c; (void)p; (void)instruction;
718 GenXML_Parse_State *state = (GenXML_Parse_State *)data;
722 printf(
"GEN XML Parser [proc[%s]] %d\n", instruction, state->depth);
731 (void)c; (void)p; (void)data;
734 EST_error(
"GEN XML Parser %s", get_error(p));
742 #if defined(INSTANTIATE_TEMPLATES)
744 #include "../base_class/EST_THash.cc"
virtual void element_close(XML_Parser_Class &c, XML_Parser &p, void *data, const char *name)
void clear()
remove everything in utterance
virtual void pcdata(XML_Parser_Class &c, XML_Parser &p, void *data, const char *chars)
virtual void document_close(XML_Parser_Class &c, XML_Parser &p, void *data)
virtual void element_open(XML_Parser_Class &c, XML_Parser &p, void *data, const char *name, XML_Attribute_List &attributes)
A Regular expression class to go with the CSTR EST_String class.
void registered_ids(EST_TList< EST_String > &list)
void set(const EST_String &name, int ival)
EST_Item * root(const EST_Item *n)
return root node of treeprevious sibling (sister) of n
EST_Features f
General features for this item.
void register_id(EST_Regex id_pattern, EST_String directory)
static EST_String cat(const EST_String s1, const EST_String s2=Empty, const EST_String s3=Empty, const EST_String s4=Empty, const EST_String s5=Empty, const EST_String s6=Empty, const EST_String s7=Empty, const EST_String s8=Empty, const EST_String s9=Empty)
InputSource try_and_open(Entity ent)
V & val(const K &key, int &found) const
int present(const K &key) const
Does the key have an entry?
virtual void element(XML_Parser_Class &c, XML_Parser &p, void *data, const char *name, XML_Attribute_List &attributes)
void error(XML_Parser_Class &c, XML_Parser &p, void *data, EST_String message)
void track_context(bool flag)
const T & first() const
return const reference to first item in list
const int present(const K &rkey) const
Returns true if key is present.
virtual void cdata(XML_Parser_Class &c, XML_Parser &p, void *data, const char *chars)
void append(const T &item)
add item onto end of list
XML_Parser * make_parser(InputSource source, void *data)
Create a parser for the RXP InputSource.
void begin(const Container &over)
Set the iterator ready to run over this container.
An open hash table. The number of buckets should be set to allow enough space that there are relative...
virtual void error(XML_Parser_Class &c, XML_Parser &p, void *data)
virtual void processing(XML_Parser_Class &c, XML_Parser &p, void *data, const char *instruction)
EST_String at(int from, int len=0) const
Return part at position.
EST_Item * parent(const EST_Item *n)
return parent of n
int matches(const char *e, int pos=0) const
Exactly match this string?
static const EST_String Empty
Constant empty string.
virtual void document_open(XML_Parser_Class &c, XML_Parser &p, void *data)