50 float (*local_cost_function)(
const EST_Item *item1,
54 bool (*local_pruning_function)(
const int i,
62 local_cost_function lcf,
63 local_pruning_function lpf,
69 local_cost_function lcf,
73 bool local_prune(
const int i,
const int j,
74 const int max_i,
const int max_j);
75 static void load_vocab(
const EST_String &vfile);
79 static bool show_cost=FALSE;
80 static int prune_width = 100;
95 float insertion_cost = 1;
96 float deletion_cost = 1;
97 float substitution_cost = 1;
108 int main(
int argc,
char **argv)
117 null_sym->set_name(
"<null>");
119 parse_command_line(argc, argv,
121 "dp <options> \"pattern 1\" \"pattern 2\"\n"+
122 "Find the best alignment of a pair of symbol sequences (e.g. word pronuciations).\n"+
123 "-vocab <string> file containing vocabulary\n"+
124 "-place_holder <string> which vocab item is the place holder (default is " + null_sym->name() +
" )\n"+
125 "-show_cost show cost of matching path\n"+
126 "-o <string> output file\n"+
127 "-p <int> 'beam' width\n"+
129 "-i <float> insertion cost\n"+
130 "-d <float> deletion cost\n"+
131 "-s <float> substitution cost\n"+
133 "-cost_matrix <string> file containing cost matrix\n",
139 load_vocab(al.
val(
"-vocab"));
142 cerr << argv[0] <<
": no vocab file specified" << endl;
147 prune_width = al.
ival(
"-p");
149 if (al.
present(
"-cost_matrix"))
153 cerr <<
"Can't have ins/del/subs costs as well as matrix !" << endl;
156 distance_measure=
"matrix";
157 cost_matrix.
load(al.
val(
"-cost_matrix"));
159 if(al.
present(
"-place_holder"))
160 null_sym->set_name(al.
val(
"-place_holder"));
164 cerr <<
"The place holder symbol '" << null_sym->name();
165 cerr <<
"' is not in the vocbulary !" << endl;
171 cerr <<
"Cost matrix number of columns must match vocabulary size !" << endl;
176 cerr <<
"Cost matrix number of rows must match vocabulary size !" << endl;
183 insertion_cost = al.
fval(
"-i");
184 deletion_cost = al.
fval(
"-d");
185 substitution_cost = al.
fval(
"-s");
189 cerr <<
"Must give either ins/del/subs costs or cost matrix !" << endl;
197 if(files.length() != 2)
199 cerr <<
"Must give 2 patterns !" << endl;
212 for(p=pattern1_l.head();p != 0; p=p->next())
216 cerr << pattern1_l(p) <<
" is not in the vocabulary !" << endl;
220 new_item.set_name(pattern1_l(p));
221 path1->append(&new_item);
224 for(p=pattern2_l.head();p != 0; p=p->next())
228 cerr << pattern2_l(p) <<
" is not in the vocabulary !" << endl;
232 new_item.set_name(pattern2_l(p));
233 path2->append(&new_item);
241 if(!dp_match(*path1,*path2,*match,
242 local_cost,local_prune,null_sym))
245 cerr <<
"No match could be found." << endl;
255 static void load_vocab(
const EST_String &vfile)
260 if (ts.
open(vfile) == -1)
262 cerr <<
"can't find vocab file \"" << vfile <<
"\"" << endl;
283 if(distance_measure ==
"simple")
285 if(s1->name() == s2->name())
290 return insertion_cost;
291 else if(s2 == null_sym)
292 return deletion_cost;
294 return substitution_cost;
306 bool local_prune(
const int i,
const int j,
307 const int max_i,
const int max_j)
313 float scale = (float)max_i / (
float)max_j;
315 float near_j = (float)i / scale;
316 float near_i = (float)j * scale;
325 if( (abs((
int)(near_i - (float)i)) > prune_width) ||
326 (abs((
int)(near_j - (
float)j)) > prune_width) )
EST_TokenStream & get(EST_Token &t)
get next token in stream
int num_columns() const
return number of columns
EST_write_status save(const EST_String &filename, const EST_String &type="est_ascii") const
void StrList_to_StrVector(EST_StrList &l, EST_StrVector &v)
Convert a list of strings to a vector of strings.
EST_Relation * create_relation(const EST_String &relname)
Create a new relation called relname.
A vector class for floating point numbers. EST_FVector x should be used instead of float *x wherever ...
int ival(const EST_String &rkey, int m=1) const
void close(void)
Close stream.
float fval(const EST_String &rkey, int m=1) const
int open(const EST_String &filename)
Open an EST_TokenStream for a file.
INLINE int length() const
number of items in vector.
void StringtoStrList(EST_String s, EST_StrList &l, EST_String sep)
Convert an EST_String to an EST_StrList by separating tokens in s delimited by the separator sep...
EST_read_status load(const EST_String &filename)
Load from file (ascii or binary as defined in file)
const int present(const K &rkey) const
Returns true if key is present.
EST_Token & peek(void)
peek at next token
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
void append(const T &item)
add item onto end of list
int StrVector_index(const EST_StrVector &v, const EST_String &s)
Search the vector and return the position of the first occurrence of string s in the vector...
int num_rows() const
return number of rows