40 #ifndef __EST_WFST_H__
41 #define __EST_WFST_H__
43 #include "EST_simplestats.h"
44 #include "EST_rw_status.h"
45 #include "EST_Option.h"
47 #include "EST_TVector.h"
48 #include "EST_THash.h"
/// Write WMESS followed by endl to cerr, then call siod_error().
/// WMESS is pasted directly into the stream expression, so a caller may pass
/// a chain such as `"bad state " << n`; it must therefore stay unparenthesised.
#define wfst_error_msg(WMESS) (cerr << WMESS << endl,siod_error())

/// State index used to denote the error state.
/// Parenthesised so the negative literal cannot merge with a neighbouring
/// operator at the point of expansion.
#define WFST_ERROR_STATE (-1)
70 { p_weight=t.p_weight; p_state=t.p_state;
71 p_in_symbol = t.p_in_symbol; p_out_symbol=t.p_out_symbol; }
73 { p_weight=w; p_state=s; p_in_symbol=i; p_out_symbol=o;}
75 float weight()
const {
return p_weight; }
76 int state()
const {
return p_state; }
77 int in_symbol()
const {
return p_in_symbol; }
78 int out_symbol()
const {
return p_out_symbol; }
79 void set_weight(
float f) { p_weight = f; }
80 void set_state(
int s) { p_state = s; }
/// Kinds of WFST state. No explicit values are given, so the enumerators
/// take 0, 1, 2, 3 in the order written.
enum wfst_state_type {
    wfst_final,     // 0
    wfst_nonfinal,  // 1
    wfst_error,     // 2
    wfst_licence    // 3
};

/// Integer code equal to the implicit value of wfst_nonfinal.
#define WFST_NONFINAL 1
/// Integer code equal to the implicit value of wfst_licence.
#define WFST_LICENCE 3
101 enum wfst_state_type p_type;
114 int name()
const {
return p_name; }
115 int num_transitions()
const {
return transitions.length(); }
116 enum wfst_state_type type()
const {
return p_type; }
117 void set_type(wfst_state_type t) { p_type = t; }
118 void set_tag(
int v) { p_tag = v;}
119 int tag()
const {
return p_tag;}
/// Kinds of multi-state. No explicit values are given, so wfst_ms_set is 0
/// and wfst_ms_list is 1.
enum wfst_mstate_type {
    wfst_ms_set,   // 0
    wfst_ms_list   // 1
};
134 enum wfst_mstate_type p_type;
137 { p_name = -1; p_weight = 0.0; p_type = wfst_ms_set; }
139 { p_name = -1; p_weight = 0.0; p_type = ty; }
140 int name()
const {
return p_name; }
141 void set_name(
int i) { p_name = i; }
142 float weight()
const {
return p_weight; }
143 void set_weight(
float w) { p_weight = w; }
144 void set_type(
enum wfst_mstate_type s) { p_type = s; }
145 enum wfst_mstate_type type()
const {
return p_type; }
164 int operator_and(LISP l);
165 int operator_or(LISP l);
166 int operator_star(LISP l);
167 int operator_plus(LISP l);
168 int operator_optional(LISP l);
169 int operator_not(LISP l);
170 int terminal(LISP l);
174 void extend_alphabets(
const EST_WFST &b);
176 EST_read_status load_transitions_from_lisp(
int s, LISP trans);
177 void more_states(
int new_max);
179 int can_reach_final(
int state);
180 static int traverse_tag;
194 void init(
int init_num_states=10);
196 void init(LISP in, LISP out);
205 int num_states()
const {
return p_num_states; }
206 int start_state()
const {
return p_start_state; }
209 {
return p_in_symbols.
name(s); }
212 {
return p_in_symbols.
name(i); }
215 {
return p_out_symbols.
name(s); }
218 {
return p_out_symbols.
name(i); }
230 int final(
int i)
const
231 {
return ((i != WFST_ERROR_STATE) && (
state(i)->type() == wfst_final));}
244 EST_write_status save_binary(FILE *fd);
248 EST_read_status load_binary(FILE *fd,
257 int transition(
int state,
int in,
int out)
const;
258 int transition(
int state,
int in,
int out,
float &prob)
const;
266 int transduce(
int state,
int in,
int &out)
const;
291 int add_state(
enum wfst_state_type state_type);
296 void build_wfst(
int start,
int end,LISP regex);
303 void build_from_regex(LISP inalpha, LISP outalpha, LISP regex);
305 void kkrule_compile(LISP inalpha, LISP outalpha, LISP fp,
306 LISP rule, LISP sets);
308 void build_from_rg(LISP inalpha, LISP outalpha,
309 LISP distinguished, LISP rewrites,
310 LISP sets, LISP terms,
313 void build_tree_lex(LISP inalpha, LISP outalpha,
360 int in,
int out)
const;
384 void kkcompile(LISP ruleset,
EST_WFST &all_wfst);
386 void ltscompile(LISP lts_rules,
EST_WFST &all_wfst);
388 void rgcompile(LISP rg,
EST_WFST &all_wfst);
390 void tlcompile(LISP rg,
EST_WFST &all_wfst);
398 int recognize_for_perplexity(
const EST_WFST &wfst,
403 int recognize_for_perplexity(
const EST_WFST &wfst,
410 VAL_REGISTER_CLASS_DCLS(wfst,
EST_WFST)
LISP epsilon_label() const
LISP form of the epsilon symbol.
void minimize(const EST_WFST &a)
Build minimized form of a.
void complement(const EST_WFST &a)
Build complement of a.
int transition(int state, int in, int out) const
Find (first) new state given in and out symbols.
int deterministic() const
True if WFST is deterministic.
int transduce(int state, int in, int &out) const
Transduce in to out from state.
int add_state(enum wfst_state_type state_type)
Add a new state, returns new name.
An internal class for EST_WFST representing transitions in a WFST.
A class representing a weighted finite-state transducer.
void clear()
Clear, removing existing states if any.
EST_WFST(const EST_WFST &wfst)
?
void build_or_transition(int start, int end, LISP disjunctions)
Basic disjunction constructor.
int out_epsilon() const
Internal index for output epsilon.
const EST_String & name(const int n) const
The name given the index.
EST_WFST & operator=(const EST_WFST &a)
?
void transition_all(int state, int in, int out, EST_WFST_MultiState *ms) const
Find all possible transitions for given state/input/output.
void intersection(EST_TList< EST_WFST > &wl)
A specialised hash table for when the key is an EST_String.
void determinize(const EST_WFST &a)
Build determinized form of a.
int in_symbol(const EST_String &s) const
Map input symbol to input alphabet index.
void start_cumulate()
Clear and start cumulation.
void stop_cumulate()
Stop cumulation and calculate probabilities on transitions.
const EST_Discrete & out_symbols() const
Accessing the output alphabet.
int cumulate() const
Cumulation condition.
An internal class to EST_WFST used for holding multi-states when determinizing and finding the intersecti...
const EST_Discrete & in_symbols() const
Accessing the input alphabet.
an internal class for EST_WFST used to represent a state in a WFST
int out_symbol(const EST_String &s) const
Map output symbol to output alphabet index.
void compose(const EST_WFST &a, const EST_WFST &b)
const EST_WFST_State * state(int i) const
Return internal state information.
EST_write_status save(const EST_String &filename, const EST_String type="ascii")
?
const EST_String & in_symbol(int i) const
Map input alphabet index to input symbol.
void concat(const EST_WFST &a, const EST_WFST &b)
void uunion(EST_TList< EST_WFST > &wl)
void add_epsilon_reachable(EST_WFST_MultiState *ms) const
Extend multi-state with epsilon reachable states.
int in_epsilon() const
Internal index for input epsilon.
void build_wfst(int start, int end, LISP regex)
Basic regex constructor.
EST_WFST_Transition * find_transition(int state, int in, int out) const
Find (first) transition given in and out symbols.
enum wfst_state_type ms_type(EST_WFST_MultiState *ms) const
Given a multi-state, return its type (final, ok, error).
EST_WFST_State * state_non_const(int i)
Return internal state information (non-const)
void remove_error_states(const EST_WFST &a)
Remove error states from the WFST.
EST_WFST_MultiState * apply_multistate(const EST_WFST &wfst, EST_WFST_MultiState *ms, int in, int out) const
Transduce a multi-state given in and out.
void init(int init_num_states=10)
Clear, with an estimate of the number of states required.
void build_and_transition(int start, int end, LISP conjunctions)
Basic conjunction constructor.
void difference(const EST_WFST &a, const EST_WFST &b)
const EST_String & out_symbol(int i) const
Map output alphabet index to output symbol.
void copy(const EST_WFST &wfst)
Copy from existing wfst.
EST_read_status load(const EST_String &filename)
?