50 #include "EST_Ngrammar.h"
52 static double fs_find_backoff_prob(
EST_Ngrammar *backoff_ngrams,
56 void Ngram_freqsmooth(
EST_Ngrammar &ngram,
int smooth_thresh1,
63 Good_Turing_smooth(ngram,smooth_thresh1);
65 fs_build_backoff_ngrams(backoff_ngrams,ngram);
67 fs_backoff_smooth(backoff_ngrams,ngram,smooth_thresh2);
69 delete [] backoff_ngrams;
73 void fs_build_backoff_ngrams(
EST_Ngrammar *backoff_ngrams,
80 for (i=0; i < ngram.order()-1; i++)
81 backoff_ngrams[i].init(i+1,EST_Ngrammar::dense,
82 *ngram.vocab,*ngram.pred_vocab);
84 for (i=0; i < ngram.num_states(); i++)
89 !ngram.p_states[i].pdf().
item_end(k);
94 ngram.p_states[i].pdf().
item_freq(k,name,freq);
96 for (j=0; j < ngram.order()-1; j++)
100 for (l=0; l < j; l++)
101 nnn[l] = words(ngram.order()-1-j);
102 backoff_ngrams[j].accumulate(nnn,freq);
119 if (ngram.representation() != EST_Ngrammar::dense)
121 cerr <<
"Ngrammar: can only ptsmooth dense ngrammars" << endl;
126 for (i=0; i < ngram.num_states(); i++)
128 if (ngram.p_states[i].pdf().
samples() < smooth_thresh)
131 occurs = ngram.p_states[i].pdf().
samples();
142 words[words.
n()-1] = name;
144 fs_find_backoff_prob(backoff_ngrams,
157 static double fs_find_backoff_prob(
EST_Ngrammar *backoff_ngrams,
170 for(i=0; i<order; i++)
171 nnn[order-1-i] = words(words.
n()-1-i);
173 if (backoff_ngrams[order-1].frequency(nnn) < smooth_thresh)
174 return fs_find_backoff_prob(backoff_ngrams,
175 order-1,words,smooth_thresh);
177 return backoff_ngrams[order-1].probability(nnn);
EST_Litem * item_next(EST_Litem *idx) const
Used for iterating through members of the distribution.
double samples(void) const
Total number of examples found.
EST_Litem * item_start() const
Used for iterating through members of the distribution.
int item_end(EST_Litem *idx) const
Used for iterating through members of the distribution.
void resize(int n, int set=1)
void item_freq(EST_Litem *idx, EST_String &s, double &freq) const
During iteration returns name and frequency given index.
INLINE int n() const
number of items in vector.
void set_frequency(const EST_String &s, double c)