Edinburgh Speech Tools  2.1-release
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
pitchmark_main.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Paul Taylor */
34 /* Date : 1997, 1998, 1999 */
35 /*-----------------------------------------------------------------------*/
36 /* Pitchmarking program */
37 /*************************************************************************/
38 
39 #include <cstdlib>
40 #include <iostream>
41 #include <fstream>
42 #include "EST_unix.h"
43 #include "EST_cmd_line_options.h"
44 #include "EST_cmd_line.h"
45 #include "EST_speech_class.h"
46 #include "sigpr/EST_pitchmark.h"
47 
48 
49 void set_options(EST_Features &op, EST_Option &al);
50 
51 static EST_write_status save_msec(EST_Track &pm, EST_String filename);
52 static EST_write_status save_ogi_bin(EST_Track &pm, EST_String filename,
53  int sr);
54 void pm_to_label(EST_Track &pm, EST_Relation &lab);
55 
56 
57 /*void pm_to_label(EST_Track &pm, EST_Relation &lab);
58 void find_pm(EST_Wave &sig, EST_Track &pm);
59 
60 void pm_min_check(EST_Track &pm, float min);
61 void pm_sanity_check(EST_Track &pm, float new_end,
62  float max, float min, float def);
63 
64 void pm_fill(EST_Track &pm, float new_end, float max,
65  float min, float def);
66 
67 void pm_to_f0(EST_Track &pm, EST_Track &f0);
68 */
69 
70 
71 int main (int argc, char *argv[])
72 {
73  EST_Track pm;
74  EST_Wave lx;
75  EST_Option al;
76  EST_Features op;
77  EST_String out_file("-");
78  EST_StrList files;
79 
80  parse_command_line
81  (argc, argv,
82  EST_String("[input file] -o [output file] [options]")+
83  "Summary: pitchmark laryngograph (lx) files\n"
84  "use \"-\" to make input and output files stdin/out\n"
85  "-h Options help\n\n"+
86  options_wave_input()+
87  options_track_output()+
88  "-lx_lf <int> lx low frequency cutoff\n\n"
89  "-lx_lo <int> lx low order\n\n"
90  "-lx_hf <int> lx high frequency cutoff\n\n"
91  "-lx_ho <int> lx high order\n\n"
92  "-df_lf <int> df low frequeny cutoff\n\n"
93  "-df_lo <int> df low order\n\n"
94  "-med_o <int> median smoothing order\n\n"
95  "-mean_o <int> mean smoothing order\n\n"
96  "-inv Invert polarity of lx signal. Often the lx signal \n"
97  " is upside down. This option inverts the signal prior to \n"
98  " processing.\n\n"
99  "-fill Insert and remove pitchmarks according to min, max\n"
100  " and def period values. Often it is desirable to place limits\n"
101  " on the values of the pitchmarks. This option enforces a \n"
102  " minimum and maximum pitch period (specified by -man and -max).\n"
103  " If the maximum pitch setting is low enough, this will \n"
104  " esnure that unvoiced regions have evenly spaced pitchmarks \n\n"
105  "-min <float> Minimum allowed pitch period, in seconds\n\n"
106  "-max <float> Maximum allowed pitch period, in seconds\n\n"
107  "-def <float> Default pitch period in seconds, used for a guide\n"
108  " as to what length pitch periods should be in unvoiced \n"
109  " sections \n\n"
110  "-pm <ifile> Input is raw pitchmark file. This option is \n"
111  " used to perform filling operations on an already existing \n"
112  " set of pitchmarks \n\n"
113  "-f0 <ofile> Calculate F0 from pitchmarks and save to file\n\n"
114  "-end <float> Specify the end time of the last pitchmark, for use \n"
115  " with the -fill option\n\n"
116  "-wave_end Use the end of a waveform to specify when the \n"
117  " last pitchmark position should be. The waveform file is only \n"
118  " read to determine its end, no processing is performed\n\n"
119  "-inter Output intermediate waveforms. This will output the \n"
120  " signal at various stages of processing. Examination of these \n"
121  " waveforms is extremely useful in setting the parameters for \n"
122  " similar waveforms\n\n"
123  "-style <string> \"track\" or \"lab\"\n\n", files, al);
124 
125  set_options(op, al);
126 
127  out_file = al.present("-o") ? al.val("-o") : (EST_String)"-";
128 
129  if (!al.present("-pm") || (al.present("-pm") && al.present("-wave_end")))
130  if (read_wave(lx, files.first(), al) != read_ok)
131  exit(-1);
132 
133  if (al.present("-pm"))
134  pm.load(al.val("-pm"));
135  else
136  {
137  if (al.present("-inv"))
138  invert(lx);
139  pm = pitchmark(lx, op);
140  }
141 
142  // this allows the end to be aligned with the end of a waveform
143  op.set("pm_end", lx.end());
144 
145  if (al.present("-f0"))
146  {
147  EST_Track f0;
148  pm_to_f0(pm, f0);
149  f0.save(al.val("-f0"));
150  }
151 
152  // various options for filling he gaps between distant pitchmarks
153  // and removing pitchmarks that are too close together
154 
155  if (al.present("-fill"))
156  {
157  pm_fill(pm, op.F("pm_end"), op.F("max_period"),
158  op.F("min_period"), op.F("def_period"));
159  pm_fill(pm, op.F("pm_end"), op.F("max_period"),
160  op.F("min_period"), op.F("def_period"));
161  }
162  else if (al.present("-min"))
163  pm_min_check(pm, al.fval("-min"));
164 
165  if (al.present("-style"))
166  {
167  // label format
168  if (al.val("-style").contains("lab"))
169  {
170  EST_Relation lab;
171  pm_to_label(pm, lab);
172  if (lab.save(out_file + ".pm_lab") != write_ok)
173  exit(-1);
174  }
175  // save file in "traditional" milli-second format
176  if (al.val("-style").contains("msec"))
177  save_msec(pm, out_file + ".pm");
178 
179  // ogi binary integer sample point format
180  if (al.val("-style").contains("ogi_bin"))
181  save_ogi_bin(pm, out_file + ".pmv", lx.sample_rate());
182  }
183  else if (pm.save(out_file, al.val("-otype", 0)) != write_ok)
184  {
185  cerr << "pitchmark: failed to write output to \""
186  << out_file << "\"" << endl;
187  exit(-1);
188  }
189  return 0;
190 }
191 
192 static EST_write_status save_msec(EST_Track &pm, EST_String filename)
193 {
194  ostream *outf;
195 
196  if (filename == "-")
197  outf = &cout;
198  else
199  outf = new ofstream(filename);
200 
201  if (!(*outf))
202  return write_fail;
203 
204  outf->precision(5);
205  outf->setf(ios::fixed, ios::floatfield);
206  outf->width(8);
207 
208  for (int i = 0; i < pm.num_frames(); ++i)
209  *outf << pm.t(i) * 1000.0 << endl;
210 
211  return write_ok;
212 }
213 
214 static EST_write_status save_ogi_bin(EST_Track &pm, EST_String filename, int sr)
215 {
216  int *d;
217  FILE *fp;
218  int i;
219 
220  d = new int[pm.num_frames()];
221 
222  for (i = 0; i < pm.num_frames(); ++i)
223  d[i] = int(pm.t(i) * (float) sr);
224 
225  if ((fp = fopen(filename, "wb")) == NULL)
226  return misc_write_error;
227 
228  if (fwrite(d, pm.num_frames(), sizeof(int), fp) != 1)
229  {
230  fclose(fp);
231  return misc_write_error;
232  }
233  delete d;
234 
235  return write_ok;
236 }
237 
238 void override_lib_ops(EST_Option &op, EST_Option &al)
239 {
240  op.override_ival("lx_low_frequency", 400);
241  op.override_ival("lx_low_order", 19);
242  op.override_ival("lx_high_frequency", 40);
243  op.override_ival("lx_high_order", 19);
244  op.override_ival("df_low_frequency", 1000);
245  op.override_ival("df_low_order", 19);
246  op.override_fval("min_period", 0.003);
247  op.override_fval("max_period", 0.02);
248  op.override_fval("def_period", 0.01);
249  op.override_fval("pm_end", -1.0);
250 
251  if (al.present("-lx_lf"))
252  op.override_ival("lx_low_frequency", al.ival("-lx_lf", 0));
253  if (al.present("-lx_lo"))
254  op.override_ival("lx_low_order", al.ival("-lx_lo", 0));
255  if (al.present("-lx_hf"))
256  op.override_ival("lx_high_frequency", al.ival("-lx_hf", 0));
257  if (al.present("-lx_ho"))
258  op.override_ival("lx_high_order", al.ival("-lx_ho", 0));
259  if (al.present("-med_o"))
260  op.override_ival("median_order", al.ival("-med_o", 0));
261  if (al.present("-mean_o"))
262  op.override_ival("mean_order", al.ival("-mean_o", 0));
263  if (al.present("-df_lf"))
264  op.override_ival("df_low_frequency", al.ival("-df_lf", 0));
265  if (al.present("-df_lo"))
266  op.override_ival("df_low_order", al.ival("-df_lo", 0));
267  if (al.present("-min"))
268  op.override_fval("min_period", al.fval("-min", 0));
269  if (al.present("-max"))
270  op.override_fval("max_period", al.fval("-max", 0));
271  if (al.present("-def"))
272  op.override_fval("def_period", al.fval("-def", 0));
273  if (al.present("-end"))
274  op.override_fval("pm_end", al.fval("-end", 0));
275  if (al.present("-inter"))
276  op.override_ival("pm_debug", 1);
277 }
278 
279 void set_options(EST_Features &op, EST_Option &al)
280 {
281  op.set("lx_low_frequency", LX_LOW_FREQUENCY);
282  op.set("lx_low_order", LX_LOW_ORDER);
283  op.set("lx_high_frequency", LX_HIGH_FREQUENCY);
284  op.set("lx_high_order", LX_HIGH_ORDER);
285  op.set("df_low_frequency", DF_LOW_FREQUENCY);
286  op.set("df_low_order", DF_LOW_ORDER);
287  op.set("min_period", MIN_PERIOD);
288  op.set("max_period", MAX_PERIOD);
289  op.set("def_period", DEF_PERIOD);
290  op.set("pm_end", PM_END);
291 
292  if (al.present("-lx_lf"))
293  op.set("lx_low_frequency", al.ival("-lx_lf", 0));
294  if (al.present("-lx_lo"))
295  op.set("lx_low_order", al.ival("-lx_lo", 0));
296  if (al.present("-lx_hf"))
297  op.set("lx_high_frequency", al.ival("-lx_hf", 0));
298  if (al.present("-lx_ho"))
299  op.set("lx_high_order", al.ival("-lx_ho", 0));
300  if (al.present("-med_o"))
301  op.set("median_order", al.ival("-med_o", 0));
302  if (al.present("-mean_o"))
303  op.set("mean_order", al.ival("-mean_o", 0));
304  if (al.present("-df_lf"))
305  op.set("df_low_frequency", al.ival("-df_lf", 0));
306  if (al.present("-df_lo"))
307  op.set("df_low_order", al.ival("-df_lo", 0));
308  if (al.present("-min"))
309  op.set("min_period", al.fval("-min", 0));
310  if (al.present("-max"))
311  op.set("max_period", al.fval("-max", 0));
312  if (al.present("-def"))
313  op.set("def_period", al.fval("-def", 0));
314  if (al.present("-end"))
315  op.set("pm_end", al.fval("-end", 0));
316  if (al.present("-inter"))
317  op.set("pm_debug", 1);
318 }
319 
A class for storing digital waveforms. The waveform is stored as an array of 16 bit shorts...
Definition: EST_Wave.h:64
void pm_fill(EST_Track &pm, float new_end, float max, float min, float def)
Definition: pitchmark.cc:153
float & t(int i=0)
return time position of frame i
Definition: EST_Track.h:477
int override_ival(const EST_String rkey, const int rval)
add to end of list or overwrite. If rval is empty, do nothing
Definition: EST_Option.cc:66
int ival(const EST_String &rkey, int m=1) const
Definition: EST_Option.cc:76
int override_fval(const EST_String rkey, const float rval)
add to end of list or overwrite. If rval is empty, do nothing
Definition: EST_Option.cc:56
float fval(const EST_String &rkey, int m=1) const
Definition: EST_Option.cc:98
void pm_min_check(EST_Track &pm, float min)
Definition: pitchmark.cc:137
void set(const EST_String &name, int ival)
Definition: EST_Features.h:186
const float F(const EST_String &path) const
Definition: EST_Features.h:136
EST_read_status load(const EST_String name, float ishift=0.0, float startt=0.0)
Definition: EST_Track.cc:1309
EST_write_status save(const EST_String name, const EST_String EST_filetype="")
Definition: EST_Track.cc:1230
const T & first() const
return const reference to first item in list
Definition: EST_TList.h:154
const int present(const K &rkey) const
Returns true if key is present.
Definition: EST_TKVL.cc:222
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
Definition: EST_TKVL.cc:145
float end()
return the time position of the last sample.
Definition: EST_Wave.h:153
int sample_rate() const
return the sampling rate (frequency)
Definition: EST_Wave.h:147
int contains(const char *s, int pos=-1) const
Does it contain this substring?
Definition: EST_String.h:374
int num_frames() const
return number of frames in track
Definition: EST_Track.h:650
EST_write_status save(const EST_String &filename, bool evaluate_ff=false) const
EST_Track pitchmark(EST_Wave &lx, EST_Features &op)
Definition: pitchmark.cc:104