RAPP Platform API
 All Classes Namespaces Files Functions Variables Typedefs
speech_to_text.cpp
Go to the documentation of this file.
4 #include <boost/program_options.hpp>
5 #include <string>
6 #include <fstream>
7 #include <streambuf>
8 
9 namespace po = boost::program_options;
10 
// Convenience overload used by main: streams each element of a vector to
// `os`, writing a single space after every element (including the last one).
template<class T>
std::ostream& operator<<(std::ostream& os, const std::vector<T>& v)
{
    for (const T& item : v) {
        os << item;
        os << " ";
    }
    return os;
}
18 
/**
 * Load a JSGF text file into a string.
 *
 * @param filename path of the file to read
 * @return the complete contents of the file
 * @throws std::runtime_error if the file cannot be opened
 */
std::string load_jsgf(const std::string& filename)
{
    std::ifstream input(filename);
    if (!input.is_open())
        throw std::runtime_error("could not open: " + filename);

    std::string contents;

    // Use the file size only as a capacity hint. tellg() yields -1 when the
    // stream cannot report a position; passing that to reserve() would be
    // converted to a huge unsigned value and throw std::length_error.
    input.seekg(0, std::ios::end);
    const std::streamoff size = input.tellg();
    if (size > 0)
        contents.reserve(static_cast<std::string::size_type>(size));
    else if (size < 0)
        input.clear(); // drop the failed state so the rewind below is attempted

    input.seekg(0, std::ios::beg);
    contents.assign(std::istreambuf_iterator<char>(input),
                    std::istreambuf_iterator<char>());
    return contents;
}
33 
44 int main(int argc, char* argv[])
45 {
46  try
47  {
48  po::options_description desc("Allowed options - See headers `audio.hpp` and `speech_detection_sphinx4.hpp` for details");
49  desc.add_options()
50  ("help", "produce help message")
51  ("audio", po::value<std::string>(), "(required) the wav/pcm audio input")
52  ("audio-source", po::value<std::string>(), "(required) the audio source type")
53  ("lang", po::value<std::string>(), "(required) set language, e.g: `en` or `gr`")
54  ("user", po::value<std::string>(), "(required) set user, e.g: rapp")
55  ("words", po::value<std::vector<std::string>>()->multitoken(),
56  "(optional) keyword search, e.g: key book beer")
57  ("sentences", po::value<std::vector<std::string>>()->multitoken(),
58  "(optional) sentence matching, e.g: find my keys")
59  ("jsgf", po::value<std::string>(), "(optional) JSGF grammar file");
60 
61  po::positional_options_description p;
62  p.add("input-file", -1);
63 
64  po::variables_map vm;
65  po::store(po::command_line_parser(argc, argv).options(desc).positional(p).run(), vm);
66  po::notify(vm);
67 
68  std::string token = "my_token";
69  int checks = 0;
70  std::string audio_file, audio_source, lang, user, jsgf = "";
71  std::vector<std::string> words = {}, sentences = {};
72 
73  if (vm.count("help")) {
74  std::cout << "Usage: options_description [options]\n";
75  std::cout << desc;
76  return 0;
77  }
78  if (vm.count("audio")) {
79  std::cout << "audio: " << vm["audio"].as<std::string>() << "\n";
80  checks++;
81  audio_file = vm["audio"].as<std::string>();
82  }
83  if (vm.count("audio-source")) {
84  std::cout << "audio-source: " << vm["audio-source"].as<std::string>() << "\n";
85  checks++;
86  audio_source = vm["audio-source"].as<std::string>();
87  }
88  if (vm.count("lang")) {
89  std::cout << "lang: " << vm["lang"].as<std::string>() << "\n";
90  checks++;
91  lang = vm["lang"].as<std::string>();
92  }
93  if (vm.count("user")) {
94  std::cout << "user: " << vm["user"].as<std::string>() << "\n";
95  checks++;
96  user = vm["user"].as<std::string>();
97  }
98  if (vm.count("words")) {
99  std::cout << "words: " << vm["words"].as<std::vector<std::string>>() << "\n";
100  words = vm["words"].as<std::vector<std::string>>();
101  }
102  if (vm.count("sentences")) {
103  std::cout << "sentences: " << vm["sentences"].as<std::vector<std::string>>() << "\n";
104  sentences = vm["sentences"].as<std::vector<std::string>>();
105  }
106  if (vm.count("jsgf")) {
107  std::cout << "JSGF: " << vm["jsgf"].as<std::string>() << "\n";
108  jsgf = vm["jsgf"].as<std::string>();
109  }
110 
111  // we have the required params set
112  if (checks == 4) {
114  std::shared_ptr<rapp::object::audio> audio;
115  std::vector<std::string> gram;
116 
117  if (audio_source == "microphone_wav")
118  audio = std::make_shared<rapp::object::microphone_wav>(audio_file);
119  else if (audio_source == "nao_single_channel_wav")
120  audio = std::make_shared<rapp::object::nao_single_channel_wav>(audio_file);
121  else if (audio_source == "nao_quad_channel_wav")
122  audio = std::make_shared<rapp::object::nao_quad_channel_wav>(audio_file);
123  else if (audio_source == "ogg")
124  audio = std::make_shared<rapp::object::ogg>(audio_file);
125  else
126  throw std::runtime_error("uknown audio source");
127 
128  assert(audio);
129  if (!jsgf.empty())
130  gram.push_back(load_jsgf(jsgf));
131 
132  if (audio) {
133  auto callback = [&](std::vector<std::string> words)
134  {
135  for (const auto & str : words)
136  std::cout << str << " ";
137  std::cout << std::endl;
138  };
139  auto sphinx4_call = std::make_shared<rapp::cloud::speech_detection_sphinx4>(audio, // audio file
140  lang, // Language
141  user, // user
142  gram, // grammar
143  words, // words
144  sentences, // sentences
145  callback,
146  token);
147  ctrl.run_job(sphinx4_call);
148  }
149  }
150  else {
151  std::cerr << "missing required arguments -- please see \"--help\"\n";
152  }
153  }
154  catch(std::exception & e) {
155  std::cerr << "error: " << e.what() << "\n";
156  return 1;
157  }
158  catch(...) {
159  std::cerr << "Exception of unknown type!\n";
160  }
161 
162  return 0;
163 }
Main class that controls RAPP Services.
std::string load_jsgf(const std::string filename)
int main(int argc, char *argv[])
void run_job(const std::shared_ptr< asio_socket > job)
Run one service job.