api/html/speech__to__text_8cpp_source.html

 #include "cloud/service_controller/service_controller.hpp"
 #include "cloud/speech/speech_detection_sphinx4/speech_detection_sphinx4.hpp"
 #include "objects/audio/audio.hpp"
 #include <boost/program_options.hpp>
 #include <algorithm>
 #include <cassert>
 #include <fstream>
 #include <iostream>
 #include <iterator>
 #include <memory>
 #include <stdexcept>
 #include <streambuf>
 #include <string>
 #include <vector>


 namespace po = boost::program_options;


 /// Stream helper used by `main` to echo multi-value options.
 ///
 /// Writes every element of `v` to `os`, each one followed by a single
 /// space (so a non-empty vector produces a trailing space).
 ///
 /// @param os  stream receiving the output
 /// @param v   elements to print
 /// @return    `os`, to allow chaining
 template<class T>
 std::ostream& operator<<(std::ostream& os, const std::vector<T>& v)
 {
     for (const T& element : v)
         os << element << " ";
     return os;
 }


 /// Load a JSGF grammar text file into a string.
 ///
 /// @param filename  path of the file to read
 /// @return          the complete content of the file
 /// @throws std::runtime_error  when the file cannot be opened
 std::string load_jsgf(const std::string& filename)
 {
     std::ifstream t(filename);
     if (!t.is_open())
         throw std::runtime_error("could not open: "+filename);

     std::string str;

     // Use the file size only as a capacity hint. tellg() yields -1 when the
     // position cannot be determined; passing that straight to reserve()
     // would request an enormous buffer and throw std::length_error.
     t.seekg(0, std::ios::end);
     const std::streamoff size = t.tellg();
     if (size > 0 && static_cast<unsigned long long>(size) <= str.max_size())
         str.reserve(static_cast<std::string::size_type>(size));

     // Drop any failure flag left by the size probe so the rewind is honoured.
     t.clear();
     t.seekg(0, std::ios::beg);

     str.assign(std::istreambuf_iterator<char>(t),
                std::istreambuf_iterator<char>());
     return str;
 }


 int main(int argc, char* argv[])

 {

     try

     {

         po::options_description desc("Allowed options - See headers `audio.hpp` and `speech_detection_sphinx4.hpp` for details");

         desc.add_options()

         ("help", "produce help message")

         ("audio", po::value<std::string>(), "(required) the wav/pcm audio input")

         ("audio-source", po::value<std::string>(), "(required) the audio source type")

         ("lang", po::value<std::string>(), "(required) set language, e.g: `en` or `gr`")

         ("user", po::value<std::string>(), "(required) set user, e.g: rapp")

         ("words", po::value<std::vector<std::string>>()->multitoken(),

                   "(optional) keyword search, e.g: key book beer")

         ("sentences", po::value<std::vector<std::string>>()->multitoken(),

                    "(optional) sentence matching, e.g: find my keys")

         ("jsgf", po::value<std::string>(), "(optional) JSGF grammar file");


         po::positional_options_description p;

         p.add("input-file", -1);


         po::variables_map vm;

         po::store(po::command_line_parser(argc, argv).options(desc).positional(p).run(), vm);

         po::notify(vm);


         std::string token = "my_token";

         int checks = 0;

         std::string audio_file, audio_source, lang, user, jsgf = "";

         std::vector<std::string> words = {}, sentences = {};


         if (vm.count("help")) {

             std::cout << "Usage: options_description [options]\n";

             std::cout << desc;

             return 0;

         }

         if (vm.count("audio")) {

             std::cout << "audio: " << vm["audio"].as<std::string>() << "\n";

             checks++;

             audio_file = vm["audio"].as<std::string>();

         }

         if (vm.count("audio-source")) {

             std::cout << "audio-source: " << vm["audio-source"].as<std::string>() << "\n";

             checks++;

             audio_source = vm["audio-source"].as<std::string>();

         }

         if (vm.count("lang")) {

             std::cout << "lang: " << vm["lang"].as<std::string>() << "\n";

             checks++;

             lang = vm["lang"].as<std::string>();

         }

         if (vm.count("user")) {

             std::cout << "user: " << vm["user"].as<std::string>() << "\n";

             checks++;

             user = vm["user"].as<std::string>();

         }

         if (vm.count("words")) {

             std::cout << "words: " << vm["words"].as<std::vector<std::string>>() << "\n";

             words =  vm["words"].as<std::vector<std::string>>();

         }

         if (vm.count("sentences")) {

             std::cout << "sentences: " << vm["sentences"].as<std::vector<std::string>>() << "\n";

             sentences = vm["sentences"].as<std::vector<std::string>>();

         }

         if (vm.count("jsgf")) {

             std::cout << "JSGF: " << vm["jsgf"].as<std::string>() << "\n";

             jsgf =  vm["jsgf"].as<std::string>();

         }


         // we have the required params set

         if (checks == 4) {

             rapp::cloud::service_controller ctrl;

             std::shared_ptr<rapp::object::audio> audio;

             std::vector<std::string> gram;


             if (audio_source == "microphone_wav")

                 audio = std::make_shared<rapp::object::microphone_wav>(audio_file);

             else if (audio_source == "nao_single_channel_wav")

                 audio = std::make_shared<rapp::object::nao_single_channel_wav>(audio_file);

             else if (audio_source == "nao_quad_channel_wav")

                 audio = std::make_shared<rapp::object::nao_quad_channel_wav>(audio_file);

             else if (audio_source == "ogg")

                 audio = std::make_shared<rapp::object::ogg>(audio_file);

             else

                 throw std::runtime_error("uknown audio source");


             assert(audio);

             if (!jsgf.empty())

                 gram.push_back(load_jsgf(jsgf));


             if (audio) {

                 auto callback = [&](std::vector<std::string> words)

                                 {

                                     for (const auto & str : words)

                                         std::cout << str << " ";

                                     std::cout << std::endl;

                                 };

                 auto sphinx4_call = std::make_shared<rapp::cloud::speech_detection_sphinx4>(audio,        // audio file

                                                                                             lang,         // Language

                                                                                             user,         // user

                                                                                             gram,         // grammar

                                                                                             words,        // words

                                                                                             sentences,    // sentences

                                                                                             callback,

                                                                                             token);

                 ctrl.run_job(sphinx4_call);

             }

         }

         else {

             std::cerr << "missing required arguments -- please see \"--help\"\n";

         }

     }

     catch(std::exception & e) {

         std::cerr << "error: " << e.what() << "\n";

         return 1;

     }

     catch(...) {

         std::cerr << "Exception of unknown type!\n";

     }


     return 0;

 }

audio.hpp

service_controller.hpp

rapp::cloud::service_controller
Main class that controls RAPP Services.
Definition: service_controller.hpp:20

speech_detection_sphinx4.hpp

load_jsgf
std::string load_jsgf(const std::string filename)
Definition: speech_to_text.cpp:20

main
int main(int argc, char *argv[])
Definition: speech_to_text.cpp:44

rapp::cloud::service_controller::run_job
void run_job(const std::shared_ptr< asio_socket > job)
Run one service job.
Definition: service_controller.cpp:9