RAPP Platform  v0.6.0
RAPP Platform is a collection of ROS nodes and back-end processes that aim to deliver ready-to-use generic services to robots
 All Classes Namespaces Files Functions Variables Macros
sphinx4_wrapper.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 
4 #Copyright 2015 RAPP
5 
6 #Licensed under the Apache License, Version 2.0 (the "License");
7 #you may not use this file except in compliance with the License.
8 #You may obtain a copy of the License at
9 
10  #http://www.apache.org/licenses/LICENSE-2.0
11 
12 #Unless required by applicable law or agreed to in writing, software
13 #distributed under the License is distributed on an "AS IS" BASIS,
14 #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 #See the License for the specific language governing permissions and
16 #limitations under the License.
17 
18 # Authors: Athanassios Kintsakis, Manos Tsardoulias
19 # contact: akintsakis@issel.ee.auth.gr, etsardou@iti.gr
20 
21 import sys
22 import subprocess
23 import socket
24 import time
25 import os
26 from global_parameters import GlobalParams
27 import rospy
28 
29 from rapp_utilities import RappUtilities
30 from rapp_exceptions import RappError
31 
32 from rapp_platform_ros_communications.srv import(
33  AudioProcessingDenoiseSrv,
34  AudioProcessingDenoiseSrvRequest,
35  AudioProcessingDetectSilenceSrv,
36  AudioProcessingDetectSilenceSrvRequest,
37  AudioProcessingTransformAudioSrv,
38  AudioProcessingTransformAudioSrvResponse,
39  AudioProcessingTransformAudioSrvRequest
40  )
41 
42 ## @class Sphinx4Wrapper
43 # @brief Contains the Sphinx subprocess and is responsible for configuring Sphinx and performing the recognition request.
44 #
45 # Initializes a Sphinx.java subprocess and creates an IPC using sockets.
46 # It is responsible for interacting with Sphinx via the socket to send
47 # configuration params/instructions and initialize a recognition procedure.
49 
50  ## Constructor
51  # Initiates service clients
def __init__(self):
    # Sets up the wrapper state and the audio processing service clients.
    # No Sphinx subprocess or socket is created here; that happens in
    # _initializeSphinxProcess.

    ## Contains global Sphinx parameters
    #
    # (see global_parameters.GlobalParams)
    self._globalParams = GlobalParams()

    ## Sphinx configuration
    self._conf = ''
    ## Sphinx status flag
    self._sphinxDied = False

    ## The IPC socket
    self._sphinx_socket = None
    ## The IPC socket port
    self._sphinx_socket_PORT = None
    ## The accepted IPC connection (assigned by _initializeSphinxProcess)
    self.socket_connection = None

    ## The Sphinx subprocess
    self._sphinxSubprocess = None

    # Denoise service topic name
    denoise_topic = rospy.get_param("rapp_audio_processing_denoise_topic")
    # Energy denoise service topic name
    energy_denoise_topic = \
        rospy.get_param("rapp_audio_processing_energy_denoise_topic")
    # Detect silence service topic name
    detect_silence_topic = \
        rospy.get_param("rapp_audio_processing_detect_silence_topic")
    # Transform audio service topic name
    audio_trans_topic = \
        rospy.get_param("rapp_audio_processing_transform_audio_topic")

    # Report empty topic names. rospy.logerr is the logging call used by
    # the rest of this class; the previous rospy.logerror spelling is not
    # a name exported by the rospy package.
    if not denoise_topic:
        rospy.logerr("Audio processing denoise topic not found")
    if not energy_denoise_topic:
        rospy.logerr("Audio processing energy denoise topic not found")
    if not detect_silence_topic:
        rospy.logerr("Audio processing detect silence topic not found")
    if not audio_trans_topic:
        rospy.logerr("Audio processing transform audio topic not found")

    ## @brief Denoise service client
    #
    # rapp_audio_processing.rapp_audio_processing.AudioProcessing::denoise
    self._denoise_service = rospy.ServiceProxy(
        denoise_topic, AudioProcessingDenoiseSrv)

    ## @brief Energy denoise service client
    #
    # rapp_audio_processing.rapp_audio_processing.AudioProcessing::energy_denoise
    self._energy_denoise_service = rospy.ServiceProxy(
        energy_denoise_topic, AudioProcessingDenoiseSrv)

    ## @brief Detect silence service client
    #
    # rapp_audio_processing.rapp_audio_processing.AudioProcessing::detect_silence
    self._detect_silence_service = rospy.ServiceProxy(
        detect_silence_topic, AudioProcessingDetectSilenceSrv)

    ## @brief Transform audio service client
    #
    # rapp_audio_processing.rapp_audio_processing.AudioProcessing::transform_audio
    self._audio_transform_srv = rospy.ServiceProxy(
        audio_trans_topic, AudioProcessingTransformAudioSrv)

    ## Contains the absolute path for the Sphinx jar file
    #
    # Java classpath made of the current directory, the Sphinx jar and the
    # "src" folder of the Sphinx package.
    self._jar_path = ".:" + self._globalParams._sphinx_jar_files_url + \
        "/" + self._globalParams._sphinx_jar_file + ":" \
        + self._globalParams._sphinx_package_url + "/src"
120 
122 
123  ## Helper function for getting input from IPC with Sphinx subprocess
124  #
125  # @return line [string] A buffer read from socket
def _readLine(self):
    # Reads one buffer of at most 1024 bytes from the Sphinx connection,
    # echoes it through RappUtilities when Sphinx output is allowed and
    # hands it back to the caller.
    received = self.socket_connection.recv(1024)
    echo_output = self._globalParams._allow_sphinx_output == True
    if echo_output:
        RappUtilities.rapp_print(received)
    return received
131 
132  ## Perform Sphinx4 initialization
133  # Initiates Sphinx subprocess, sets up socket IPC and configures Sphinx subprocess
134  #
135  # @param conf [dictionary] Contains the configuration parameters
def _initializeSphinxProcess(self, conf = None):
    # Starts the Sphinx4 java subprocess and connects to it:
    #   1. opens the listening IPC socket (see _createSocket),
    #   2. launches "java -cp <jar path> Sphinx4 <port>",
    #   3. blocks until the subprocess connects back to the socket,
    #   4. sends the configuration when one is supplied.

    RappUtilities.rapp_print('Initializing Sphinx subprocess')
    RappUtilities.rapp_print(self._jar_path)

    RappUtilities.rapp_print('Setting up socket IPC')
    self._createSocket()

    RappUtilities.rapp_print('Forking subprocess')
    # The listening port is passed to the subprocess as its only argument
    command = ["java", "-cp", self._jar_path, "Sphinx4",
               str(self._sphinx_socket_PORT)]

    if self._globalParams._allow_sphinx_output == True:
        # Let the subprocess inherit this process' stdout / stderr
        self._sphinxSubprocess = subprocess.Popen(command)
    else:
        # Silence the subprocess. subprocess.DEVNULL does not exist in
        # every Python version, so fall back to an os.devnull handle.
        devnull_handle = None
        try:
            from subprocess import DEVNULL
        except ImportError:
            devnull_handle = open(os.devnull, 'wb')
            DEVNULL = devnull_handle

        try:
            self._sphinxSubprocess = subprocess.Popen(
                command, stdout = DEVNULL, stderr = DEVNULL)
        finally:
            # The child holds its own copy of the descriptor, so the
            # parent's handle can be released right away instead of being
            # left open on every (re)spawn.
            if devnull_handle is not None:
                devnull_handle.close()

    RappUtilities.rapp_print('Awaiting socket connection')
    self.socket_connection, addr = self._sphinx_socket.accept()

    if conf is not None:
        self.configureSphinx( conf )
165 
166  ## Creates socket IPC between self and Sphinx subprocess
167  # Creates the socket server with a system provided port, which is passed as an
168  # argument to the created subprocess.
def _createSocket(self):
    # Opens a TCP/IPv4 listening socket on the configured host. Binding to
    # port 0 leaves the port choice to the system; the chosen port is
    # stored in _sphinx_socket_PORT and printed.
    bind_host = self._globalParams._socket_host

    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    self._sphinx_socket = listener
    listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listener.bind((bind_host, 0))

    bound_address = listener.getsockname()
    self._sphinx_socket_PORT = bound_address[1]

    # A backlog of one pending connection
    listener.listen(1)

    port_text = str(self._sphinx_socket_PORT)
    RappUtilities.rapp_print('Socket created. PORT: ' + port_text)
178 
179  ## Perform Sphinx4 configuration
180  #
181  # @param conf [dictionary] Contains the configuration parameters
def configureSphinx(self, conf):
    # Stores the configuration and sends it to the Sphinx subprocess as a
    # sequence of "name#value" commands, each terminated by CRLF. After
    # every command one buffer is read back from the socket before the next
    # command is sent.
    self._conf = conf

    def pending_commands():
        # Built lazily, so each conf entry is looked up only when its
        # command is about to be sent.
        yield "configurationPath#" + conf['configuration_path'] + '\r\n'
        yield "acousticModel#" + conf['acoustic_model'] + '\r\n'
        yield "grammarName#" + conf['grammar_name'] + "#" + \
            conf['grammar_folder'] + '\r\n'
        yield "dictionary#" + conf['dictionary'] + '\r\n'
        yield "languageModel#" + conf['language_model'] + '\r\n'
        if conf['grammar_disabled']:
            yield "disableGrammar#\r\n"
        else:
            yield "enableGrammar#\r\n"
        # Last command of the sequence
        yield "forceConfiguration#\r\n"

    for command in pending_commands():
        self.socket_connection.sendall(command)
        self._readLine()
202 
203  ## Creates audio profile based on the audio type for processing purposes.
204  # Defines a set of audio processing procedures (i.e. denoising) to be
205  # performed on the audio file and the parameters of the procedures.
206  # Aims to improve the audio file quality in order to improve speech recognition results.
207  #
208  # @param audio_type [string] The audio type
209  #
210  # @return processingProfile [dictionary] The profile attributes
def _createProcessingProfile(self, audio_type):
    # Builds the processing profile for an audio type: every stage starts
    # disabled, then the stages listed for the given type are switched on.
    # A type that is not in the table keeps the all-disabled profile.

    # Reusable stage settings
    sox_denoise = {'sox_denoising': True, 'sox_denoising_scale': 0.15}
    silence_check = {'detect_silence': True,
                     'detect_silence_threshold': 3.0}
    energy_denoise = {'energy_denoising': True,
                      'energy_denoising_init_scale': 0.125}

    # Audio type -> stages enabled for it
    stage_table = (
        ("headset", ()),
        ("nao_ogg", ({'sox_transform': True},
                     sox_denoise, silence_check, energy_denoise)),
        ("nao_wav_4_ch", ({'sox_channels_and_rate': True},
                          sox_denoise, silence_check, energy_denoise)),
        ("nao_wav_1_ch", (sox_denoise, silence_check, energy_denoise)),
        ("nao_wav_1_ch_denoised", (silence_check, energy_denoise)),
        ("nao_wav_1_ch_only_sox", (sox_denoise, silence_check)),
        ("nao_wav_1_ch_denoised_only_sox", (silence_check,)),
    )

    processingProfile = {
        'sox_transform': False,
        'sox_channels_and_rate': False,
        'sox_denoising': False,
        'sox_denoising_scale': 0.0,
        'detect_silence': False,
        'detect_silence_threshold': 0.0,
        'energy_denoising': False,
        'energy_denoising_init_scale': 0.0,
    }

    for type_name, enabled_stages in stage_table:
        if audio_type == type_name:
            for stage_settings in enabled_stages:
                processingProfile.update(stage_settings)
            break

    return processingProfile
262 
263 
264  ## Performs the speech recognition and returns a list of words
265  #
266  # @param audio_file [string] The audio file's name
267  # @param audio_type [string] The audio file's type
268  #
269  # @returns words [list::string] The result words
270  # @exception RappError Audio transformation error
def performSpeechRecognition(self, audio_file, audio_type, user):
    # Validates the request, runs the processing / recognition pipeline and
    # finally erases every intermediate audio file the pipeline created.
    #
    # Returns the list of recognized words, or a one-element list holding
    # an error message.
    #
    # Differences from the previous implementation:
    #  - a path without a directory part (e.g. "clip.wav") no longer raises
    #    IndexError (the unused folder computation was removed),
    #  - intermediate files are erased on the error paths too, and also
    #    when the pipeline raises,
    #  - one failed removal no longer prevents the remaining removals.

    # Check if path exists
    if not os.path.isfile(audio_file):
        return ["Error: Something went wrong with the local audio storage"
                " Requested path: " + audio_file]

    # Check that the audio_type is legit
    if audio_type not in (
            "headset",
            "nao_ogg",
            "nao_wav_4_ch",
            "nao_wav_1_ch",
            "nao_wav_1_ch_denoised",
            "nao_wav_1_ch_only_sox",
            "nao_wav_1_ch_denoised_only_sox",
            ):
        return ["Error: Audio source unrecognized"]

    # Keep extra audio files that need erasing
    audio_to_be_erased = []
    try:
        words, recognition_ran = self._runRecognitionPipeline(
            audio_file, audio_type, user, audio_to_be_erased)
    finally:
        removal_failed = False
        for stale_file in audio_to_be_erased:
            try:
                os.remove(stale_file)
            except OSError:
                removal_failed = True

    # As before, a failed removal replaces a recognition result. An error
    # reported by the pipeline itself is kept, since it is the root cause.
    if recognition_ran and removal_failed:
        return ["Error: Server rm malfunctioned"]
    return words

def _runRecognitionPipeline(self, audio_file, audio_type, user,
                            audio_to_be_erased):
    # Helper of performSpeechRecognition: applies the processing stages of
    # the audio type's profile and then calls Sphinx.
    #
    # Every file created on the way is appended to audio_to_be_erased (the
    # caller removes them). Returns a (result, recognition_ran) pair:
    # result is the word list or a one-element error list, recognition_ran
    # is False when a processing stage failed before Sphinx was called.

    # Get processing profile
    profile = self._createProcessingProfile(audio_type)

    # next_audio_file is the output of the current stage, prev_audio_file
    # the input of the next one
    next_audio_file = audio_file
    prev_audio_file = audio_file

    transform_req = AudioProcessingTransformAudioSrvRequest()
    transform_req.source_type = audio_type
    transform_req.source_name = prev_audio_file
    transform_req.target_type = 'wav'

    # Check if sox_transform is needed (recode into .wav)
    if profile['sox_transform']:
        next_audio_file += "_transformed.wav"
        transform_req.target_name = next_audio_file

        trans_response = self._audio_transform_srv( transform_req )
        if trans_response.error != 'success':
            return ([ 'Audio transformation error: ' + \
                trans_response.error ], False)

        audio_to_be_erased.append(next_audio_file)
        prev_audio_file = next_audio_file

    # Convert to a single channel at 16000 rate
    if profile['sox_channels_and_rate']:
        next_audio_file += "_mono16k.wav"
        transform_req.target_name = next_audio_file
        transform_req.target_channels = 1
        transform_req.target_rate = 16000

        trans_response = self._audio_transform_srv( transform_req )
        if trans_response.error != 'success':
            return ([ 'Audio transformation error: ' + \
                trans_response.error ], False)

        audio_to_be_erased.append(next_audio_file)
        prev_audio_file = next_audio_file

    # Denoise through the denoise service
    if profile['sox_denoising']:
        next_audio_file = prev_audio_file + "_denoised.wav"
        den_request = AudioProcessingDenoiseSrvRequest()
        den_request.audio_file = prev_audio_file
        den_request.denoised_audio_file = next_audio_file
        den_request.audio_type = audio_type
        den_request.user = user
        den_request.scale = profile['sox_denoising_scale']
        den_response = self._denoise_service(den_request)
        if den_response.success != "true":
            return (["Error:" + den_response.success], False)

        audio_to_be_erased.append(next_audio_file)
        prev_audio_file = next_audio_file

    # Detect silence
    if profile['detect_silence']:
        silence_req = AudioProcessingDetectSilenceSrvRequest()
        silence_req.audio_file = prev_audio_file
        silence_req.threshold = profile['detect_silence_threshold']
        silence_res = self._detect_silence_service(silence_req)
        RappUtilities.rapp_print("Silence detection results: " + \
            str(silence_res))
        if silence_res.silence == "true":
            return (["Error: No speech detected. RSD = " + \
                str(silence_res.level)], False)

    # Up to two recognition attempts; the second one runs when the first
    # one produced no words, with the energy denoising scale raised by
    # 0.125.
    tries = 0
    while tries < 2:
        # Perform energy denoising as well
        if profile['energy_denoising']:
            next_audio_file = prev_audio_file + "_energy_denoised.wav"
            dres = self._performEnergyDenoising(
                next_audio_file, prev_audio_file,
                profile['energy_denoising_init_scale'] + tries * 0.125)
            if dres != "true":
                return (["Error:" + dres], False)
            audio_to_be_erased.append(next_audio_file)
            prev_audio_file = next_audio_file

        words = self._callSphinxJava(next_audio_file)

        # Sphinx was respawned during the call: reset the flag and give up
        if self._sphinxDied == True:
            self._sphinxDied = False
            break

        if len(words) == 0 or (len(words) == 1 and words[0] == ""):
            tries += 1
        else:
            break

    return (words, True)
387 
388  ## Perform energy denoise.
389  # Calls energy denoise service
390  # rapp_audio_processing.rapp_audio_processing.AudioProcessing::energy_denoise
391  # (see also rapp_audio_processing.rapp_energy_denoise.EnergyDenoise::energyDenoise)
392  #
393  # @param audio_file [string] The audio file path
394  # @param scale [float] The scale parameter for the denoising procedure
395  #
396  # @returns success [string] The success field of the energy denoise service response
def _performEnergyDenoising(self, next_audio_file, audio_file, scale):
    # Asks the energy denoise service to process audio_file into
    # next_audio_file with the given scale, and returns the success field
    # of the service response.
    request = AudioProcessingDenoiseSrvRequest()
    request.scale = scale
    request.denoised_audio_file = next_audio_file
    request.audio_file = audio_file

    response = self._energy_denoise_service(request)
    return response.success
404 
405  ## Communicate with Sphinx subprocess to initiate recognition and fetch results.
406  #
407  # @param audio_file [string] The audio file path
408  #
409  # @return words [list::string] The Sphinx result
def _callSphinxJava(self, audio_file):
    # Sends the recognition request for audio_file to the Sphinx subprocess
    # and collects the answer.
    #
    # Each received buffer is acknowledged with 'Read line'. A buffer that
    # starts with "#" carries space separated words (its first and last
    # characters are dropped). The loop ends when a buffer contains
    # "stopPython\n", when one contains "CatchedException" (Sphinx is
    # respawned and _sphinxDied is raised), or when more than 10 seconds
    # have passed once a buffer has been handled.
    self.socket_connection.sendall("start\r\n")
    self.socket_connection.sendall("audioInput#" + audio_file + "\r\n")

    began_at = time.time()
    recognized = []

    while True:
        reply = self._readLine()
        self.socket_connection.sendall('Read line\r\n')

        # Slicing keeps this safe for an empty buffer
        if reply[:1] == "#":
            recognized.extend(reply[1:-1].split(" "))

        if "stopPython\n" in reply:
            break

        if "CatchedException" in reply:
            rospy.logerr(reply)
            self._respawnSphinx()
            self._sphinxDied = True
            return recognized

        elapsed = time.time() - began_at
        if elapsed > 10:
            recognized.append("Error: Time out error")
            break

    return recognized
435 
436  ## Respawns Sphinx subprocess, if it terminates abruptly
def _respawnSphinx(self):
    # Replaces the current Sphinx subprocess with a fresh one that is
    # configured with the last known configuration.
    self._sphinxSubprocess.kill()
    # Reap the killed process so that it is not left behind as a zombie
    self._sphinxSubprocess.wait()

    # Release the sockets that served the old subprocess;
    # _initializeSphinxProcess creates new ones.
    stale_sockets = (getattr(self, 'socket_connection', None),
                     self._sphinx_socket)
    for stale_socket in stale_sockets:
        if stale_socket is not None:
            stale_socket.close()

    time.sleep(2)
    self._initializeSphinxProcess( self._conf )
443 
444 
def _readLine
Helper function for getting input from IPC with Sphinx subprocess.
_jar_path
Contains the absolute path for the Sphinx jar file.
def __init__
Constructor Initiates service clients.
Contains the Sphinx subprocess and is responsible for configuring Sphinx and performing the recogniti...
def performSpeechRecognition
Performs the speech recognition and returns a list of words.
def _createProcessingProfile
Creates audio profile based on the audio type for processing purposes.
def _respawnSphinx
Respawns Sphinx subprocess, if it terminates abruptly.
def _createSocket
Creates socket IPC between self and Sphinx subprocess Creates the socket server with a system provide...
def _callSphinxJava
Communicate with Sphinx subprocess to initiate recognition and fetch results.
def _initializeSphinxProcess
Perform Sphinx4 initialization Initiates Sphinx subprocess, sets up socket IPC and configures Sphinx ...
std::vector< std::string > split(std::string str, std::string sep)
Splits string by delimiter.