RAPP Platform  v0.6.0
RAPP Platform is a collection of ROS nodes and back-end processes that aim to deliver ready-to-use generic services to robots.
 All Classes Namespaces Files Functions Variables Macros
speech_recognition_google.py
Go to the documentation of this file.
1 #!/usr/bin/env python2
2 # -*- coding: utf-8 -*-
3 
4 #Copyright 2015 RAPP
5 
6 #Licensed under the Apache License, Version 2.0 (the "License");
7 #you may not use this file except in compliance with the License.
8 #You may obtain a copy of the License at
9 
10  #http://www.apache.org/licenses/LICENSE-2.0
11 
12 #Unless required by applicable law or agreed to in writing, software
13 #distributed under the License is distributed on an "AS IS" BASIS,
14 #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 #See the License for the specific language governing permissions and
16 #limitations under the License.
17 
18 import rospy
19 import httplib
20 import json
21 import sys
22 import os
23 
24 from pylab import *
25 from scipy.io import wavfile
26 
27 from rapp_platform_ros_communications.srv import (
28  SpeechToTextSrv,
29  SpeechToTextSrvResponse
30  )
31 
32 from rapp_platform_ros_communications.srv import (
33  AudioProcessingDenoiseSrv,
34  AudioProcessingDenoiseSrvResponse,
35  AudioProcessingDenoiseSrvRequest
36  )
37 
38 from rapp_platform_ros_communications.srv import (
39  AudioProcessingTransformAudioSrv,
40  AudioProcessingTransformAudioSrvResponse,
41  AudioProcessingTransformAudioSrvRequest
42  )
43 
44 from rapp_platform_ros_communications.msg import (
45  StringArrayMsg
46  )
47 
48 from rapp_exceptions import RappError
49 
## @class SpeechToTextGoogle
# Implements the calls to the Google ASR API
class SpeechToTextGoogle(object):

    ## Sample rate (Hz) of the audio uploaded to Google. The same value is
    # used for the audio conversion and for the Content-type header, so the
    # declared rate always matches the uploaded file.
    GOOGLE_AUDIO_RATE = 16000

    ## Audio types for which the denoising pipeline is applied
    NAO_AUDIO_TYPES = ('nao_ogg', 'nao_wav_1_ch', 'nao_wav_4_ch')

    ## Scale passed to the denoising service
    DENOISING_SCALE = 0.15

    ## Default constructor. Declares the service callback
    def __init__(self):
        # Speech recognition service published
        self.serv_topic = rospy.get_param(
            "rapp_speech_detection_google_detect_speech_topic")
        if not self.serv_topic:
            # rospy exposes logerr(); there is no rospy.logerror()
            rospy.logerr("Speech detection google topic param not found")

        self.serv = rospy.Service(
            self.serv_topic, SpeechToTextSrv, self.speech_to_text_callback)

    ## @brief The service callback
    # @param req [SpeechToTextSrvRequest] The ROS service request
    # @return [SpeechToTextSrvResponse] The words of the first alternative,
    # its confidence (0 when Google does not provide one) and the words of
    # the remaining alternatives. On failure only the error field is set.
    def speech_to_text_callback(self, req):
        res = SpeechToTextSrvResponse()

        if req.language == '':
            res.error = 'No language specified'
            return res

        # Getting the results in order to parse them
        try:
            transcripts = self.speech_to_text(
                req.filename, req.user, req.audio_type, req.language)
        except RappError as e:
            res.error = e.value
            return res

        results = transcripts.get('result') or []
        if not results:
            # Nothing was recognized: return the void response
            return res

        alternatives = results[0].get('alternative') or []
        if not alternatives:
            res.confidence.data = 0
            return res

        # The first alternative is Google's suggestion. Google provides
        # the confidence only for this one (and not always).
        best = alternatives[0]
        res.words = best['transcript'].split(" ")
        res.confidence.data = best.get('confidence', 0)

        other_alternatives = []
        for alt in alternatives[1:]:
            sam = StringArrayMsg()
            sam.s = alt['transcript'].split(" ")
            other_alternatives.append(sam)
        res.alternatives = other_alternatives
        return res

    ## @brief Performs the call to Google API
    # @param file_path [string] The audio file
    # @param user [string] The username
    # @param audio_file_type [string] Used to check if denoising is needed. Can be one of headset, nao_ogg, nao_wav_1_ch, nao_wav_4_ch
    # @param language [string] The language in which the ASR will be performed
    # @return The transcript from Google, as a dictionary with a 'result' list
    # @exception RappError The input file is missing, an audio processing
    # step failed, Google's answer was not usable or a temporary file
    # could not be removed
    def speech_to_text(self, file_path, user, audio_file_type, language):

        # Check if file exists
        if not os.path.isfile(file_path):
            raise RappError("Error: file " + file_path + ' not found')

        audio_trans_topic = rospy.get_param(
            "rapp_audio_processing_transform_audio_topic")
        audio_transform_srv = rospy.ServiceProxy(
            audio_trans_topic, AudioProcessingTransformAudioSrv)

        # Every intermediate file is registered here *before* it is
        # produced, so that it is removed even if a later step fails.
        cleanup = []
        try:
            # Convert the input to mono wav
            wav_audio = file_path + '.wav'
            cleanup.append(wav_audio)
            self._transform_audio(
                audio_transform_srv, audio_file_type, file_path,
                'wav', wav_audio)

            # Denoise if necessary
            processed_audio = wav_audio
            if audio_file_type in self.NAO_AUDIO_TYPES:
                processed_audio = self._denoise_nao_audio(
                    wav_audio, user, audio_file_type, cleanup)

            # Transform to flac. The source is the output of the steps
            # above, so the denoised audio is the one sent to Google.
            flac_audio = processed_audio + '.flac'
            cleanup.append(flac_audio)
            self._transform_audio(
                audio_transform_srv, 'headset', processed_audio,
                'flac', flac_audio)

            # flac is binary data, hence "rb"
            with open(flac_audio, "rb") as f:
                speech = f.read()

            jsdata = self._parse_google_response(
                self._query_google(speech, language))
        finally:
            # Does not raise, so an error from the steps above is not masked
            all_removed = self._remove_temp_files(cleanup)

        if not all_removed:
            raise RappError("Error: Removal of temporary file malfunctioned")
        return jsdata

    ## @brief Calls the audio transformation service (mono output)
    # @param transform_srv [rospy.ServiceProxy] Proxy of the transformation service
    # @param source_type [string] The audio type of the source file
    # @param source_name [string] The source file
    # @param target_type [string] The audio type of the produced file
    # @param target_name [string] The produced file
    # @exception RappError The service did not answer 'success'
    def _transform_audio(self, transform_srv, source_type, source_name,
                         target_type, target_name):
        transform_req = AudioProcessingTransformAudioSrvRequest()
        transform_req.source_type = source_type
        transform_req.source_name = source_name
        transform_req.target_type = target_type
        transform_req.target_name = target_name
        transform_req.target_channels = 1
        transform_req.target_rate = self.GOOGLE_AUDIO_RATE

        trans_response = transform_srv(transform_req)
        if trans_response.error != 'success':
            raise RappError(trans_response.error)

    ## @brief Runs the sox pre-processing and the denoising service
    # @param audio_file [string] The wav file to process
    # @param user [string] The username
    # @param audio_file_type [string] One of nao_ogg, nao_wav_1_ch, nao_wav_4_ch
    # @param cleanup [list] Receives the intermediate files that are created
    # @return [string] The denoised audio file
    # @exception RappError sox or the denoising service failed
    def _denoise_nao_audio(self, audio_file, user, audio_file_type, cleanup):
        denoise_topic = rospy.get_param("rapp_audio_processing_denoise_topic")
        denoise_service = rospy.ServiceProxy(
            denoise_topic, AudioProcessingDenoiseSrv)

        if audio_file_type == 'nao_ogg':
            # Plain sox pass over the file
            transformed = audio_file + "_transformed.wav"
            cleanup.append(transformed)
            self._run_sox([audio_file, transformed])
            audio_file = transformed
        elif audio_file_type == 'nao_wav_4_ch':
            # Resample to 16 kHz and downmix to one channel
            mono = audio_file + "_mono16k.wav"
            cleanup.append(mono)
            self._run_sox([audio_file, '-r', '16000', '-c', '1', mono])
            audio_file = mono

        denoised = audio_file + "_denoised.wav"
        cleanup.append(denoised)
        den_request = AudioProcessingDenoiseSrvRequest()
        den_request.audio_file = audio_file
        den_request.denoised_audio_file = denoised
        den_request.audio_type = audio_file_type
        den_request.user = user
        den_request.scale = self.DENOISING_SCALE
        den_response = denoise_service(den_request)
        if den_response.success != "true":
            raise RappError("Error:" + den_response.success)
        return denoised

    ## @brief Runs sox with the given arguments
    # @param arguments [list] The command line arguments, one per item
    # @exception RappError sox could not be started or exited with non-zero status
    @staticmethod
    def _run_sox(arguments):
        import subprocess
        # An argument list (no shell) keeps file names with spaces or shell
        # metacharacters from being interpreted as part of a command.
        try:
            status = subprocess.call(['sox'] + list(arguments))
        except OSError:
            # sox is not installed / not executable
            status = -1
        if status != 0:
            raise RappError("Error: sox malfunctioned")

    ## @brief Uploads the audio to the Google ASR API
    # @param speech [string] The contents of the flac file
    # @param language [string] The language in which the ASR will be performed
    # @return [string] The raw body of Google's answer
    # @exception RappError Google did not answer with HTTP status 200
    def _query_google(self, speech, language):
        # Fix language
        if language == 'en':
            language = "en-US"
        elif language == 'gr':
            language = 'el'

        # NOTE - That's a general usage key. They may disable it in the future.
        # NOTE(review): credentials should not live in the source; consider
        # reading the key from a ROS parameter.
        key = "AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw"
        path = "/speech-api/v2/recognize?lang=" + language + "&key=" + key
        headers = {
            "Content-type": "audio/x-flac; rate=%d" % self.GOOGLE_AUDIO_RATE
        }

        conn = httplib.HTTPSConnection("www.google.com")
        try:
            conn.request("POST", path, speech, headers)
            response = conn.getresponse()
            status = response.status
            data = response.read()
        finally:
            conn.close()

        if status != 200:
            raise RappError(
                "Error: Google ASR request failed with HTTP status "
                + str(status))
        return data

    ## @brief Extracts the recognition result from Google's answer
    #
    # The body may hold several JSON documents one after the other, e.g. an
    # empty result followed by the real one. The first document with a
    # non-empty 'result' is returned.
    #
    # @param data [string] The raw body of Google's answer
    # @return [dict] The selected document, or {'result': []} if no
    # document carries a result
    # @exception RappError The body is not valid JSON
    @staticmethod
    def _parse_google_response(data):
        decoder = json.JSONDecoder()
        position = 0
        length = len(data)
        try:
            while True:
                # raw_decode() does not accept leading whitespace
                while position < length and data[position].isspace():
                    position += 1
                if position >= length:
                    break
                document, position = decoder.raw_decode(data, position)
                if isinstance(document, dict) and document.get('result'):
                    return document
        except ValueError:
            raise RappError("Error: Could not parse the Google ASR response")
        return {'result': []}

    ## @brief Removes the temporary files
    # @param paths [list] The files to remove
    # @return [bool] False if at least one existing file could not be removed
    @staticmethod
    def _remove_temp_files(paths):
        import errno
        all_removed = True
        for path in paths:
            try:
                os.remove(path)
            except OSError as e:
                # Like 'rm -f', a file that was never created is not an error
                if e.errno != errno.ENOENT:
                    all_removed = False
        return all_removed
268 
## Script entry point. Registers the ROS node, creates the
# SpeechToTextGoogle object and then hands control to rospy.
if __name__ == "__main__":
    rospy.init_node('speech_to_text_ros_node')
    speech_to_text_node = SpeechToTextGoogle()
    rospy.spin()
274 
275 
def speech_to_text
Performs the call to Google API.
std::vector< std::string > split(std::string str, std::string sep)
Splits string by delimiter.