25 from scipy.io
import wavfile
27 from rapp_platform_ros_communications.srv
import (
29 SpeechToTextSrvResponse
32 from rapp_platform_ros_communications.srv
import (
33 AudioProcessingDenoiseSrv,
34 AudioProcessingDenoiseSrvResponse,
35 AudioProcessingDenoiseSrvRequest
38 from rapp_platform_ros_communications.srv
import (
39 AudioProcessingTransformAudioSrv,
40 AudioProcessingTransformAudioSrvResponse,
41 AudioProcessingTransformAudioSrvRequest
44 from rapp_platform_ros_communications.msg
import (
48 from rapp_exceptions
import RappError
57 self.
serv_topic = rospy.get_param(
"rapp_speech_detection_google_detect_speech_topic")
59 rospy.logerror(
"Speech detection google topic param not found")
68 res = SpeechToTextSrvResponse()
70 if req.language ==
'':
71 res.error =
'No language specified'
82 except RappError
as e:
86 if len(transcripts[
'result']) == 0:
90 alternatives = transcripts[
'result'][0][
'alternative']
91 res = SpeechToTextSrvResponse()
94 if len(alternatives) > 0:
96 words = alternatives[0][
'transcript'].
split(
" ")
98 res.words = res.words + [w]
100 if 'confidence' in alternatives[0].keys():
101 res.confidence.data = alternatives[0][
'confidence']
103 res.confidence.data = 0
105 for alt
in alternatives[1:]:
106 sam = StringArrayMsg()
107 words = alt[
'transcript'].
split(
" ")
110 res.alternatives = res.alternatives + [sam]
112 res.confidence.data = 0
125 if not os.path.isfile(file_path):
126 raise RappError(
"Error: file " + file_path +
' not found')
129 new_audio = file_path
131 audio_trans_topic = rospy.get_param(
"rapp_audio_processing_transform_audio_topic")
132 audio_transform_srv = rospy.ServiceProxy( audio_trans_topic, AudioProcessingTransformAudioSrv )
136 transform_req = AudioProcessingTransformAudioSrvRequest()
137 transform_req.source_type = audio_file_type
138 transform_req.source_name = new_audio
139 transform_req.target_type =
'wav'
141 transform_req.target_name = new_audio
142 transform_req.target_channels = 1
143 transform_req.target_rate = 16000
145 trans_response = audio_transform_srv( transform_req )
147 if trans_response.error !=
'success':
148 raise RappError( trans_response.error )
149 cleanup.append(new_audio)
152 prev_audio_file = new_audio
153 next_audio_file = prev_audio_file
154 if audio_file_type
in [
'nao_ogg',
'nao_wav_1_ch',
'nao_wav_4_ch']:
155 denoise_topic = rospy.get_param(
"rapp_audio_processing_denoise_topic")
156 energy_denoise_topic = \
157 rospy.get_param(
"rapp_audio_processing_energy_denoise_topic")
158 denoise_service = rospy.ServiceProxy(\
159 denoise_topic, AudioProcessingDenoiseSrv)
160 energy_denoise_service = rospy.ServiceProxy(\
161 energy_denoise_topic, AudioProcessingDenoiseSrv)
164 manipulation[
'sox_transform'] =
False
165 manipulation[
'sox_denoising'] =
False
166 manipulation[
'sox_channels_and_rate'] =
False
167 if audio_file_type ==
"headset":
169 elif audio_file_type ==
"nao_ogg":
170 manipulation[
'sox_transform'] =
True
171 manipulation[
'sox_denoising'] =
True
172 manipulation[
'sox_denoising_scale'] = 0.15
173 elif audio_file_type ==
"nao_wav_4_ch":
174 manipulation[
'sox_channels_and_rate'] =
True
175 manipulation[
'sox_denoising'] =
True
176 manipulation[
'sox_denoising_scale'] = 0.15
177 elif audio_file_type ==
"nao_wav_1_ch":
178 manipulation[
'sox_denoising'] =
True
179 manipulation[
'sox_denoising_scale'] = 0.15
180 manipulation[
'detect_silence'] =
True
181 manipulation[
'detect_silence_threshold'] = 0.25
184 if manipulation[
'sox_transform'] ==
True:
185 next_audio_file +=
"_transformed.wav"
186 command =
"sox " + prev_audio_file +
" " + next_audio_file
187 com_res = os.system(command)
189 raise RappError(
"Error: sox malfunctioned")
190 cleanup.append(next_audio_file)
191 prev_audio_file = next_audio_file
192 if manipulation[
'sox_channels_and_rate'] ==
True:
193 next_audio_file +=
"_mono16k.wav"
194 command =
"sox " + prev_audio_file +
" -r 16000 -c 1 " + next_audio_file
195 com_res = os.system(command)
197 raise RappError(
"Error: sox malfunctioned")
198 cleanup.append(next_audio_file)
199 prev_audio_file = next_audio_file
200 if manipulation[
'sox_denoising'] ==
True:
201 next_audio_file = prev_audio_file +
"_denoised.wav"
202 den_request = AudioProcessingDenoiseSrvRequest()
203 den_request.audio_file = prev_audio_file
204 den_request.denoised_audio_file = next_audio_file
205 den_request.audio_type = audio_file_type
206 den_request.user = user
207 den_request.scale = manipulation[
'sox_denoising_scale']
208 den_response = denoise_service(den_request)
209 if den_response.success !=
"true":
210 raise RappError(
"Error:" + den_response.success)
211 cleanup.append(next_audio_file)
212 prev_audio_file = next_audio_file
217 transform_req = AudioProcessingTransformAudioSrvRequest()
218 transform_req.source_type =
'headset'
219 transform_req.source_name = new_audio
220 transform_req.target_type =
'flac'
221 newer_audio = new_audio +
'.flac'
222 transform_req.target_name = newer_audio
223 transform_req.target_channels = 1
224 transform_req.target_rate = 16000
226 trans_response = audio_transform_srv( transform_req )
227 cleanup.append(newer_audio)
229 if trans_response.error !=
'success':
230 raise RappError( trans_response.error )
234 with open(newer_audio,
"r") as f:
236 url = "www.google.com"
241 elif language ==
'gr':
245 key =
"AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw"
246 path =
"/speech-api/v2/recognize?lang=" + language +
"&key=" + key
247 headers = {
"Content-type":
"audio/x-flac; rate=22050" };
248 params = {
"xjerr":
"1",
"client":
"chromium"}
249 conn = httplib.HTTPSConnection(url)
250 conn.request(
"POST", path, speech, headers)
251 response = conn.getresponse()
252 data = response.read()
255 index = data.find(
"}")
256 data = data[index + 1:]
260 jsdata = json.loads(data)
264 command =
'rm -f ' + f
265 if os.system(command):
266 raise RappError(
"Error: Removal of temporary file malfunctioned")
270 if __name__ ==
"__main__":
271 rospy.init_node(
'speech_to_text_ros_node')
def speech_to_text
Performs the call to the Google API.
def __init__
Default constructor.
Implements calls to the Google ASR API.
def speech_to_text_callback
The service callback.
std::vector< std::string > split(std::string str, std::string sep)
Splits string by delimiter.