RAPP Platform  v0.6.0
RAPP Platform is a collection of ROS nodes and back-end processes that aim to deliver ready-to-use generic services to robots
 All Classes Namespaces Files Functions Variables Macros
greek_support.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 
4 #Copyright 2015 RAPP
5 
6 #Licensed under the Apache License, Version 2.0 (the "License");
7 #you may not use this file except in compliance with the License.
8 #You may obtain a copy of the License at
9 
10  #http://www.apache.org/licenses/LICENSE-2.0
11 
12 #Unless required by applicable law or agreed to in writing, software
13 #distributed under the License is distributed on an "AS IS" BASIS,
14 #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 #See the License for the specific language governing permissions and
16 #limitations under the License.
17 
18 # Authors: Athanassios Kintsakis, Aris Thallas, Manos Tsardoulias
19 # contact: akintsakis@issel.ee.auth.gr, aris.thallas@{iti.gr, gmail.com}, etsardou@iti.gr
20 
21 
22 from language_support import *
23 from english_support import *
24 
25 ## @class GreekSupport
26 # @brief Allows the creation of configuration files for Greek Sphinx speech recognition
27 #
28 # Also supports multilingual words (English/Greek) by utilizing
29 # english_support.EnglishSupport
31 
32  ## Performs initializations
def __init__(self):
    """Create the empty Greek mapping tables and populate them.

    The LanguageSupport base initializer runs first. Every container that
    _configureLetters() writes to is then created here, because that
    method only adds entries and never creates the containers itself.
    """
    # Initialize LanguageSupport
    super(GreekSupport, self).__init__()

    # TODO: Split the rapp_sphinx4_java_libraries package into libraries and
    # language models
    # NOTE: This does not exist yet
    #self._greek_dictionary = self._language_models_url + \
    #"/englishPack/cmudict-en-us.dict"

    # NOTE(review): assumes EnglishSupport takes no constructor arguments;
    # confirm against english_support.py
    ## Allows the creation of configuration files for English words
    #
    # Instantiates english_support.EnglishSupport to identify english words
    self._english_support = EnglishSupport()

    ## Greek uppercase to lowercase mapping
    self._capital_letters = {}
    ## Greek words->English standard phonemes mapping
    self._phonemes = {}
    ## Two digit Greek letters->English phonemes mapping
    self._two_digit_letters = {}
    ## Special two digit Greek letter->English phonemes mapping
    # (a list: _configureLetters() fills it with append())
    self._special_two_digit_letters = []
    ## All special two digit Greek letter->English phonemes mapping
    self._all_special_two_digit_letters = {}
    ## Special Greek words ->English phonemes mapping
    self._s_specific_rules = {}
    ## Standard Greek letter->English phonemes mapping
    self._letters = {}
    ## Greek letters->English letters mapping
    self._literal_letters = {}

    self._configureLetters()
67 
68  ## Creates the basic Greek letter to English configuration
69  def _configureLetters(self):
70 
71  f_base_pre = [u'π', u'τ', u'κ', u'θ', u'χ', u'σ', u'ξ', u'ψ']
72  f_base = []
73  for l in f_base_pre:
74  f_base.append(l.encode('utf-8'))
75 
76  v_base_pre = [u'δ', u'γ', u'ζ', u'λ', u'ρ', u'μ', u'ν', u'α', u'ά', u'ε',\
77  u'έ', u'η', u'ή', u'ι', u'ί', u'ϊ', u'ΐ', u'ο', u'ό', u'υ', u'ύ', u'ϋ'\
78  u'ΰ', u'ω', u'ώ']
79  v_base = []
80  for l in v_base_pre:
81  v_base.append(l.encode('utf-8'))
82 
83  self._capital_letters[(u'Α').encode('utf-8')] = (u'α').encode('utf-8')
84  self._capital_letters[(u'Ά').encode('utf-8')] = (u'ά').encode('utf-8')
85  self._capital_letters[(u'Β').encode('utf-8')] = (u'β').encode('utf-8')
86  self._capital_letters[(u'Γ').encode('utf-8')] = (u'γ').encode('utf-8')
87  self._capital_letters[(u'Δ').encode('utf-8')] = (u'δ').encode('utf-8')
88  self._capital_letters[(u'Ε').encode('utf-8')] = (u'ε').encode('utf-8')
89  self._capital_letters[(u'Έ').encode('utf-8')] = (u'έ').encode('utf-8')
90  self._capital_letters[(u'Ζ').encode('utf-8')] = (u'ζ').encode('utf-8')
91  self._capital_letters[(u'Η').encode('utf-8')] = (u'η').encode('utf-8')
92  self._capital_letters[(u'Ή').encode('utf-8')] = (u'ή').encode('utf-8')
93  self._capital_letters[(u'Θ').encode('utf-8')] = (u'θ').encode('utf-8')
94  self._capital_letters[(u'Ι').encode('utf-8')] = (u'ι').encode('utf-8')
95  self._capital_letters[(u'Ί').encode('utf-8')] = (u'ί').encode('utf-8')
96  self._capital_letters[(u'Ϊ').encode('utf-8')] = (u'ϊ').encode('utf-8')
97  self._capital_letters[(u'Κ').encode('utf-8')] = (u'κ').encode('utf-8')
98  self._capital_letters[(u'Λ').encode('utf-8')] = (u'λ').encode('utf-8')
99  self._capital_letters[(u'Μ').encode('utf-8')] = (u'μ').encode('utf-8')
100  self._capital_letters[(u'Ν').encode('utf-8')] = (u'ν').encode('utf-8')
101  self._capital_letters[(u'Ξ').encode('utf-8')] = (u'ξ').encode('utf-8')
102  self._capital_letters[(u'Ο').encode('utf-8')] = (u'ο').encode('utf-8')
103  self._capital_letters[(u'Ό').encode('utf-8')] = (u'ό').encode('utf-8')
104  self._capital_letters[(u'Π').encode('utf-8')] = (u'π').encode('utf-8')
105  self._capital_letters[(u'Ρ').encode('utf-8')] = (u'ρ').encode('utf-8')
106  self._capital_letters[(u'Σ').encode('utf-8')] = (u'σ').encode('utf-8')
107  self._capital_letters[(u'Τ').encode('utf-8')] = (u'τ').encode('utf-8')
108  self._capital_letters[(u'Υ').encode('utf-8')] = (u'γ').encode('utf-8')
109  self._capital_letters[(u'Ύ').encode('utf-8')] = (u'ύ').encode('utf-8')
110  self._capital_letters[(u'Ϋ').encode('utf-8')] = (u'ϋ').encode('utf-8')
111  self._capital_letters[(u'Φ').encode('utf-8')] = (u'φ').encode('utf-8')
112  self._capital_letters[(u'Χ').encode('utf-8')] = (u'χ').encode('utf-8')
113  self._capital_letters[(u'Ψ').encode('utf-8')] = (u'ψ').encode('utf-8')
114  self._capital_letters[(u'Ω').encode('utf-8')] = (u'ω').encode('utf-8')
115  self._capital_letters[(u'Ώ').encode('utf-8')] = (u'ώ').encode('utf-8')
116 
117  self._phonemes[(u'ου').encode('utf-8')] = 'UW '
118  self._phonemes[(u'ού').encode('utf-8')] = 'UW '
119  self._phonemes[(u'μπ').encode('utf-8')] = 'B '
120  self._phonemes[(u'ντ').encode('utf-8')] = 'D '
121  self._phonemes[(u'γκ').encode('utf-8')] = 'G ' #?
122  self._phonemes[(u'γγ').encode('utf-8')] = 'G ' #?
123  self._phonemes[(u'τσ').encode('utf-8')] = 'CH ' #?
124  self._phonemes[(u'τζ').encode('utf-8')] = 'JH ' #?
125  self._phonemes[(u'σσ').encode('utf-8')] = 'S ' #?
126  self._phonemes[(u'κκ').encode('utf-8')] = 'K '
127  self._phonemes[(u'ββ').encode('utf-8')] = 'V '
128  self._phonemes[(u'λλ').encode('utf-8')] = 'L '
129  self._phonemes[(u'μμ').encode('utf-8')] = 'M '
130  self._phonemes[(u'νν').encode('utf-8')] = 'N '
131  self._phonemes[(u'ππ').encode('utf-8')] = 'P '
132  self._phonemes[(u'ρρ').encode('utf-8')] = 'R '
133  self._phonemes[(u'ττ').encode('utf-8')] = 'T '
134 
135  self._two_digit_letters[(u'αι').encode('utf-8')] = 'EH '
136  self._two_digit_letters[(u'αί').encode('utf-8')] = 'EH '
137  self._two_digit_letters[(u'ει').encode('utf-8')] = 'IH '
138  self._two_digit_letters[(u'εί').encode('utf-8')] = 'IH '
139  self._two_digit_letters[(u'οι').encode('utf-8')] = 'IH '
140  self._two_digit_letters[(u'οί').encode('utf-8')] = 'IH '
141  self._two_digit_letters[(u'υι').encode('utf-8')] = 'IH '
142  self._two_digit_letters[(u'υί').encode('utf-8')] = 'IH '
143 
144  self._special_two_digit_letters.append((u'αυ').encode('utf-8'))
145  self._special_two_digit_letters.append((u'αύ').encode('utf-8'))
146  self._special_two_digit_letters.append((u'ευ').encode('utf-8'))
147  self._special_two_digit_letters.append((u'εύ').encode('utf-8'))
148  special_two_digit_letters_v = {}
149  special_two_digit_letters_v[(u'αυ').encode('utf-8')] = (u'αβ').encode('utf-8')
150  special_two_digit_letters_v[(u'αύ').encode('utf-8')] = (u'άβ').encode('utf-8')
151  special_two_digit_letters_v[(u'ευ').encode('utf-8')] = (u'εβ').encode('utf-8')
152  special_two_digit_letters_v[(u'εύ').encode('utf-8')] = (u'έβ').encode('utf-8')
153  special_two_digit_letters_f = {}
154  special_two_digit_letters_f[(u'αυ').encode('utf-8')] = (u'αφ').encode('utf-8')
155  special_two_digit_letters_f[(u'αύ').encode('utf-8')] = (u'άφ').encode('utf-8')
156  special_two_digit_letters_f[(u'ευ').encode('utf-8')] = (u'εφ').encode('utf-8')
157  special_two_digit_letters_f[(u'εύ').encode('utf-8')] = (u'έφ').encode('utf-8')
158 
159  for tdl in self._special_two_digit_letters:
160  for fb in f_base:
161  self._all_special_two_digit_letters[tdl + fb] = \
162  special_two_digit_letters_f[tdl] + fb
163  for tdl in self._special_two_digit_letters:
164  for vb in v_base:
165  self._all_special_two_digit_letters[tdl + vb] = \
166  special_two_digit_letters_v[tdl] + vb
167 
168  self._s_specific_rules[(u'σγ').encode('utf-8')] = 'Z W '
169  self._s_specific_rules[(u'σβ').encode('utf-8')] = 'Z V '
170  self._s_specific_rules[(u'σδ').encode('utf-8')] = 'Z DH '
171  self._s_specific_rules[(u'σμ').encode('utf-8')] = 'Z M '
172  self._s_specific_rules[(u'σν').encode('utf-8')] = 'Z N '
173  self._s_specific_rules[(u'σλ').encode('utf-8')] = 'Z L '
174  self._s_specific_rules[(u'σρ').encode('utf-8')] = 'Z R '
175  self._s_specific_rules[(u'σμπ').encode('utf-8')] = 'Z B '
176  self._s_specific_rules[(u'σντ').encode('utf-8')] = 'Z D '
177 
178  self._letters[(u'α').encode('utf-8')] = 'AA ' # when AE?
179  self._letters[(u'ά').encode('utf-8')] = 'AA '
180  self._letters[(u'β').encode('utf-8')] = 'V '
181  self._letters[(u'γ').encode('utf-8')] = 'W '
182  self._letters[(u'δ').encode('utf-8')] = 'DH '
183  self._letters[(u'ε').encode('utf-8')] = 'EH '
184  self._letters[(u'έ').encode('utf-8')] = 'EH '
185  self._letters[(u'ζ').encode('utf-8')] = 'Z '
186  self._letters[(u'η').encode('utf-8')] = 'IH '
187  self._letters[(u'ή').encode('utf-8')] = 'IH '
188  self._letters[(u'θ').encode('utf-8')] = 'TH '
189  self._letters[(u'ι').encode('utf-8')] = 'IH '
190  self._letters[(u'ί').encode('utf-8')] = 'IH '
191  self._letters[(u'ϊ').encode('utf-8')] = 'IH '
192  self._letters[(u'ΐ').encode('utf-8')] = 'IH '
193  self._letters[(u'κ').encode('utf-8')] = 'K '
194  self._letters[(u'λ').encode('utf-8')] = 'L '
195  self._letters[(u'μ').encode('utf-8')] = 'M '
196  self._letters[(u'ν').encode('utf-8')] = 'N '
197  self._letters[(u'ξ').encode('utf-8')] = 'K S '
198  self._letters[(u'ο').encode('utf-8')] = 'OW '
199  self._letters[(u'ό').encode('utf-8')] = 'OW '
200  self._letters[(u'π').encode('utf-8')] = 'P '
201  self._letters[(u'ρ').encode('utf-8')] = 'R '
202  self._letters[(u'σ').encode('utf-8')] = 'S '
203  self._letters[(u'τ').encode('utf-8')] = 'T '
204  self._letters[(u'υ').encode('utf-8')] = 'IH '
205  self._letters[(u'ύ').encode('utf-8')] = 'IH '
206  self._letters[(u'ϋ').encode('utf-8')] = 'IH '
207  self._letters[(u'ΰ').encode('utf-8')] = 'IH '
208  self._letters[(u'φ').encode('utf-8')] = 'F '
209  self._letters[(u'χ').encode('utf-8')] = 'HH '
210  self._letters[(u'ψ').encode('utf-8')] = 'P S '
211  self._letters[(u'ω').encode('utf-8')] = 'OW '
212  self._letters[(u'ώ').encode('utf-8')] = 'OW '
213  self._letters[(u'ς').encode('utf-8')] = 'S '
214 
215  self._literal_letters[(u'α').encode('utf-8')] = 'a' # when AE?
216  self._literal_letters[(u'ά').encode('utf-8')] = 'a\''
217  self._literal_letters[(u'β').encode('utf-8')] = 'b'
218  self._literal_letters[(u'γ').encode('utf-8')] = 'g'
219  self._literal_letters[(u'δ').encode('utf-8')] = 'd'
220  self._literal_letters[(u'ε').encode('utf-8')] = 'e'
221  self._literal_letters[(u'έ').encode('utf-8')] = 'e\''
222  self._literal_letters[(u'ζ').encode('utf-8')] = 'z'
223  self._literal_letters[(u'η').encode('utf-8')] = 'h'
224  self._literal_letters[(u'ή').encode('utf-8')] = 'h\''
225  self._literal_letters[(u'θ').encode('utf-8')] = 'th'
226  self._literal_letters[(u'ι').encode('utf-8')] = 'i'
227  self._literal_letters[(u'ί').encode('utf-8')] = 'i\''
228  self._literal_letters[(u'ϊ').encode('utf-8')] = 'i:'
229  self._literal_letters[(u'ΐ').encode('utf-8')] = 'i\':'
230  self._literal_letters[(u'κ').encode('utf-8')] = 'k'
231  self._literal_letters[(u'λ').encode('utf-8')] = 'l'
232  self._literal_letters[(u'μ').encode('utf-8')] = 'm'
233  self._literal_letters[(u'ν').encode('utf-8')] = 'n'
234  self._literal_letters[(u'ξ').encode('utf-8')] = 'ks'
235  self._literal_letters[(u'ο').encode('utf-8')] = 'o'
236  self._literal_letters[(u'ό').encode('utf-8')] = 'o\''
237  self._literal_letters[(u'π').encode('utf-8')] = 'p'
238  self._literal_letters[(u'ρ').encode('utf-8')] = 'r'
239  self._literal_letters[(u'σ').encode('utf-8')] = 's'
240  self._literal_letters[(u'ς').encode('utf-8')] = 's\''
241  self._literal_letters[(u'τ').encode('utf-8')] = 't'
242  self._literal_letters[(u'υ').encode('utf-8')] = 'u'
243  self._literal_letters[(u'ύ').encode('utf-8')] = 'u\''
244  self._literal_letters[(u'ϋ').encode('utf-8')] = 'u:'
245  self._literal_letters[(u'ΰ').encode('utf-8')] = 'u\':'
246  self._literal_letters[(u'φ').encode('utf-8')] = 'f'
247  self._literal_letters[(u'χ').encode('utf-8')] = 'x'
248  self._literal_letters[(u'ψ').encode('utf-8')] = 'ps'
249  self._literal_letters[(u'ω').encode('utf-8')] = 'w'
250  self._literal_letters[(u'ώ').encode('utf-8')] = 'w\''
251 
252 
253  ## Transforms the Greek words into phonemes for the Sphinx configuration
254  #
255  # @param words [list::string] The set of Greek words
256  #
257  # @return enhanced_words [dictionary] The Greek word->phonemes mapping
258  # @return englified_words [dictionary] The Greek word->Englified Greek word mapping
259  def _transformWords(self, words):
260  enhanced_words = {}
261  englified_words = {}
262  for word in words:
263  initial_word = word
264  RappUtilities.rapp_print ("Initial word: " + initial_word)
265  # transform capital _letters
266  for cap in self._capital_letters:
267  initial_word = initial_word.replace(cap, self._capital_letters[cap])
268  RappUtilities.rapp_print ("Caps to small: " + initial_word)
269  # fix english version of _letters
270  eng_w = initial_word
271  for lit in self._literal_letters:
272  eng_w = eng_w.replace(lit, self._literal_letters[lit])
273  englified_words[eng_w] = word
274  RappUtilities.rapp_print ("Englified: " + eng_w)
275  # check _phonemes
276  for ph in self._phonemes:
277  initial_word = initial_word.replace(ph, self._phonemes[ph])
278  RappUtilities.rapp_print ("Phonemes: " + initial_word)
279  # check special two digit letters
280  for stdl in self._all_special_two_digit_letters:
281  initial_word = initial_word.replace(stdl, \
283  RappUtilities.rapp_print ("Special 2 digit letters: " + initial_word)
284  # check two-digit letters
285  for let in self._two_digit_letters:
286  initial_word = initial_word.replace(let, self._two_digit_letters[let])
287  RappUtilities.rapp_print ("2 digit letters: " + initial_word)
288  # check specific rules
289  for sr in self._s_specific_rules:
290  initial_word = initial_word.replace(sr, self._s_specific_rules[sr])
291  RappUtilities.rapp_print ("specific rules: " + initial_word)
292  # check the rest of the letters
293  for l in self._letters:
294  initial_word = initial_word.replace(l, self._letters[l])
295  RappUtilities.rapp_print ("rest of letters: " + initial_word)
296 
297  enhanced_words[eng_w] = []
298  temp = initial_word.split(' ')
299  if len(temp) > 0:
300  temp = temp[:-1]
301  enhanced_words[eng_w] = temp
302 
303  return [enhanced_words, englified_words]
304 
305  ## Englify Greek words
306  #
307  # @param words [list::string] The set of Greek words
308  def _englify_words(self, words):
309  englified_words = []
310  for word in words:
311  eng_w = word
312  # First transform the Capitals
313  for cap in self._capital_letters:
314  eng_w = eng_w.replace(cap, self._capital_letters[cap])
315  for lit in self._literal_letters:
316  eng_w = eng_w.replace(lit, self._literal_letters[lit])
317  englified_words.append(eng_w)
318  return englified_words
319 
320 
321  ## Computes the Limited Greek Configuration
322  #
323  # @param words [list::string] The set of words to be identified
324  # @param grammar [list::string] The Sphinx grammar parameter
325  # @param sentences [list::string] The Sphinx sentences parameter
326  #
327  # @return limited_sphinx_configuration [dictionary] The Limited Greek configuration
328  # @return englified_to_greek_dict [dictionary] A dictionary to transform the englified greek words to actual greek words
def getLimitedVocebularyConfiguration(self, words, grammar, sentences):
    """Build the limited-vocabulary Sphinx configuration for Greek input.

    The words are converted to phonemes with _transformWords, the grammar
    and sentences are englified with _englify_words, and the three results
    are passed to self._vocabulary.createConfigurationFiles.

    @param words [list::string] The set of words to be identified
    @param grammar [list::string] The Sphinx grammar parameter
    @param sentences [list::string] The Sphinx sentences parameter

    @return [limited_sphinx_configuration, englified_to_greek_dict]: the
    value returned by createConfigurationFiles, and the dictionary mapping
    englified Greek words back to the actual Greek words

    Raises RappError (rebuilt from the caught error's value) when
    createConfigurationFiles raises RappError.
    """
    # Phonemes per englified word, plus the englified -> Greek lookup
    phonemes_by_word, englified_to_greek_dict = self._transformWords(words)

    # Grammar and sentences must use the same englified spelling
    ascii_grammar = self._englify_words(grammar)
    ascii_sentences = self._englify_words(sentences)

    try:
        sphinx_configuration = self._vocabulary.createConfigurationFiles(
            phonemes_by_word, ascii_grammar, ascii_sentences)
    except RappError as e:
        raise RappError(e.value)

    return [sphinx_configuration, englified_to_greek_dict]
def _configureLetters
Creates the basic Greek letter to English configuration.
_special_two_digit_letters
Special two digit Greek letter->English phonemes mapping.
_literal_letters
Greek letters->English letters mapping.
def getLimitedVocebularyConfiguration
Computes the Limited Greek Configuration.
_all_special_two_digit_letters
All special two digit Greek letter->English phonemes mapping.
_s_specific_rules
Special Greek words ->English phonemes mapping.
Allows the creation of configuration files for Greek Sphinx speech recognition.
Allows the creation of configuration files for Sphinx speech recognition.
_two_digit_letters
Two digit Greek letters->English phonemes mapping.
_letters
Standard Greek letter->English phonemes mapping.
_capital_letters
Greek uppercase to lowercase mapping.
Allows the creation of configuration files for English Sphinx speech recognition. ...
def _transformWords
Transforms the Greek words into phonemes for the Sphinx configuration.
_phonemes
Greek words->English standard phonemes mapping.
_english_support
Allows the creation of configuration files for English words.