RAPP Platform  v0.6.0
RAPP Platform is a collection of ROS nodes and back-end processes that aim to deliver ready-to-use generic services to robots
 All Classes Namespaces Files Functions Variables Macros
google_news_engine.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 
4 #Copyright 2015 RAPP
5 
6 #Licensed under the Apache License, Version 2.0 (the "License");
7 #you may not use this file except in compliance with the License.
8 #You may obtain a copy of the License at
9 
10  #http://www.apache.org/licenses/LICENSE-2.0
11 
12 #Unless required by applicable law or agreed to in writing, software
13 #distributed under the License is distributed on an "AS IS" BASIS,
14 #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 #See the License for the specific language governing permissions and
16 #limitations under the License.
17 
18 # Authors: Aris Thallas
19 # contact: aris.thallas@{iti.gr, gmail.com}
20 
22  NewsEngineBase,
23  RappUtilities,
24  RappError
25  )
26 
27 
28 ## @class GoogleNewsEngine
29 # @brief Google news engine handler
31 
32  ## @brief Constructor
def __init__(self):
    """Initialise the engine: endpoint URL, default parameters, limits."""
    NewsEngineBase.__init__(self)

    # Upper bounds read by fetch_news: number of HTTP requests issued and
    # number of stories returned.
    self._max_stories = 30
    self._max_requests = 20

    # Default parameter values, merged into every request's parameters
    self._params = {'v': '1.0', 'rsz': '8'}

    self._url = 'https://ajax.googleapis.com/ajax/services/search/news'
44 
45  ## @brief Fetch the news
46  #
47  # @param req
48  # [rapp_platform_ros_communications::NewsExplorer::NewsExplorerSrv]
49  # The service request
50  #
51  # @return [list<dict>] The server results containing the stories
def fetch_news(self, req):
    """Fetch news stories for the given service request.

    Issues at most ``self._max_requests`` HTTP requests, feeding every
    response to ``self._handle_server_response`` until enough stories have
    been collected.

    @param req The service request. ``storyNum`` and ``excludeTitles`` are
        read here; the request is also forwarded to ``_handle_params``.
        NOTE: a ``storyNum`` of 0 is overwritten in place with the default
        of 5.

    @return [list<dict>] The stories, at most
        ``min(req.storyNum, self._max_stories)`` of them.

    @raise RappError If a negative number of stories is requested, the
        HTTP request raises, the response status is not 200, or handling
        the response raises.
    """
    if req.storyNum < 0:
        error = 'Requested negative number of news stories.'
        RappUtilities.rapp_print(error, 'ERROR')
        raise RappError(error)
    if req.storyNum == 0:
        warn = 'Requested zero news stories. Providing default number of 5'
        RappUtilities.rapp_print(warn, 'DEBUG')
        req.storyNum = 5

    # Warn only when the request really exceeds the cap; asking for exactly
    # the maximum does not truncate anything.
    if req.storyNum > self._max_stories:
        warn = 'Too many stories requested. Truncating to: ' + \
            str(self._max_stories)
        RappUtilities.rapp_print(warn, 'DEBUG')
    max_stories = min(req.storyNum, self._max_stories)

    iterations = 0
    final_stories = []

    while iterations < self._max_requests and \
            len(final_stories) < max_stories:

        param_dict = self._handle_params(req, iterations)
        iterations += 1

        # Fetch a number of results from the server
        # (8 per request, due to Google's restrictions)
        try:
            response = self._http_request.perform_request(self._url,
                                                          param_dict)
        except RappError as err:
            RappUtilities.rapp_print(err, 'ERROR')
            # Bare raise keeps the original exception and its traceback
            raise

        if response['responseStatus'] != 200:
            err = 'Http request failed. Error code: ' + \
                str(response['responseStatus'])
            RappUtilities.rapp_print(err, 'ERROR')
            raise RappError(err)

        # Process the server's results. Extract titles etc. and merge them
        # with the stories gathered so far
        try:
            final_stories = self._handle_server_response(
                response, final_stories, req.excludeTitles)
        except RappError as err:
            RappUtilities.rapp_print(err, 'ERROR')
            raise

    # Keep the requested number of stories. list() guarantees the slice
    # works even if the handler returned a non-list sequence.
    return list(final_stories)[:max_stories]
108 
109  ## @brief Handles the server's response
110  #
111  # @param response [] The server's response to the request module.
112  # @param story_list [string] The current list of stories
113  # @param exclude_list [string]
114  # The list of titles to be excluded from the results
def _handle_server_response(self, response, story_list, exclude_list):
    """Merge the stories of one server response into the current stories.

    Each entry of ``response['responseData']['results']`` is converted with
    ``self.rapp_http_json_parser.find_values`` and added to a copy of
    ``story_list``. Stories are unique by ``'title'``: when a title occurs
    more than once the latest story replaces the earlier one, at the
    position where the title was first seen.

    @param response The server's response; must provide
        ``response['responseData']['results']``.
    @param story_list The stories collected so far (not modified).
    @param exclude_list Titles to be excluded from the results. A result is
        dropped when its UTF-8 encoded title is found in this list.

    @return [list<dict>] The merged, de-duplicated stories.
    """
    # Passed to the JSON parser helper; presumably maps the field names of
    # a raw result to the field names of the produced story (the code below
    # relies on the produced story having a 'title' entry).
    keys = {'titleNoFormatting': 'title',
            'content': 'content',
            'publisher': 'publisher',
            'publishedDate': 'publishedDate',
            'unescapedUrl': 'url'}

    # Track first-seen title order separately from the title -> story map,
    # so the result is a real list with a deterministic order and every
    # story is handled exactly once.
    ordered_titles = []
    stories_by_title = {}

    def _remember(story):
        title = story['title']
        if title not in stories_by_title:
            ordered_titles.append(title)
        stories_by_title[title] = story

    for story in story_list:
        _remember(story)

    for result in response['responseData']['results']:
        story = self.rapp_http_json_parser.find_values(keys, result)

        if story['title'].encode('utf-8') in exclude_list:
            continue

        _remember(story)

    return [stories_by_title[title] for title in ordered_titles]
136 
137  ## @brief Create parameter dictionary for request module
138  #
139  # @param req
140  # [rapp_platform_ros_communications::NewsExplorer::NewsExplorerSrv]
141  # The service request
142  #
143  # @return params [dict] The parameters
def _handle_params(self, req, iters):
    """Create the parameter dictionary for one HTTP request.

    The search term is chosen as follows:
      - keywords given: they are joined with spaces and sent as ``q``
        (a topic given at the same time is ignored);
      - no keywords but a topic: the topic is sent as ``topic``;
      - neither: the default topic ``'h'`` is used.

    @param req The service request. Reads ``keywords``, ``topic`` and
        ``regionEdition``.
    @param iters Index of the current request; sent as ``start``.

    @return params [dict] The parameters, including the defaults stored in
        ``self._params``.
    """
    params = {}
    query_str = ' '.join(req.keywords)
    if query_str != '':
        if req.topic != '':
            RappUtilities.rapp_print('Provided both query and topic. ' +
                                     'Ignoring topic',
                                     'DEBUG')
        params['q'] = query_str
    elif req.topic != '':
        # Honour the requested topic instead of silently dropping it
        params['topic'] = req.topic
    else:
        RappUtilities.rapp_print('Wrong query provided.' +
                                 ' Falling back to default topic',
                                 'DEBUG')
        params['topic'] = 'h'

    if req.regionEdition != '':
        params['ned'] = req.regionEdition

    # NOTE(review): the request index itself is sent as the offset. An
    # earlier variant, str(iters * int(self._params['rsz']) % 64), was left
    # commented out in the original -- confirm which one is intended.
    params['start'] = str(iters)

    # Defaults are applied last, so they win over any same-named key above
    params.update(self._params)
    return params
def _handle_server_response
Handles the server's response.
def _handle_params
Create parameter dictionary for request module.