OpenTREP Logo  0.08.01
C++ Open Travel Request Parsing Library
Loading...
Searching...
No Matches
opentrep-searcher.cpp
Go to the documentation of this file.
1// STL
2#include <cassert>
3#include <iostream>
4#include <sstream>
5#include <fstream>
6#include <vector>
7#include <string>
8// Boost (Extended STL)
9#include <boost/date_time/posix_time/posix_time.hpp>
10#include <boost/date_time/gregorian/gregorian.hpp>
11#include <boost/tokenizer.hpp>
12#include <boost/program_options.hpp>
13// OpenTREP
15#include <opentrep/DBType.hpp>
18#include <opentrep/Location.hpp>
20#include <opentrep/config/opentrep-paths.hpp>
21
22
// //////// Type definitions ///////
/**
 * Ordered list of words, e.g. the individual terms of a travel query.
 */
typedef std::vector<std::string> WordList_T;


// //////// Constants //////
/**
 * File the logs are written to when no "log" option is given.
 */
const std::string K_OPENTREP_DEFAULT_LOG_FILENAME = "opentrep-searcher.log";

/**
 * Travel query used when the caller does not provide one.
 *
 * NOTE(review): the place names are misspelled, presumably on purpose so
 * that the default run exercises the spelling-error tolerance -- confirm.
 */
const std::string K_OPENTREP_DEFAULT_QUERY_STRING =
  "sna francisco rio de janero los angeles reykyavki";

/**
 * Default type of search request (0 = full text, 1 = coordinates).
 */
const unsigned short K_OPENTREP_DEFAULT_SEARCH_TYPE = 0;
47
52
53
54// //////////////////////////////////////////////////////////////////////
55void tokeniseStringIntoWordList (const std::string& iPhrase,
56 WordList_T& ioWordList) {
57 // Empty the word list
58 ioWordList.clear();
59
60 // Boost Tokeniser
61 typedef boost::tokenizer<boost::char_separator<char> > Tokeniser_T;
62
63 // Define the separators
64 const boost::char_separator<char> lSepatorList(" .,;:|+-*/_=!@#$%`~^&(){}[]?'<>\"");
65
66 // Initialise the phrase to be tokenised
67 Tokeniser_T lTokens (iPhrase, lSepatorList);
68 for (Tokeniser_T::const_iterator tok_iter = lTokens.begin();
69 tok_iter != lTokens.end(); ++tok_iter) {
70 const std::string& lTerm = *tok_iter;
71 ioWordList.push_back (lTerm);
72 }
73}
74
75// //////////////////////////////////////////////////////////////////////
/**
 * Concatenate a list of words into a single string.
 *
 * The words are written in list order, separated by exactly one blank;
 * there is neither a leading nor a trailing blank.
 *
 * @param iWordList  Words to be joined (may be empty).
 * @return The joined string, empty when the list is empty.
 */
std::string createStringFromWordList (const WordList_T& iWordList) {
  std::ostringstream oStr;

  for (WordList_T::const_iterator itWord = iWordList.begin();
       itWord != iWordList.end(); ++itWord) {
    // The separator is decided from the iterator position rather than from
    // a 16-bit countdown of the list size: such a counter wraps around for
    // lists of 65536 words or more and then misplaces the blanks.
    if (itWord != iWordList.begin()) {
      oStr << " ";
    }
    oStr << *itWord;
  }

  return oStr.str();
}
91
92
93// ///////// Parsing of Options & Configuration /////////
94// A helper function to simplify the main part.
/**
 * Stream every element of a vector, each one followed by a single blank
 * (so a non-empty vector leaves a trailing blank on the stream).
 *
 * @param os  Stream to write to.
 * @param v   Elements to write; T must itself be streamable.
 * @return The stream given as parameter.
 */
template <class T>
std::ostream& operator<< (std::ostream& os, const std::vector<T>& v) {
  for (typename std::vector<T>::const_iterator itElem = v.begin();
       itElem != v.end(); ++itElem) {
    os << *itElem << " ";
  }
  return os;
}
100
103
105int readConfiguration (int argc, char* argv[],
106 unsigned short& ioSpellingErrorDistance,
107 std::string& ioQueryString,
108 std::string& ioXapianDBFilepath,
109 std::string& ioSQLDBTypeString,
110 std::string& ioSQLDBConnectionString,
111 unsigned short& ioDeploymentNumber,
112 std::string& ioLogFilename,
113 unsigned short& ioSearchType,
114 std::ostringstream& oStr) {
115
116 // Initialise the travel query string, if that one is empty
117 if (ioQueryString.empty() == true) {
118 ioQueryString = K_OPENTREP_DEFAULT_QUERY_STRING;
119 }
120
121 // Transform the query string into a list of words (STL strings)
122 WordList_T lWordList;
123 tokeniseStringIntoWordList (ioQueryString, lWordList);
124
125 // Declare a group of options that will be allowed only on command line
126 boost::program_options::options_description generic ("Generic options");
127 generic.add_options()
128 ("prefix", "print installation prefix")
129 ("version,v", "print version string")
130 ("help,h", "produce help message");
131
132 // Declare a group of options that will be allowed both on command
133 // line and in config file
134 boost::program_options::options_description config ("Configuration");
135 config.add_options()
136 ("error,e",
137 boost::program_options::value< unsigned short >(&ioSpellingErrorDistance)->default_value(K_OPENTREP_DEFAULT_SPELLING_ERROR_DISTANCE),
138 "Spelling error distance (e.g., 3)")
139 ("xapiandb,d",
140 boost::program_options::value< std::string >(&ioXapianDBFilepath)->default_value(OPENTREP::DEFAULT_OPENTREP_XAPIAN_DB_FILEPATH),
141 "Xapian database filepath (e.g., /tmp/opentrep/xapian_traveldb)")
142 ("sqldbtype,t",
143 boost::program_options::value< std::string >(&ioSQLDBTypeString)->default_value(OPENTREP::DEFAULT_OPENTREP_SQL_DB_TYPE),
144 "SQL database type (e.g., nodb for no SQL database, sqlite for SQLite, pg for PostgreSQL, mysql for MariaDB/MySQL)")
145 ("sqldbconx,s",
146 boost::program_options::value< std::string >(&ioSQLDBConnectionString),
147 "SQL database connection string (e.g., ~/tmp/opentrep/sqlite_travel.db for SQLite, "
148 "\"dbname=trep_trep user=trep password=trep\" for PostgreSQL, "
149 "\"db=trep_trep user=trep password=trep\" for MariaDB/MySQL)")
150 ("deploymentnb,m",
151 boost::program_options::value<unsigned short>(&ioDeploymentNumber)->default_value(OPENTREP::DEFAULT_OPENTREP_DEPLOYMENT_NUMBER),
152 "Deployment number (from to N, where N=1 normally)")
153 ("log,l",
154 boost::program_options::value< std::string >(&ioLogFilename)->default_value(K_OPENTREP_DEFAULT_LOG_FILENAME),
155 "Filepath for the logs")
156 ("type,y",
157 boost::program_options::value<unsigned short>(&ioSearchType)->default_value(K_OPENTREP_DEFAULT_SEARCH_TYPE),
158 "Type of search request (0 = full text, 1 = coordinates)")
159 ("query,q",
160 boost::program_options::value< WordList_T >(&lWordList)->multitoken(),
161 "Travel query word list (e.g. sna francisco rio de janero los angeles reykyavki), "
162 "which sould be located at the end of the command line (otherwise, "
163 "the other options would be interpreted as part of that travel query word list)")
164 ;
165
166 // Hidden options, will be allowed both on command line and
167 // in config file, but will not be shown to the user.
168 boost::program_options::options_description hidden ("Hidden options");
169 hidden.add_options()
170 ("copyright",
171 boost::program_options::value< std::vector<std::string> >(),
172 "Show the copyright (license)");
173
174 boost::program_options::options_description cmdline_options;
175 cmdline_options.add(generic).add(config).add(hidden);
176
177 boost::program_options::options_description config_file_options;
178 config_file_options.add(config).add(hidden);
179
180 boost::program_options::options_description visible ("Allowed options");
181 visible.add(generic).add(config);
182
183 boost::program_options::positional_options_description p;
184 p.add ("copyright", -1);
185
186 boost::program_options::variables_map vm;
187 boost::program_options::
188 store (boost::program_options::command_line_parser (argc, argv).
189 options (cmdline_options).positional(p).run(), vm);
190
191 std::ifstream ifs ("opentrep-searcher.cfg");
192 boost::program_options::store (parse_config_file (ifs, config_file_options),
193 vm);
194 boost::program_options::notify (vm);
195
196 if (vm.count ("help")) {
197 std::cout << visible << std::endl;
199 }
200
201 if (vm.count ("version")) {
202 std::cout << PACKAGE_NAME << ", version " << PACKAGE_VERSION << std::endl;
204 }
205
206 if (vm.count ("prefix")) {
207 std::cout << "Installation prefix: " << PREFIXDIR << std::endl;
209 }
210
211 if (vm.count ("deploymentnb")) {
212 ioDeploymentNumber = vm["deploymentnb"].as< unsigned short >();
213 oStr << "Deployment number: " << ioDeploymentNumber << std::endl;
214 }
215
216 if (vm.count ("xapiandb")) {
217 ioXapianDBFilepath = vm["xapiandb"].as< std::string >();
218 oStr << "Xapian database filepath is: " << ioXapianDBFilepath
219 << ioDeploymentNumber << std::endl;
220 }
221
222 if (vm.count ("sqldbtype")) {
223 ioSQLDBTypeString = vm["sqldbtype"].as< std::string >();
224 oStr << "SQL database type is: " << ioSQLDBTypeString << std::endl;
225 }
226
227 // Derive the detault connection string depending on the SQL database type
228 const OPENTREP::DBType lDBType (ioSQLDBTypeString);
229 if (lDBType == OPENTREP::DBType::NODB) {
230 ioSQLDBConnectionString = "";
231
232 } else if (lDBType == OPENTREP::DBType::SQLITE3) {
233 ioSQLDBConnectionString = OPENTREP::DEFAULT_OPENTREP_SQLITE_DB_FILEPATH;
234
235 } else if (lDBType == OPENTREP::DBType::PG) {
236 ioSQLDBConnectionString = OPENTREP::DEFAULT_OPENTREP_PG_CONN_STRING;
237
238 } else if (lDBType == OPENTREP::DBType::MYSQL) {
239 ioSQLDBConnectionString = OPENTREP::DEFAULT_OPENTREP_MYSQL_CONN_STRING;
240 }
241
242 // Set the SQL database connection string, if any is given
243 if (vm.count ("sqldbconx")) {
244 ioSQLDBConnectionString = vm["sqldbconx"].as< std::string >();
245 }
246
247 // Reporting of the SQL database connection string
248 if (lDBType == OPENTREP::DBType::SQLITE3
249 || lDBType == OPENTREP::DBType::PG
250 || lDBType == OPENTREP::DBType::MYSQL) {
251 const std::string& lSQLDBConnString =
253 ioSQLDBConnectionString,
254 ioDeploymentNumber);
255 //
256 oStr << "SQL database connection string is: " << lSQLDBConnString
257 << std::endl;
258 }
259
260 if (vm.count ("log")) {
261 ioLogFilename = vm["log"].as< std::string >();
262 oStr << "Log filename is: " << ioLogFilename << std::endl;
263 }
264
265 oStr << "The type of search is: " << ioSearchType << std::endl;
266
267 oStr << "The spelling error distance is: " << ioSpellingErrorDistance
268 << std::endl;
269
270 ioQueryString = createStringFromWordList (lWordList);
271 oStr << "The travel query string is: " << ioQueryString << std::endl;
272
273 return 0;
274}
275
279std::string parseQuery (OPENTREP::OPENTREP_Service& ioOpentrepService,
280 const OPENTREP::TravelQuery_T& iTravelQuery) {
281 std::ostringstream oStr;
282
283 // Query the Xapian database (index)
284 OPENTREP::WordList_T lNonMatchedWordList;
285 OPENTREP::LocationList_T lLocationList;
286 const OPENTREP::NbOfMatches_T nbOfMatches =
287 ioOpentrepService.interpretTravelRequest (iTravelQuery, lLocationList,
288 lNonMatchedWordList);
289
290 oStr << nbOfMatches << " (geographical) location(s) have been found "
291 << "matching your query (`" << iTravelQuery << "'). "
292 << lNonMatchedWordList.size() << " word(s) was/were left unmatched."
293 << std::endl;
294
295 if (nbOfMatches != 0) {
297 for (OPENTREP::LocationList_T::const_iterator itLocation =
298 lLocationList.begin();
299 itLocation != lLocationList.end(); ++itLocation, ++idx) {
300 const OPENTREP::Location& lLocation = *itLocation;
301 oStr << " [" << idx << "]: " << lLocation << std::endl;
302 }
303 }
304
305 if (lNonMatchedWordList.empty() == false) {
306 oStr << "List of unmatched words:" << std::endl;
307
309 for (OPENTREP::WordList_T::const_iterator itWord =
310 lNonMatchedWordList.begin();
311 itWord != lNonMatchedWordList.end(); ++itWord, ++idx) {
312 const OPENTREP::Word_T& lWord = *itWord;
313 oStr << " [" << idx << "]: " << lWord << std::endl;
314 }
315 }
316
317 return oStr.str();
318}
319
320// /////////////// M A I N /////////////////
321int main (int argc, char* argv[]) {
322
323 // Travel query
324 OPENTREP::TravelQuery_T lTravelQuery;
325
326 // Output log File
327 std::string lLogFilename;
328
329 // Xapian database name (directory of the index)
330 std::string lXapianDBNameStr;
331
332 // Type of search
333 unsigned short lSearchType;
334
335 // Xapian spelling error distance
336 unsigned short lSpellingErrorDistance;
337
338 // SQL database type
339 std::string lSQLDBTypeStr;
340
341 // SQL database connection string
342 std::string lSQLDBConnectionStr;
343
344 // Deployment number/version
345 OPENTREP::DeploymentNumber_T lDeploymentNumber;
346
347 // Log stream for the introduction part
348 std::ostringstream oIntroStr;
349
350 // Call the command-line option parser
351 const int lOptionParserStatus =
352 readConfiguration (argc, argv, lSpellingErrorDistance, lTravelQuery,
353 lXapianDBNameStr, lSQLDBTypeStr, lSQLDBConnectionStr,
354 lDeploymentNumber, lLogFilename, lSearchType, oIntroStr);
355
356 if (lOptionParserStatus == K_OPENTREP_EARLY_RETURN_STATUS) {
357 return 0;
358 }
359
360 // Set the log parameters
361 std::ofstream logOutputFile;
362 // open and clean the log outputfile
363 logOutputFile.open (lLogFilename.c_str());
364 logOutputFile.clear();
365
366 // Report the parameters
367 std::cout << oIntroStr.str();
368
369 // DEBUG
370 // Get the current time in UTC Timezone
371 boost::posix_time::ptime lTimeUTC =
372 boost::posix_time::second_clock::universal_time();
373 logOutputFile << "[" << lTimeUTC << "][" << __FILE__ << "#"
374 << __LINE__ << "]:Parameters:" << std::endl
375 << oIntroStr.str() << std::endl;
376
377 //
378 std::ostringstream oStr;
379 if (lSearchType == 0) {
380 // Initialise the context
381 const OPENTREP::TravelDBFilePath_T lXapianDBName (lXapianDBNameStr);
382 const OPENTREP::DBType lDBType (lSQLDBTypeStr);
383 const OPENTREP::SQLDBConnectionString_T lSQLDBConnStr (lSQLDBConnectionStr);
384 OPENTREP::OPENTREP_Service opentrepService (logOutputFile, lXapianDBName,
385 lDBType, lSQLDBConnStr,
386 lDeploymentNumber);
387
388 // Check the directory of the Xapian database/index exists and is accessible
390 opentrepService.getFilePaths();
391 const OPENTREP::TravelDBFilePath_T& lActualXapianDBDir= lFPSet.second.first;
392 const bool lExistXapianDBDir =
393 opentrepService.checkXapianDBOnFileSystem (lActualXapianDBDir);
394 if (lExistXapianDBDir == false) {
395 std::ostringstream errorStr;
396 errorStr << "Error - The file-path to the Xapian database/index ('"
397 << lActualXapianDBDir
398 << "') does not exist or is not a directory." << std::endl;
399 errorStr << "\tThat usually means that the OpenTREP indexer "
400 << "(opentrep-indexer) has not been launched yet, "
401 << "or that it has operated on a different Xapian "
402 << "database/index file-path." << std::endl;
403 errorStr << "\tFor instance the Xapian database/index may have been "
404 << "created with a different deployment number ("
405 << lDeploymentNumber << " being the current deployment number)";
406 std::cerr << errorStr.str() << std::endl;
407 return -1;
408 }
409
410 // Parse the query and retrieve the places from Xapian only
411 const std::string& lOutput = parseQuery (opentrepService, lTravelQuery);
412 oStr << lOutput;
413
414 } else {
415 oStr << "Finding the airports closest to: " << lTravelQuery << std::endl;
416 }
417
418 //
419 std::cout << oStr.str();
420
421 // Get the current time in UTC Timezone
422 lTimeUTC = boost::posix_time::second_clock::universal_time();
423 logOutputFile << "[" << lTimeUTC << "][" << __FILE__ << "#"
424 << __LINE__ << "]:Results:" << std::endl
425 << oStr.str() << std::endl;
426
427 // Close the Log outputFile
428 logOutputFile.close();
429
430 return 0;
431}
Interface for the OPENTREP Services.
bool checkXapianDBOnFileSystem(const TravelDBFilePath_T &) const
std::pair< const PORFilePath_T, const DBFilePathPair_T > FilePathSet_T
FilePathSet_T getFilePaths() const
NbOfMatches_T interpretTravelRequest(const std::string &iTravelQuery, LocationList_T &, WordList_T &)
const std::string DEFAULT_OPENTREP_SQLITE_DB_FILEPATH
std::list< Word_T > WordList_T
const std::string DEFAULT_OPENTREP_PG_CONN_STRING
std::string Word_T
std::string TravelQuery_T
const std::string DEFAULT_OPENTREP_SQL_DB_TYPE
std::string parseAndDisplayConnectionString(const DBType &iDBType, const std::string &iSQLDBConnStr, const DeploymentNumber_T &iDeploymentNumber)
std::list< Location > LocationList_T
const unsigned short DEFAULT_OPENTREP_DEPLOYMENT_NUMBER
const std::string DEFAULT_OPENTREP_MYSQL_CONN_STRING
unsigned short DeploymentNumber_T
const std::string DEFAULT_OPENTREP_XAPIAN_DB_FILEPATH
unsigned short NbOfMatches_T
const int K_OPENTREP_EARLY_RETURN_STATUS
std::vector< std::string > WordList_T
const std::string K_OPENTREP_DEFAULT_LOG_FILENAME("opentrep-indexer.log")
std::string createStringFromWordList(const WordList_T &iWordList)
int main(int argc, char *argv[])
const unsigned short K_OPENTREP_DEFAULT_SEARCH_TYPE
int readConfiguration(int argc, char *argv[], unsigned short &ioSpellingErrorDistance, std::string &ioQueryString, std::string &ioXapianDBFilepath, std::string &ioSQLDBTypeString, std::string &ioSQLDBConnectionString, unsigned short &ioDeploymentNumber, std::string &ioLogFilename, unsigned short &ioSearchType, std::ostringstream &oStr)
const std::string K_OPENTREP_DEFAULT_LOG_FILENAME("opentrep-searcher.log")
void tokeniseStringIntoWordList(const std::string &iPhrase, WordList_T &ioWordList)
std::string parseQuery(OPENTREP::OPENTREP_Service &ioOpentrepService, const OPENTREP::TravelQuery_T &iTravelQuery)
std::ostream & operator<<(std::ostream &os, const std::vector< T > &v)
const unsigned short K_OPENTREP_DEFAULT_SPELLING_ERROR_DISTANCE
const std::string K_OPENTREP_DEFAULT_QUERY_STRING("sna francisco rio de janero los angeles reykyavki")
Enumeration of database types.
Definition DBType.hpp:17
Structure modelling a (geographical) location.
Definition Location.hpp:25