Hall-D Software  alpha
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
XParsers.cpp
Go to the documentation of this file.
1 /*
2  * XParsers: service classes to support parsing of xml documents
3  * using standard DOM parsing tools
4  *
5  * Class implementation
6  * September 21, 2003
7  * Richard Jones
8  *
9  * Bundled with the error handler classes are two utility functions:
10  *
11  * 1. parseInputDocument - parser implemented using the old-style
12  * XercesDOMParser interface based on the example code in
13  * $XERCESCROOT/samples/DOMPrint
14  *
15  * 2. buildDOMDocument - parser implemented using the w3c standard
16  * DOMBuilder interface based on the example code in
17  * $XERCESCROOT/samples/DOMCount
18  *
19  * Implementation Notes:
20  * ---------------------
21  * To prevent memory leaks, each of these parsers only retains a single
22  * document in memory at a time. The next call will destroy the DOM
23  * tree created on the previous call and return the resources to the
24  * pool. To prevent this behavior, call the parser with the argument
25  * perm=true, in which case the resulting DOMDocument will persist for
26  * the rest of the lifetime of the program.
27  *
28  *
29  * Modification Notes:
30  * --------------------
31  * 11/7/2012 DL
32  * Added EntityResolver class to keep track of all of the XML files
33  * pulled in by the parser so an md5 checksum could be performed.
34  * results are written to a FORTRAN function called "md5geom" so the
35  * checksum can be accessed programmatically.
36  *
37  * 6/12/2012 DL
38  * Xerces 3 has done away with the DOMBuilder API, yet retains
39  * the DOMParser. It seems the code using the routines in this file
40  * looked to the pre-processor variable OLD_STYLE_XERCES_PARSER to
41  * decide whether to call parseInputDocument() or buildDOMDocument().
42  * The former being called if the variable was defined implying
43  * the former was likely to be deprecated. The simplest change that
44  * could be made to get this working with XERCES 3 was to turn the
45  * buildDOMDocument() routine into a wrapper for the parseInputDocument()
46  * routine. This is done below.
47  *
48  */
49 
50 #include <fstream>
51 using namespace std;
52 
53 #include <xercesc/sax/SAXParseException.hpp>
54 #include <xercesc/parsers/XercesDOMParser.hpp>
55 #include <xercesc/framework/LocalFileFormatTarget.hpp>
56 
57 #include "XParsers.hpp"
58 #include "XString.hpp"
59 #include "md5.h"
60 
62 
63 /*
64  * FIX_XERCES_getElementById_BUG does a store/load cycle at parsing time
65  * to fully instantiate entity references on the document tree.
66  * See xerces-c++ bug 12800 at http://nagoya.apache.org
67  */
68 #define FIX_XERCES_getElementById_BUG true
69 
70 #define X(str) XString(str).unicode_str()
71 #define S(str) str.c_str()
72 
73 xercesc::DOMDocument* parseInputDocument(const XString& xmlFile, bool keep)
74 {
75  static xercesc::XercesDOMParser* scratchParser=0;
76  xercesc::XercesDOMParser* parser;
77  if (keep)
78  {
79  parser = new xercesc::XercesDOMParser;
80  }
81  else if (scratchParser == 0)
82  {
83  parser = scratchParser = new xercesc::XercesDOMParser;
84  }
85  else
86  {
87  parser = scratchParser;
88  }
89 
90  MyEntityResolver myEntityResolver(xmlFile);
91 
92  parser->setValidationScheme(xercesc::XercesDOMParser::Val_Auto);
93  parser->setCreateEntityReferenceNodes(false);
94  parser->setValidationSchemaFullChecking(true);
95  parser->setDoNamespaces(true);
96  parser->setDoSchema(true);
97  parser->setEntityResolver(&myEntityResolver);
98 
99  MyOwnErrorHandler errorHandler;
100  parser->setErrorHandler(&errorHandler);
101 
102  try
103  {
104  parser->parse(xmlFile.c_str());
105  myEntityResolver.GetMD5_checksum();
106  }
107  catch (const xercesc::XMLException& toCatch)
108  {
109  std::cerr
110  << "\nparseInputDocument: Error during parsing: '" << xmlFile
111  << "'\n" << "Exception message is: \n"
112  << toCatch.getMessage() << "\n" << std::endl;
113  return 0;
114  }
115  catch (const xercesc::DOMException& toCatch)
116  {
117  std::cerr
118  << "\nXParsers: Error during parsing: '" << xmlFile << "'\n"
119  << "Exception message is: \n"
120  << toCatch.msg << "\n" << std::endl;
121  xercesc::XMLPlatformUtils::Terminate();
122  return 0;
123  }
124  catch (...)
125  {
126  std::cerr
127  << "\nparseInputDocument: Unexpected exception during parsing: '"
128  << xmlFile << "'\n";
129  xercesc::XMLPlatformUtils::Terminate();
130  return 0;
131  }
132 
133  if (errorHandler.getSawErrors())
134  {
135  std::cerr << "\nErrors occured, no output available\n" << std::endl;
136  return 0;
137  }
138 
139  return parser->getDocument();
140 }
141 
142 xercesc::DOMDocument* buildDOMDocument(const XString& xmlFile, bool keep)
143 {
144 return parseInputDocument(xmlFile, keep);
145 #if 0 // below no longer works in XERCES 3
146 
147  xercesc::DOMImplementation *impl =
148  xercesc:: DOMImplementationRegistry::getDOMImplementation(X("LS"));
149  static xercesc::DOMBuilder* scratchBuilder=0;
150  xercesc::DOMBuilder* builder;
151  if (keep)
152  {
153  builder = ((xercesc::DOMImplementationLS*)impl)->createDOMBuilder(
154  xercesc::DOMImplementationLS::MODE_SYNCHRONOUS, 0);
155  }
156  else if (scratchBuilder == 0)
157  {
158  builder = scratchBuilder = ((xercesc::DOMImplementationLS*)impl)->
159  createDOMBuilder(xercesc::DOMImplementationLS::MODE_SYNCHRONOUS,
160  0);
161  }
162  else
163  {
164  builder = scratchBuilder;
165  }
166  XString tmpFileS = ".tmp-"+xmlFile.basename();
167 
168  builder->setFeature(xercesc::XMLUni::fgDOMValidation, true);
169  builder->setFeature(xercesc::XMLUni::fgDOMNamespaces, true);
170  builder->setFeature(xercesc::XMLUni::fgDOMDatatypeNormalization, true);
171  builder->setFeature(xercesc::XMLUni::fgDOMEntities, false);
172  builder->setFeature(xercesc::XMLUni::fgXercesSchemaFullChecking, true);
173  builder->setFeature(xercesc::XMLUni::fgXercesSchema, true);
174 
175  MyDOMErrorHandler errHandler;
176  builder->setErrorHandler(&errHandler);
177 
178  xercesc::DOMDocument* doc = 0;
179 
180  try {
181  builder->resetDocumentPool();
182  doc = builder->parseURI(xmlFile.c_str());
183 #if defined FIX_XERCES_getElementById_BUG
184  xercesc::DOMWriter* writer = ((xercesc::DOMImplementationLS*)impl)->
185  createDOMWriter();
186  xercesc::LocalFileFormatTarget* lfft =
187  new xercesc::LocalFileFormatTarget(X(tmpFileS));
188  writer->writeNode(lfft,*(doc->getDocumentElement()));
189  delete lfft;
190  delete writer;
191  builder->resetDocumentPool();
192  doc = builder->parseURI(X(tmpFileS));
193 #endif
194  }
195  catch (const xercesc::XMLException& toCatch) {
196  std::cout << "Exception message is: \n" << toCatch.getMessage() << "\n";
197  return 0;
198  }
199  catch (const xercesc::DOMException& toCatch) {
200  std::cout << "Exception message is: \n" << toCatch.msg << "\n";
201  return 0;
202  }
203  catch (...) {
204  std::cout << "Unexpected Exception \n" ;
205  return 0;
206  }
207 
208  if (errHandler.getSawErrors())
209  {
210  std::cerr << "\nErrors occured, no output available\n" << std::endl;
211  return 0;
212  }
213 
214  return doc;
215 #endif // 0
216 }
217 
219  fSawErrors(false)
220 {
221 }
222 
224 {
225 }
226 
227 // Overrides of the SAX ErrorHandler interface
228 
229 void MyOwnErrorHandler::error(const xercesc::SAXParseException& e)
230 {
231  fSawErrors = true;
232  XString systemId(e.getSystemId());
233  XString message(e.getMessage());
234  std::cerr
235  << "\nparseInputDocument: Error at file " << S(systemId)
236  << ", line " << e.getLineNumber()
237  << ", char " << e.getColumnNumber()
238  << "\n Message: " << S(message) << std::endl;
239 }
240 
241 void MyOwnErrorHandler::fatalError(const xercesc::SAXParseException& e)
242 {
243  fSawErrors = true;
244  XString systemId(e.getSystemId());
245  XString message(e.getMessage());
246  std::cerr
247  << "\nparseInputDocument: Fatal Error at file " << S(systemId)
248  << ", line " << e.getLineNumber()
249  << ", char " << e.getColumnNumber()
250  << "\n Message: " << S(message) << std::endl;
251 }
252 
253 void MyOwnErrorHandler::warning(const xercesc::SAXParseException& e)
254 {
255  XString systemId(e.getSystemId());
256  XString message(e.getMessage());
257  std::cerr
258  << "\nparseInputDocument: Warning at file " << S(systemId)
259  << ", line " << e.getLineNumber()
260  << ", char " << e.getColumnNumber()
261  << "\n Message: " << S(message) << std::endl;
262 }
263 
265 {
266 }
267 
269 
270  fSawErrors(false)
271 {
272 }
273 
275 {
276 }
277 
278 // MyDOMHandlers: Overrides of the DOM ErrorHandler interface
279 
280 bool MyDOMErrorHandler::handleError(const xercesc::DOMError& domError)
281 {
282  fSawErrors = true;
283  if (domError.getSeverity() == xercesc::DOMError::DOM_SEVERITY_WARNING)
284  std::cerr << "\nWarning at file ";
285  else if (domError.getSeverity() == xercesc::DOMError::DOM_SEVERITY_ERROR)
286  std::cerr << "\nError at file ";
287  else
288  std::cerr << "\nFatal Error at file ";
289 
290  std::cerr
291  << XString(domError.getLocation()->getURI()).c_str()
292  << ", line " << domError.getLocation()->getLineNumber()
293  << ", char " << domError.getLocation()->getColumnNumber()
294  << "\n Message: " << XString(domError.getMessage()).c_str()
295  << std::endl;
296 
297  return true;
298 }
299 
301 {
302  fSawErrors = false;
303 }
304 
305 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
306 
307 //----------------------------------
308 // MyEntityResolver (constructor)
309 //----------------------------------
311 {
312  xml_filenames.push_back(xmlFile);
313 
314  string fname = xmlFile;
315  size_t pos = fname.find_last_of('/');
316  if(pos != string::npos){
317  path = fname.substr(0,pos) + "/";
318  }
319 }
320 
321 //----------------------------------
322 // MyEntityResolver (destructor)
323 //----------------------------------
325 {
326 
327 }
328 
329 //----------------------------------
330 // resolveEntity
331 //----------------------------------
332 xercesc::InputSource* MyEntityResolver::resolveEntity(const XMLCh* const publicId, const XMLCh* const systemId)
333 {
334  /// This method gets called from the xerces parser each time it
335  /// opens a file (except for the top-level file). For each of these,
336  /// record the name of the file being opened, then just return NULL
337  /// to have xerces handle opening the file in the normal way.
338 
339  // Do some backflips to get strings into std::string format
340  std::string my_publicId = "";
341  std::string my_systemId = "";
342  if(publicId){
343  char *my_publicId_ptr = xercesc::XMLString::transcode(publicId);
344  my_publicId = my_publicId_ptr;
345  xercesc::XMLString::release(&my_publicId_ptr);
346  }
347  if(systemId){
348  char *my_systemId_ptr = xercesc::XMLString::transcode(systemId);
349  my_systemId = my_systemId_ptr;
350  xercesc::XMLString::release(&my_systemId_ptr);
351  }
352  //std::cerr<<"publicId="<<my_publicId<<" systemId="<<my_systemId<<std::endl;
353 
354  // The systemId seems to be the one we want
355  xml_filenames.push_back(path + my_systemId);
356 
357  return NULL; // have xerces handle this using its defaults
358 }
359 
360 //----------------------------------
361 // GetXMLFilenames
362 //----------------------------------
363 std::vector<std::string> MyEntityResolver::GetXMLFilenames(void)
364 {
365  return xml_filenames;
366 }
367 
368 //----------------------------------
369 // GetMD5_checksum
370 //----------------------------------
372 {
373  /// This will calculate an MD5 checksum using all of the files currently
374  /// in the list of XML files. To do this, it opens each file and reads it
375  /// in, in its entirety, updating the checksum as it goes. The checksum is
376  /// returned as a hexadecimal string.
377 
378  md5_state_t pms;
379  md5_init(&pms);
380  for(unsigned int i=0; i<xml_filenames.size(); i++){
381 
382  //std::cerr<<".... Adding file to MD5 checksum : " << xml_filenames[i] << std::endl;
383 
384  ifstream ifs(xml_filenames[i].c_str());
385  if(!ifs.is_open())continue;
386 
387  // get length of file:
388  ifs.seekg (0, ios::end);
389  unsigned int length = ifs.tellg();
390  ifs.seekg (0, ios::beg);
391 
392  // allocate memory:
393  char *buff = new char [length];
394 
395  // read data as a block:
396  ifs.read (buff,length);
397  ifs.close();
398 
399  md5_append(&pms, (const md5_byte_t *)buff, length);
400 
401  delete[] buff;
402 
403  //std::cerr<<".... Adding file to MD5 checksum : " << xml_filenames[i] << " (size=" << length << ")" << std::endl;
404  }
405 
406  md5_byte_t digest[16];
407  md5_finish(&pms, digest);
408 
409  char hex_output[16*2 + 1];
410  for(int di = 0; di < 16; ++di) sprintf(hex_output + di * 2, "%02x", digest[di]);
411 
412  return last_md5_checksum = hex_output;
413 }
414 
#define X(str)
Definition: XParsers.cpp:70
MyEntityResolver(const XString &xmlFile)
Definition: XParsers.cpp:310
char string[256]
sprintf(text,"Post KinFit Cut")
xercesc::DOMDocument * buildDOMDocument(const XString &xmlFile, bool keep)
Definition: XParsers.cpp:142
void md5_init(md5_state_t *pms)
Definition: md5.c:313
xercesc::DOMDocument * parseInputDocument(const XString &xmlFile, bool keep)
Definition: XParsers.cpp:73
std::vector< std::string > xml_filenames
Definition: XParsers.hpp:95
void md5_append(md5_state_t *pms, const md5_byte_t *data, int nbytes)
Definition: md5.c:323
std::vector< std::string > GetXMLFilenames(void)
Definition: XParsers.cpp:363
TEllipse * e
void error(const xercesc::SAXParseException &e)
Definition: XParsers.cpp:229
std::string path
Definition: XParsers.hpp:96
bool getSawErrors() const
Definition: XParsers.hpp:75
bool getSawErrors() const
Definition: XParsers.hpp:52
void fatalError(const xercesc::SAXParseException &e)
Definition: XParsers.cpp:241
unsigned char md5_byte_t
Definition: md5.h:63
void md5_finish(md5_state_t *pms, md5_byte_t digest[16])
Definition: md5.c:361
void warning(const xercesc::SAXParseException &e)
Definition: XParsers.cpp:253
bool handleError(const xercesc::DOMError &domError)
Definition: XParsers.cpp:280
std::string GetMD5_checksum(void)
Definition: XParsers.cpp:371
xercesc::InputSource * resolveEntity(const XMLCh *const publicId, const XMLCh *const systemId)
Definition: XParsers.cpp:332
std::string last_md5_checksum
Definition: XParsers.cpp:61
#define S(str)
Definition: XParsers.cpp:71
const XString basename() const
Definition: XString.cpp:78