Hall-D Software  alpha
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
hddm-xml.cpp
Go to the documentation of this file.
1 /*
2  * hddm-xml : tool that reads in a HDDM document (Hall D Data Model)
3  * and translates it into plain-text xml.
4  *
5  * Version 1.3 - Richard Jones, July 2014.
6  * - Added support for input hddm streams with additional features
7  * provided through the c++ API, including on-the-fly compression with
8  * zlib and bzip2, and per-record crc32 integrity checks.
9  *
10  * Version 1.2 - Richard Jones, December 2005.
11  * - Updated code to use STL strings and vectors instead of old c-style
12  * pre-allocated arrays and strXXX functions.
13  * - Moved functions into classes grouped by function for better clarity.
14  * - Introduced the XStream class library instead of the direct interface
15  * to the rpc/xdr c-library function. This also gives access to a nice
16  * integrated set of compression/decompression streambuf classes.
17  *
18  * Version 1.1 - Richard Jones, September 2003.
19  * - Updated code to work with the new DOM-2 implementation Xerces-c
20  * from apache.org. Significant changes have taken place in the API
21  * since DOM-1.
22  * - Added support for new types "long" (int64), "string" (char arrays of
23  * arbitrary length), and "anyURI" (special case of string).
24  * - Switched from native encoding to the use of the XDR library to make
25  * hddm files machine-independent.
26  *
27  * Original version - Richard Jones, June 4 2001.
28  *
29  *
30  * Programmer's Notes:
31  * -------------------
32  * 1. The output from hddm-xml is a well-formed xml document.
33  *
34  * 2. The hddm stream contains an xml header that functions as a prototype
35  * of the xml output.
36  *
37  * 3. This tool can read any hddm stream. No recompilation is required.
38  *
39  * 4. The code has been tested with the xerces-c DOM implementation from
40  * Apache, and is intended to be used with the xerces-c library.
41  *
42  * 5. Output is sent by default to stdout and can be changed with the
43  * -o option.
44  */
45 
46 // #define VERBOSE_HDDM_LOGGING 1
47 #define BAD_CRC_IS_ONLY_WARNING 1
48 
49 #include <xercesc/util/PlatformUtils.hpp>
50 #include <xercesc/dom/DOMNamedNodeMap.hpp>
51 
52 #include "XParsers.hpp"
53 #include "XString.hpp"
54 
55 #include <assert.h>
56 #include <stdlib.h>
57 #include <stdio.h>
58 #include <rpc/rpc.h>
59 #include <unistd.h>
60 #include <xstream/z.h>
61 #include <xstream/bz.h>
62 #include <xstream/xdr.h>
63 #include <xstream/digest.h>
64 
65 #include <iostream>
66 #include <fstream>
67 #include <sstream>
68 
69 #include "particleType.h"
70 
71 
72 #define X(str) XString(str).unicode_str()
73 #define S(str) str.c_str()
74 
75 using namespace xercesc;
76 
78 
79 class XMLmaker
80 {
81  public:
82  std::ofstream xout;
83 
84  XMLmaker() {};
85  ~XMLmaker() {};
86 
87  void writeXML(const XString& s);
88  void constructXML(xstream::xdr::istream *ifx, DOMElement* el,
89  int size, int depth);
90 };
91 
/* A std::streambuf whose get area is a caller-supplied memory buffer.
 *
 * The buffer is not copied or owned; the small tellg/seekg/reset
 * helpers operate directly on the get-area pointers.
 */
class istreambuffer : public std::streambuf {
 public:
   /* Expose bufferLength bytes starting at buffer as the get area. */
   istreambuffer(char* buffer, std::streamsize bufferLength) {
      char* const last = buffer + bufferLength;
      setg(buffer, buffer, last);
   }

   /* Offset of the read position from the start of the buffer. */
   std::streampos tellg() {
      return std::streampos(gptr() - eback());
   }

   /* Move the read position to pos bytes past the start of the buffer. */
   void seekg(std::streampos pos) {
      reset();
      const std::streamoff offset = pos;
      gbump(static_cast<int>(offset));
   }

   /* Number of bytes between the read position and the end of the buffer. */
   int size() {
      return static_cast<int>(egptr() - gptr());
   }

   /* Rewind the read position to the start of the buffer. */
   void reset() {
      char* const first = eback();
      setg(first, first, egptr());
   }

   /* Address of the first byte of the underlying buffer. */
   char *getbuf() {
      return eback();
   }
};
121 
/* A std::streambuf whose put area is a caller-supplied memory buffer.
 *
 * The buffer is not copied or owned; the small tellp/seekp/reset
 * helpers operate directly on the put-area pointers.
 */
class ostreambuffer : public std::streambuf {
 public:
   /* Expose bufferLength bytes starting at buffer as the put area. */
   ostreambuffer(char* buffer, std::streamsize bufferLength) {
      char* const last = buffer + bufferLength;
      setp(buffer, last);
   }

   /* Offset of the write position from the start of the buffer. */
   std::streampos tellp() {
      return std::streampos(pptr() - pbase());
   }

   /* Move the write position to pos bytes past the start of the buffer. */
   void seekp(std::streampos pos) {
      reset();
      const std::streamoff offset = pos;
      pbump(static_cast<int>(offset));
   }

   /* Number of bytes written so far (start of buffer to write position). */
   int size() {
      return static_cast<int>(pptr() - pbase());
   }

   /* Rewind the write position to the start of the buffer. */
   void reset() {
      setp(pbase(), epptr());
   }

   /* Address of the first byte of the underlying buffer. */
   char *getbuf() {
      return pbase();
   }
};
151 
/* Print a short summary of the command-line syntax and the supported
 * options on stderr. Each option is listed on a line of its own.
 */
void usage()
{
   std::cerr
        << "\nUsage:\n"
        << " hddm-xml [-n <count>] [-o <filename>] [HDDM file]\n\n"
        << "Options:\n"
        << " -o <filename> write to <filename>.xml\n"
        << " -n <count> limit output to <count> records"
        << std::endl;
}
162 
163 
164 int main(int argC, char* argV[])
165 {
166  XString xFilename;
167 
168  try
169  {
170  XMLPlatformUtils::Initialize();
171  }
172  catch (const XMLException* toCatch)
173  {
174  XString msg(toCatch->getMessage());
175  std::cerr
176  << "hddm-xml: Error during initialization! :\n"
177  << S(msg) << std::endl;
178  return 1;
179  }
180 
181  int reqcount=-1;
182  int argInd;
183  for (argInd = 1; argInd < argC; argInd++)
184  {
185  if (argV[argInd][0] != '-')
186  {
187  break;
188  }
189  else if (strcmp(argV[argInd],"-o") == 0)
190  {
191  xFilename = argV[++argInd];
192  }
193  else if (strcmp(argV[argInd],"-n") == 0)
194  {
195  if (!sscanf(argV[++argInd],"%d",&reqcount))
196  {
197  usage();
198  return 1;
199  }
200  }
201  else
202  {
203  usage();
204  return 1;
205  }
206  }
207 
208  XString hddmFile;
209  std::istream* ifs;
210  if (argInd == argC)
211  {
212  ifs = &std::cin;
213  }
214  else if (argInd == argC - 1)
215  {
216  hddmFile = XString(argV[argInd]);
217  ifs = new std::ifstream(hddmFile.c_str());
218  }
219  else
220  {
221  usage();
222  return 1;
223  }
224  if (!ifs->good())
225  {
226  std::cerr
227  << "hddm-xml: Error opening input stream " << hddmFile << std::endl;
228  exit(1);
229  }
230  std::ostringstream tmpFileStr;
231  tmpFileStr << "tmp" << getpid();
232  std::ofstream ofs(tmpFileStr.str().c_str());
233  if (! ofs.is_open())
234  {
235  std::cerr
236  << "hddm-xml: Error opening temp file " << tmpFileStr.str() << std::endl;
237  exit(2);
238  }
239 
240  ofs << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
241  XString xmlHeader;
242  XString line;
243  if (getline(*ifs,line))
244  {
245  if (line.substr(0,5) == "<?xml")
246  {
247  std::cerr
248  << "hddm-xml: Error reading input stream " << hddmFile
249  << std::endl;
250  std::cerr
251  << "Input file appears to be an xml document!" << std::endl;
252  exit(1);
253  }
254  else if (line.substr(0,5) == "<HDDM")
255  {
256  xmlHeader = line + "\n";
257  ofs << line;
258  }
259  else
260  {
261  std::cerr
262  << "hddm-xml: Input stream does not contain valid hddm header"
263  << std::endl;
264  exit(1);
265  }
266  }
267  else
268  {
269  std::cerr
270  << "hddm-xml: Error reading from input stream " << hddmFile
271  << std::endl;
272  exit(1);
273  }
274  while (getline(*ifs,line))
275  {
276  ofs << line;
277  if (line == "</HDDM>")
278  {
279  break;
280  }
281  }
282  ofs.close();
283 
284 #if defined OLD_STYLE_XERCES_PARSER
285  DOMDocument* document = parseInputDocument(tmpFileStr.str().c_str(),false);
286 #else
287  DOMDocument* document = buildDOMDocument(tmpFileStr.str().c_str(),false);
288 #endif
289  if (document == 0)
290  {
291  std::cerr
292  << "hddm-xml : Error parsing HDDM document, "
293  << "cannot continue" << std::endl;
294  return 1;
295  }
296  unlink(tmpFileStr.str().c_str());
297 
298  DOMElement* rootEl = document->getDocumentElement();
299  XString rootS(rootEl->getTagName());
300  if (rootS != "HDDM")
301  {
302  std::cerr
303  << "hddm-xml error: root element of input document is "
304  << "\"" << S(rootS) << "\", expected \"HDDM\""
305  << std::endl;
306  return 1;
307  }
308 
309  XMLmaker builder;
310  if (xFilename.size())
311  {
312  XString fname(xFilename + ".xml");
313  builder.xout.open(fname.c_str());
314  }
315 
316  builder.writeXML("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
317  builder.writeXML(xmlHeader);
318 
319  int event_buffer_size;
320  char *event_buffer = new char[event_buffer_size = 1000000];
321  istreambuffer *isbuf = new istreambuffer(event_buffer,event_buffer_size);
323  int integrity_check_mode = 0;
324  int compression_mode = 0;
325  while (reqcount && ifs->good())
326  {
327  DOMNodeList* contList = rootEl->getChildNodes();
328  int contLength = contList->getLength();
329  int tsize;
330  ifs->read(event_buffer,4);
331  if (ifs->eof()) {
332  break;
333  }
334  isbuf->reset();
335  *ifx >> tsize;
336 #ifdef VERBOSE_HDDM_LOGGING
337  XString tnameS(rootEl->getTagName());
338  std::cerr << "hddm-xml : tag " << S(tnameS)
339  << " found with size " << tsize
340  << std::endl;
341 #endif
342  if (tsize <= 0)
343  {
344  break;
345  }
346  else if (tsize == 1) {
347  int size, format, flags;
348  ifs->read(event_buffer+4,4);
349  *ifx >> size;
350  ifs->read(event_buffer+8,size);
351  *ifx >> format >> flags;
352  int compression_flags = flags & 0xf0;
353  int integrity_flags = flags & 0x0f;
354  std::streambuf *fin_sb = 0;
355  xstream::z::istreambuf *zin_sb = 0;
356  xstream::bz::istreambuf *bzin_sb = 0;
357  int *leftovers = new int[100];
358  int sizeof_leftovers = sizeof(int[100]);
359  leftovers[0] = 0;
360  if (compression_flags == compression_mode) {
361  fin_sb = ifs->rdbuf();
362  }
363  else if (size == 8 && format == 0 && compression_flags == 0x10) {
364  if (compression_mode == 0x20) {
365  bzin_sb = (xstream::bz::istreambuf*)ifs->rdbuf();
366  }
367  compression_mode = compression_flags;
368  zin_sb = new xstream::z::istreambuf(ifs->rdbuf(),
369  leftovers, sizeof_leftovers);
370  ifs->rdbuf(zin_sb);
371  if (bzin_sb != 0)
372  delete bzin_sb;
373  }
374  else if (size == 8 && format == 0 && compression_flags == 0x20) {
375  if (compression_mode == 0x10) {
376  zin_sb = (xstream::z::istreambuf*)ifs->rdbuf();
377  }
378  compression_mode = compression_flags;
379  bzin_sb = new xstream::bz::istreambuf(ifs->rdbuf(),
380  leftovers, sizeof_leftovers);
381  ifs->rdbuf(bzin_sb);
382  if (zin_sb != 0)
383  delete zin_sb;
384  }
385  else {
386  if (compression_mode == 0x20) {
387  bzin_sb = (xstream::bz::istreambuf*)ifs->rdbuf();
388  fin_sb = bzin_sb->get_streambuf();
389  }
390  else if (compression_mode == 0x10) {
391  zin_sb = (xstream::z::istreambuf*)ifs->rdbuf();
392  fin_sb = zin_sb->get_streambuf();
393  }
394  compression_mode = compression_flags;
395  ifs->rdbuf(fin_sb);
396  if (zin_sb != 0)
397  delete zin_sb;
398  if (bzin_sb != 0)
399  delete bzin_sb;
400  }
401  if (size == 8 && format == 0 && integrity_flags == 0x0) {
402  integrity_check_mode = 0;
403  }
404  else if (size == 8 && format == 0 && integrity_flags == 0x1) {
405  integrity_check_mode = 1;
406  }
407  else {
408  std::cerr << "hddm-xml error: unrecognized stream modifier"
409  " encountered, this stream is no longer readable."
410  << std::endl;
411  break;
412  }
413  continue;
414  }
415  else if (tsize+4 > event_buffer_size) {
416  delete ifx;
417  delete isbuf;
418  char *new_buffer = new char[event_buffer_size = tsize+1000];
419  isbuf = new istreambuffer(new_buffer,event_buffer_size);
420  ifx = new xstream::xdr::istream(isbuf);
421  memcpy(new_buffer,event_buffer,4);
422  *ifx >> tsize;
423  delete[] event_buffer;
424  event_buffer = new_buffer;
425  }
426  ifs->read(event_buffer+4,tsize);
427  --reqcount;
428 
429  if (integrity_check_mode == 1) {
430  char crcbuf[10];
431  istreambuffer sbuf(crcbuf,10);
432  xstream::xdr::istream xstr(&sbuf);
433  unsigned int recorded_crc;
434  ifs->read(crcbuf,4);
435  xstr >> recorded_crc;
436  xstream::digest::crc32 crc;
437  std::ostream out(&crc);
438  out.write(event_buffer,tsize+4);
439  out.flush();
440  if (crc.digest() != recorded_crc) {
441 #if BAD_CRC_IS_ONLY_WARNING
442  static int bad_crc_warning_needed = true;
443  char errmsg[] =
444  "WARNING: data integrity crc check failed on input.\n"
445  "This may be the result of a bug in the xstream library\n"
446  "if you are analyzing a data file that was generated by\n"
447  "code prior to svn rev 18530. If this concerns you, \n"
448  "regenerate the file using a newer build of the sim-recon\n"
449  "tools and it should go away.\n";
450  if (bad_crc_warning_needed) {
451  std::cerr << errmsg << std::endl;
452  bad_crc_warning_needed = false;
453  }
454 #else
455  std::cerr << "hddm-xml error: crc32 check error on input stream"
456  " encountered, this stream is no longer readable."
457  << std::endl;
458  break;
459 #endif
460  }
461  }
462 
463  for (int c = 0; c < contLength; c++)
464  {
465  DOMNode* cont = contList->item(c);
466  short type = cont->getNodeType();
467  if (type == DOMNode::ELEMENT_NODE)
468  {
469  DOMElement* contEl = (DOMElement*) cont;
470  int size;
471  *ifx >> size;
472 #ifdef VERBOSE_HDDM_LOGGING
473  XString cnameS(contEl->getTagName());
474  std::cerr << "hddm-xml : top-level tag " << S(cnameS)
475  << " found with size " << size
476  << std::endl;
477 #endif
478  if (size > 0)
479  {
480  builder.constructXML(ifx,contEl,size,1);
481  }
482  else {
483  XString repS(contEl->getAttribute(X("minOccurs")));
484  int rep = (repS == "")? 1 : atoi(S(repS));
485  if (rep != 0) {
486  XString conameS(contEl->getTagName());
487  std::cerr << "hddm-xml warning: top-level tag " << S(conameS)
488  << " found with zero size "
489  << "inside an event with size " << tsize
490  << " continue? [y/n] ";
491  std::string ans;
492  std::cin >> ans;
493  if (ans[0] != 'y' && ans[0] != 'Y') {
494  exit(5);
495  }
496  }
497  }
498  }
499  }
500  }
501 
502  builder.writeXML("</HDDM>\n");
503 
504  if (ifs != &std::cin)
505  {
506  ((std::ifstream*)ifs)->close();
507  }
508  XMLPlatformUtils::Terminate();
509  return 0;
510 }
511 
512 /* write a string to xml output stream, either stdout or a file */
513 
515 {
516  if (xout.is_open())
517  {
518  xout << s;
519  }
520  else
521  {
522  std::cout << s;
523  }
524 }
525 
526 /* Generate the output xml document according the DOM;
527  * at entry the buffer pointer bp points the the word after the word count
528  */
529 
531  DOMElement* el, int size, int depth)
532 {
533  XString tagS(el->getTagName());
534  XString repS(el->getAttribute(X("maxOccurs")));
535  int rep = (repS == "unbounded")? INT_MAX :
536  (repS == "")? 1 :
537  atoi(S(repS));
538  if (explicit_repeat_count && rep > 1)
539  {
540  *ifx >> rep;
541  size -= 4;
542  }
543 
544  int r;
545  for (r = 0; r < rep && size > 0; r++)
546  {
547  for (int d = 0; d < depth; d++)
548  {
549  writeXML(" ");
550  }
551  writeXML("<");
552  writeXML(S(tagS));
553  DOMNamedNodeMap* attrList = el->getAttributes();
554  int listLength = attrList->getLength();
555  for (int a = 0; a < listLength; a++)
556  {
557  XString nameS(attrList->item(a)->getNodeName());
558  XString typeS(attrList->item(a)->getNodeValue());
559  std::ostringstream attrStr;
560  if (typeS == "int")
561  {
562  int32_t value;
563  *ifx >> value;
564  size -= 4;
565  attrStr << " " << nameS << "=\"" << value << "\"";
566  }
567  else if (typeS == "long")
568  {
569  int64_t value;
570  *ifx >> value;
571  size -= 8;
572  attrStr << " " << nameS << "=\"" << value << "\"";
573  }
574  else if (typeS == "float")
575  {
576  float value;
577  *ifx >> value;
578  size -= 4;
579  attrStr << " " << nameS << "=\"" << value << "\"";
580  }
581  else if (typeS == "double")
582  {
583  double value;
584  *ifx >> value;
585  size -= 8;
586  attrStr << " " << nameS << "=\"" << value << "\"";
587  }
588  else if (typeS == "boolean")
589  {
590  bool_t value;
591  *ifx >> value;
592  size -= 4;
593  attrStr << " " << nameS << "=\"" << value << "\"";
594  }
595  else if (typeS == "Particle_t")
596  {
597  int32_t value;
598  *ifx >> value;
599  size -= 4;
600  attrStr << " " << nameS << "=\"" << ParticleType((Particle_t)value) << "\"";
601  }
602  else if (typeS == "string" || typeS == "anyURI")
603  {
604  std::string value;
605  *ifx >> value;
606  int strsize = value.size();
607  size -= strsize + 4 + ((strsize % 4)? 4-(strsize % 4) : 0);
608  attrStr << " " << nameS << "=\"" << value << "\"";
609  }
610  else if (nameS == "minOccurs" || nameS == "maxOccurs")
611  {
612  ;
613  }
614  else
615  {
616  attrStr << " " << nameS << "=\"" << typeS << "\"";
617  }
618  writeXML(attrStr.str());
619  }
620 
621  DOMNodeList* contList = el->getChildNodes();
622  int contLength = contList->getLength();
623  if (contLength > 1)
624  {
625  writeXML(">\n");
626  }
627  else
628  {
629  writeXML(" />\n");
630  }
631 
632  for (int c = 0; c < contLength; c++)
633  {
634  DOMNode* cont = contList->item(c);
635  short type = cont->getNodeType();
636  if (type == DOMNode::ELEMENT_NODE)
637  {
638  DOMElement* contEl = (DOMElement*) cont;
639  int csize;
640  *ifx >> csize;
641  size -= 4;
642 #ifdef VERBOSE_HDDM_LOGGING
643  XString cnameS(contEl->getTagName());
644  std::cerr << "hddm-xml : tag " << S(cnameS)
645  << " found with size " << csize
646  << std::endl;
647 #endif
648  if (csize > 0) {
649  constructXML(ifx,contEl,csize,depth +1);
650  size -= csize;
651  }
652 #ifdef VERBOSE_HDDM_LOGGING
653  else {
654  XString irepS(contEl->getAttribute(X("minOccurs")));
655  int irep = (irepS == "")? 1 : atoi(S(irepS));
656  if (irep != 0) {
657  XString conameS(contEl->getTagName());
658  std::cerr << "hddm-xml warning: tag " << S(conameS)
659  << " found with zero size, "
660  << "continue? [y/n] ";
661  std::string ans;
662  std::cin >> ans;
663  if (ans[0] != 'y' && ans[0] != 'Y') {
664  exit(5);
665  }
666  }
667  }
668 #endif
669  }
670  }
671 
672  if (contLength > 1)
673  {
674  for (int d = 0; d < depth; d++)
675  {
676  writeXML(" ");
677  }
678  XString endTag("</"+tagS+">\n");
679  writeXML(endTag);
680  }
681  }
682  if (size != 0) {
683  std::cerr << "hddm-xml : size mismatch in tag " << S(tagS)
684  << ", remainder is " << size
685  << ", cannot continue." << std::endl;
686  exit(5);
687  }
688  else if (explicit_repeat_count && r != rep) {
689  std::cerr << "hddm-xml : repeat count mismatch in tag " << S(tagS)
690  << ", expected " << rep << " but saw " << r
691  << ", cannot continue." << std::endl;
692  exit(5);
693  }
694 }
C++ iostream like interface to read and write xdr streams.
#define X(str)
Definition: hddm-xml.cpp:72
double tsize
Definition: DIRC_digihit.C:36
char * getbuf()
Definition: hddm-xml.cpp:147
char string[256]
#define c
xercesc::DOMDocument * buildDOMDocument(const XString &xmlFile, bool keep)
Definition: XParsers.cpp:142
std::streampos tellp()
Definition: hddm-xml.cpp:128
C++ objects to calculate digests of data.
void usage()
Definition: t_rest.cxx:114
static char * ParticleType(Particle_t p)
Definition: particleType.h:142
xercesc::DOMDocument * parseInputDocument(const XString &xmlFile, bool keep)
Definition: XParsers.cpp:73
void reset()
Definition: hddm-xml.cpp:111
std::streampos tellg()
Definition: hddm-xml.cpp:98
#define S(str)
Definition: hddm-xml.cpp:73
ostreambuffer(char *buffer, std::streamsize bufferLength)
Definition: hddm-xml.cpp:124
void reset()
Definition: hddm-xml.cpp:141
void writeXML(const XString &s)
Definition: hddm-xml.cpp:514
C++ streambuf interface to read and write file formats supported by Zlib.
~XMLmaker()
Definition: hddm-xml.cpp:85
XMLmaker()
Definition: hddm-xml.cpp:84
istreambuffer(char *buffer, std::streamsize bufferLength)
Definition: hddm-xml.cpp:94
C++ streambuf interface to read and write bzip2 streams.
std::ofstream xout
Definition: hddm-xml.cpp:82
Input xdr stream class.
Definition: xdr.h:127
void seekp(std::streampos pos)
Definition: hddm-xml.cpp:132
void seekg(std::streampos pos)
Definition: hddm-xml.cpp:102
int explicit_repeat_count
Definition: hddm-root.cpp:43
void constructXML(xstream::xdr::istream *ifx, DOMElement *el, int size, int depth)
Definition: hddm-xml.cpp:530
int main(int argc, char *argv[])
Definition: gendoc.cc:6
char * getbuf()
Definition: hddm-xml.cpp:117
Particle_t
Definition: particleType.h:12