Hall-D Software  alpha
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
xml-hddm.cpp
Go to the documentation of this file.
1 /*
2  * xml-hddm : tool that reads in a plain-text xml data document
3  * and writes it out as a hddm (Hall D Data Model)
4  * following the template provided.
5  *
6  * Version 1.2 - Richard Jones, December 2005.
7  * - Updated code to use STL strings and vectors instead of old c-style
8  * pre-allocated arrays and strXXX functions.
9  * - Moved functions into classes grouped by function for better clarity.
10  * - Introduced the XStream class library instead of the direct interface
11  * to the rpc/xdr c-library function. This also gives access to a nice
12  * integrated set of compression/decompression streambuf classes.
13  *
14  * Version 1.1 - Richard Jones, September 2003.
15  * - Updated code to work with the new DOM-2 implementation Xerces-c
16  * from apache.org. Significant changes have taken place in the API
17  * since DOM-1.
18  * - Added support for new types "long" (int64), "string" (char arrays of
19  * arbitrary length), and "anyURI" (special case of string).
20  * - Switched from native encoding to the use of the XDR library to make
21  * hddm files machine-independent.
22  *
23  * Original version - Richard Jones, October 1 2001.
24  *
25  *
26  * Programmer's Notes:
27  * -------------------
28  * 1. The output from xml-hddm is a valid hddm data stream.
29  *
30  * 2. Two inputs are required: the input xml data document, and a xml
31  * template describing the structure of the data in the document.
32  * Both documents must be well-formed. In addition, the data document
33  * must conform to the hddm specification document. Only if both of
34  * these requirements are met is it possible to ensure that the data
35  * document can be expressed in a hddm stream.
36  *
37  * 3. The code has been tested with the xerces-c DOM implementation from
38  * Apache, and is intended to be used with the xerces-c library.
39  *
40  * 4. Output is sent by default to stdout and can be changed with the
41  * -o option.
42  */
43 
44 #include <xercesc/util/PlatformUtils.hpp>
45 #include <xercesc/dom/DOMNamedNodeMap.hpp>
46 
47 #include "XParsers.hpp"
48 #include "XString.hpp"
49 
50 #include <assert.h>
51 #include <stdlib.h>
52 #include <stdio.h>
53 #include <rpc/rpc.h>
54 #include <unistd.h>
55 
56 #include <fstream>
57 #include <string>
58 #include <sstream>
59 #include <xstream/xdr.h>
60 
61 #include "particleType.h"
62 
63 using namespace xercesc;
64 
65 #define X(str) XString(str).unicode_str()
66 #define S(str) str.c_str()
67 
69 
70 class HDDMmaker
71 {
72  public:
73  std::ostream* ofs;
74 
75  HDDMmaker() {};
76  ~HDDMmaker() {};
77 
78  void constructDocument(DOMElement* el);
79  void outputStream(DOMElement* thisEl, DOMElement* modelEl,
80  std::ostream& ofs);
81 };
82 
/* Print the command-line synopsis and the option summary on stderr. */
void usage()
{
   static const char* const helpText =
      "\nUsage:\n"
      " xml-hddm [-o <filename>] -t <template> [input file]\n\n"
      "Options:\n"
      " -t <template> read template from <template>\n"
      " -o <filename> write to hddm file <filename>";

   // std::endl terminates the last line and flushes the stream
   std::cerr << helpText << std::endl;
}
93 
94 
95 int main(int argC, char* argV[])
96 {
97  std::ifstream* ifs;
98  XString templFilename;
99  XString outFilename;
100 
101  try
102  {
103  XMLPlatformUtils::Initialize();
104  }
105  catch (const XMLException* toCatch)
106  {
107  std::cerr
108  << "hddm-xml: Error during initialization! :\n"
109  << toCatch->getMessage() << std::endl;
110  return 1;
111  }
112 
113  int argInd;
114  for (argInd = 1; argInd < argC; argInd++)
115  {
116  if (argV[argInd][0] != '-')
117  {
118  break;
119  }
120  else if (strcmp(argV[argInd],"-t") == 0)
121  {
122  templFilename = argV[++argInd];
123  }
124  else if (strcmp(argV[argInd],"-o") == 0)
125  {
126  outFilename = argV[++argInd];
127  }
128  else
129  {
130  usage();
131  return 1;
132  }
133  }
134 
135  if (templFilename.size() == 0)
136  {
137  usage();
138  exit(1);
139  }
140 
141  HDDMmaker builder;
142  if (outFilename.size())
143  {
144  XString fname(outFilename + ".hddm");
145  builder.ofs = new std::ofstream(fname.c_str());
146  }
147  else
148  {
149  builder.ofs = &std::cout;
150  }
151 
152 #if defined OLD_STYLE_XERCES_PARSER
153  DOMDocument* document = parseInputDocument(templFilename.c_str(),true);
154 #else
155  DOMDocument* document = buildDOMDocument(templFilename.c_str(),true);
156 #endif
157  if (document == 0)
158  {
159  std::cerr
160  << "xml-hddm : Error parsing template HDDM document, "
161  << "cannot continue" << std::endl;
162  return 1;
163  }
164 
165  DOMElement* rootEl = document->getDocumentElement();
166  XString rootS(rootEl->getTagName());
167  if (rootS != "HDDM")
168  {
169  std::cerr
170  << "xml-hddm error: root element of input document is "
171  << "\"" << S(rootS) << "\", expected \"HDDM\""
172  << std::endl;
173  exit(1);
174  }
175 
176  builder.constructDocument(rootEl);
177 
178  XString xmlFile;
179  if (argInd == argC)
180  {
181  ifs = new std::ifstream(0);
182  }
183  else if (argInd == argC - 1)
184  {
185  xmlFile = XString(argV[argInd]);
186  ifs = new std::ifstream(xmlFile.c_str());
187  }
188  else
189  {
190  usage();
191  return 1;
192  }
193 
194  if (! ifs->is_open())
195  {
196  std::cerr
197  << "xml-hddm: Error opening input stream "
198  << xmlFile << std::endl;
199  exit(1);
200  }
201 
202  XString xmlHeader;
203  XString docHeader;
204  XString line;
205  if (std::getline(*ifs,line))
206  {
207  if (line.substr(0,5) != "<?xml")
208  {
209  std::cerr
210  << "xml-hddm: Error reading input stream "
211  << xmlFile << std::endl;
212  std::cerr
213  << "Input file does not appear to be an xml document!"
214  << std::endl;
215  exit(1);
216  }
217  }
218  else
219  {
220  std::cerr
221  << "xml-hddm: Error reading from input stream "
222  << xmlFile << std::endl;
223  exit(1);
224  }
225  xmlHeader = line;
226  if (std::getline(*ifs,line) && line.substr(0,5) != "<HDDM")
227  {
228  std::cerr
229  << "xml-hddm: Input document tag is not HDDM!"
230  << std::endl;
231  exit(1);
232  }
233  docHeader = line;
234 
235  xstream::xdr::ostream ofx(*builder.ofs);
236  std::stringstream tmpFileStr;
237  tmpFileStr << "tmp" << getpid();
238  while (getline(*ifs,line))
239  {
240  if (line.size() > 500000)
241  {
242  std::cerr
243  << "xml-hddm: line too long in input document" << std::endl;
244  exit(1);
245  }
246 
247  XString text(line);
248 
249  std::ofstream ofs(tmpFileStr.str().c_str());
250  if (! ofs.is_open())
251  {
252  std::cerr
253  << "xml-hddm: Error opening temp file "
254  << tmpFileStr.str() << std::endl;
255  exit(2);
256  }
257  ofs << xmlHeader << std::endl;
258  ofs << docHeader << std::endl;
259 
260  while (text.size())
261  {
262  XString::size_type start = text.find_first_of("<");
263  if (start == XString::npos)
264  {
265  break;
266  }
267  else if (text.substr(start,2) == "</")
268  {
269  break;
270  }
271  XString::size_type end = text.find_first_of('>');
272  while (end == XString::npos)
273  {
274  if (line.size() > 400000)
275  {
276  std::cerr
277  << "xml-hddm: tag too long in input document" << std::endl;
278  exit(1);
279  }
280  else
281  {
282  std::getline(*ifs,line);
283  text += line;
284  }
285  end = text.find_first_of('>');
286  }
287  if (text.substr(end-1,2) == "/>")
288  {
289  ofs << text.substr(0,end+1) << std::endl;
290  text.erase(0,end+1);
291  }
292  else
293  {
294  XString endTag;
295  endTag = "</" + text.substr(start+1,
296  text.find_first_of(" \t",start)-start-1);
297  while (text.find(endTag) == XString::npos)
298  {
299  ofs << text << std::endl;
300  std::getline(*ifs,text);
301  }
302  ofs << text << std::endl;
303  }
304  ofs << "</HDDM>" << std::endl;
305  ofs.close();
306 
307 #if defined OLD_STYLE_XERCES_PARSER
308  document = parseInputDocument(tmpFile.str().c_str(),false);
309 #else
310  document = buildDOMDocument(tmpFileStr.str().c_str(),false);
311 #endif
312  if (document == 0)
313  {
314  std::cerr
315  << "xml-hddm : Error parsing HDDM document, "
316  << "cannot continue" << std::endl;
317  delete ifs;
318  return 1;
319  }
320  unlink(tmpFileStr.str().c_str());
321 
322  DOMElement* thisEl = document->getDocumentElement();
323 
324  std::ostringstream ofsbuf;
325  xstream::xdr::ostream ofxbuf(ofsbuf);
326  builder.outputStream(thisEl,rootEl,ofsbuf);
327  int size = (int)ofsbuf.tellp();
328  ofx << ((size > 0)? size : 0);
329  if (size > 0)
330  {
331  *builder.ofs << ofsbuf.str();
332  }
333  }
334  }
335 
336  if (builder.ofs != &std::cout) {
337  ((std::ofstream*)builder.ofs)->close();
338  }
339  unlink(tmpFileStr.str().c_str());
340  XMLPlatformUtils::Terminate();
341  return 0;
342 }
343 
344 /* Generate the xml document template in normal form */
345 
346 void HDDMmaker::constructDocument(DOMElement* el)
347 {
348  static int indent = 0;
349  for (int n = 0; n < indent; n++)
350  {
351  *ofs << " ";
352  }
353 
354  XString tagS(el->getTagName());
355  *ofs << "<" << tagS;
356  DOMNamedNodeMap* attrList = el->getAttributes();
357  int attrListLength = attrList->getLength();
358  for (int a = 0; a < attrListLength; a++)
359  {
360  DOMNode* node = attrList->item(a);
361  XString nameS(node->getNodeName());
362  XString valueS(node->getNodeValue());
363  *ofs << " " << nameS << "=\"" << valueS << "\"";
364  }
365 
366  DOMNodeList* contList = el->getChildNodes();
367  int contListLength = contList->getLength();
368  if (contListLength > 0)
369  {
370  *ofs << ">" << std::endl;
371  indent++;
372  for (int c = 0; c < contListLength; c++)
373  {
374  DOMNode* node = contList->item(c);
375  if (node->getNodeType() == DOMNode::ELEMENT_NODE)
376  {
377  DOMElement* contEl = (DOMElement*) node;
378  constructDocument(contEl);
379  }
380  }
381  indent--;
382  for (int n = 0; n < indent; n++)
383  {
384  *ofs << " ";
385  }
386  *ofs << "</" << tagS << ">" << std::endl;
387  }
388  else
389  {
390  *ofs << " />" << std::endl;
391  }
392 }
393 
394 /* Generate the output binary stream according the HDDM template */
395 
396 void HDDMmaker::outputStream(DOMElement* thisEl, DOMElement* modelEl,
397  std::ostream& ofs)
398 {
399  XString modelS(modelEl->getTagName());
400  XString thisS(thisEl->getTagName());
401 
402  DOMNamedNodeMap* modelAttrList = modelEl->getAttributes();
403  int modelAttrListLength = modelAttrList->getLength();
404  DOMNamedNodeMap* thisAttrList = thisEl->getAttributes();
405  int thisAttrListLength = thisAttrList->getLength();
406  XString minS(modelEl->getAttribute(X("minOccurs")));
407  XString maxS(modelEl->getAttribute(X("maxOccurs")));
408  int expectAttrCount = modelAttrList->getLength()
409  - (minS == ""? 0 : 1)
410  - (maxS == ""? 0 : 1);
411  if (thisAttrListLength != expectAttrCount)
412  {
413  std::cerr
414  << "xml-hddm: Inconsistency in input xml document" << std::endl
415  << "tag " << S(thisS) << " in input document with "
416  << thisAttrListLength << " attributes " << std::endl
417  << "matched to tag " << S(modelS) << " in hddm template "
418  << "with " << expectAttrCount << " attributes." << std::endl;
419  exit(1);
420  }
421 
422  xstream::xdr::ostream ofx(ofs);
423  for (int a = 0; a < modelAttrListLength; a++)
424  {
425  XString attrS(modelAttrList->item(a)->getNodeName());
426  XString typeS(modelAttrList->item(a)->getNodeValue());
427  XString valueS(thisEl->getAttribute(X(attrS)));
428  if (attrS == "maxOccurs" || attrS == "minOccurs")
429  {
430  continue;
431  }
432  else if (valueS == "" and typeS != "string")
433  {
434  std::cerr
435  << "xml-hddm: Inconsistency in input xml document" << std::endl
436  << "tag " << S(thisS) << " in input document is missing "
437  << "attribute " << S(attrS) << std::endl;
438  exit(1);
439  }
440  std::stringstream valueStr(valueS);
441  if (typeS == "int")
442  {
443  int32_t val;
444  valueStr >> val;
445  ofx << val;
446  }
447  if (typeS == "long")
448  {
449  int64_t val;
450  valueStr >> val;
451  ofx << val;
452  }
453  else if (typeS == "float")
454  {
455  float val;
456  if (valueS == "nan") {
457  val = NAN;
458  }
459  else if (valueS == "-nan") {
460  val = -NAN;
461  }
462  else if (valueS == "inf") {
463  val = INFINITY;
464  }
465  else if (valueS == "-inf") {
466  val = -INFINITY;
467  }
468  else {
469  valueStr >> val;
470  }
471  ofx << val;
472  }
473  else if (typeS == "double")
474  {
475  double val;
476  if (valueS == "nan") {
477  val = NAN;
478  }
479  else if (valueS == "-nan") {
480  val = -NAN;
481  }
482  else if (valueS == "inf") {
483  val = INFINITY;
484  }
485  else if (valueS == "-inf") {
486  val = -INFINITY;
487  }
488  else {
489  valueStr >> val;
490  }
491  ofx << val;
492  }
493  else if (typeS == "boolean")
494  {
495  int val;
496  valueStr >> val;
497  ofx << val;
498  }
499  else if (typeS == "Particle_t")
500  {
501  int32_t val;
502  for (val = 0; val < 99; val++)
503  {
504  if (valueS == ParticleType((Particle_t)val))
505  {
506  break;
507  }
508  }
509  ofx << val;
510  }
511  else if (typeS == "string" || typeS == "anyURI")
512  {
513  ofx << valueS;
514  }
515  else
516  {
517  // other types are treated as comments
518  }
519  }
520 
521  DOMNodeList* thisList = thisEl->getChildNodes();
522  int thisListLength = thisList->getLength();
523  DOMNodeList* modelList = modelEl->getChildNodes();
524  int modelListLength = modelList->getLength();
525  for (int m = 0; m < modelListLength; m++)
526  {
527  DOMNode* mode = modelList->item(m);
528  short type = mode->getNodeType();
529  if (type == DOMNode::ELEMENT_NODE)
530  {
531  DOMElement* model = (DOMElement*) mode;
532  XString modelS(model->getTagName());
533  /*
534  XString reqS(model->getAttribute(X("minOccurs")));
535  int req = (reqS == "unbounded")? INT_MAX :
536  (reqS == "")? 1 :
537  atoi(S(reqS));
538  */
539  XString repS(model->getAttribute(X("maxOccurs")));
540  int rep = (repS == "unbounded")? INT_MAX :
541  (repS == "")? 1 :
542  atoi(S(repS));
543  int repCount=0;
544  std::stringstream ofsbuf;
545  xstream::xdr::ostream ofxbuf(ofsbuf);
546  for (int i = 0; i < thisListLength; i++)
547  {
548  DOMNode* instance = thisList->item(i);
549  short type = instance->getNodeType();
550  if (type == DOMNode::ELEMENT_NODE)
551  {
552  DOMElement* instanceEl = (DOMElement*) instance;
553  XString nameS(instanceEl->getTagName());
554  if (nameS == modelS)
555  {
556  outputStream(instanceEl,model,ofsbuf);
557  if (repCount++ && (rep == 1))
558  {
559  std::cerr
560  << "xml-hddm: Inconsistency in input xml document"
561  << std::endl
562  << "tag " << S(thisS) << " in input document contains"
563  << " multiple instances of tag " << S(nameS)
564  << std::endl
565  << "but it does not have a maxOccurs=\"*\" attribute "
566  << "in the template." << std::endl;
567  exit(1);
568  }
569  else if (repCount > rep) {
570  std::cerr
571  << "xml-hddm: Inconsistency in input xml document"
572  << std::endl
573  << "tag " << S(nameS) << " in the template has "
574  << "maxOccurs=" << rep << std::endl
575  << "but the input document contains more than "
576  << rep << " instances." << std::endl;
577  exit(1);
578  }
579  }
580  }
581  }
582 
583  int size = (int)ofsbuf.tellp();
584  if (explicit_repeat_count && rep > 1)
585  {
586  ofx << (int32_t)((size > 0)? size+sizeof(int) : sizeof(int))
587  << (int32_t)repCount;
588  }
589  else
590  {
591  ofx << (int32_t)((size > 0)? size : 0);
592  }
593  if (size > 0)
594  {
595  ofs << ofsbuf.str();
596  }
597  }
598  }
599 }
~HDDMmaker()
Definition: xml-hddm.cpp:76
C++ iostream like interface to read and write xdr streams.
void constructDocument(DOMElement *el)
Definition: xml-hddm.cpp:346
#define c
xercesc::DOMDocument * buildDOMDocument(const XString &xmlFile, bool keep)
Definition: XParsers.cpp:142
void usage()
Definition: t_rest.cxx:114
static char * ParticleType(Particle_t p)
Definition: particleType.h:142
Output xdr stream class.
Definition: xdr.h:62
xercesc::DOMDocument * parseInputDocument(const XString &xmlFile, bool keep)
Definition: XParsers.cpp:73
#define S(str)
Definition: xml-hddm.cpp:66
void outputStream(DOMElement *thisEl, DOMElement *modelEl, std::ostream &ofs)
Definition: xml-hddm.cpp:396
std::ostream * ofs
Definition: xml-hddm.cpp:73
#define X(str)
Definition: xml-hddm.cpp:65
int explicit_repeat_count
Definition: hddm-root.cpp:43
char text[100]
int main(int argc, char *argv[])
Definition: gendoc.cc:6
Particle_t
Definition: particleType.h:12