libdap++  Updated for version 3.12.0
D4ParserSax2.cc
Go to the documentation of this file.
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
4 // Access Protocol.
5 
6 // Copyright (c) 2012 OPeNDAP, Inc.
7 // Author: James Gallagher <jgallagher@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 
25 #include "config.h"
26 
27 #define DODS_DEBUG 1
28 #define DODS_DEBUG2 1
29 
30 #include <iostream>
31 #include <sstream>
32 
33 #include <cstring>
34 #include <cstdarg>
35 
36 #include "BaseType.h"
37 #include "Byte.h"
38 #include "Int8.h"
39 #include "Int16.h"
40 #include "UInt16.h"
41 #include "Int32.h"
42 #include "UInt32.h"
43 #include "Int64.h"
44 #include "UInt64.h"
45 
46 #include "Float32.h"
47 #include "Float64.h"
48 
49 #include "Str.h"
50 #include "Url.h"
51 
52 #include "Constructor.h"
53 
54 #include "D4Group.h"
55 
56 #include "Array.h"
57 #include "Structure.h"
58 #include "Sequence.h"
59 #include "Grid.h"
60 
61 #include "D4ParserSax2.h"
62 
63 #include "util.h"
64 // #include "mime_util.h"
65 #include "debug.h"
66 
67 namespace libdap {
68 
69 static const not_used char *states[] = {
70  "parser_start",
71 
72  // inside_group is the state just after parsing the start of a Group
73  // element.
74  "inside_group",
75 
76  "inside_attribute_container",
77  "inside_attribute",
78  "inside_attribute_value",
79  "inside_other_xml_attribute",
80 
81  "inside_enum_def",
82  "inside_enum_const",
83 
84  // This covers Byte, ..., Url, Opaque
85  "inside_simple_type",
86 
87  "inside_array",
88  "inside_dimension",
89 
90  "inside_grid",
91  "inside_map",
92 
93  "inside_structure",
94  "inside_sequence",
95 
96  "parser_unknown",
97  "parser_error" };
98 
99 // Glue the BaseTypeFactory to the enum-based factory defined statically
100 // here.
101 
102 BaseType *D4ParserSax2::factory(Type t, const string & name)
103 {
104  switch (t) {
105  case dods_byte_c:
106  return d_factory->NewByte(name);
107  case dods_uint8_c:
108  return d_factory->NewUInt8(name);
109  case dods_int8_c:
110  return d_factory->NewInt8(name);
111  case dods_int16_c:
112  return d_factory->NewInt16(name);
113  case dods_uint16_c:
114  return d_factory->NewUInt16(name);
115  case dods_int32_c:
116  return d_factory->NewInt32(name);
117  case dods_uint32_c:
118  return d_factory->NewUInt32(name);
119  case dods_int64_c:
120  return d_factory->NewInt64(name);
121  case dods_uint64_c:
122  return d_factory->NewUInt64(name);
123  case dods_float32_c:
124  return d_factory->NewFloat32(name);
125  case dods_float64_c:
126  return d_factory->NewFloat64(name);
127  case dods_str_c:
128  return d_factory->NewStr(name);
129  case dods_url_c:
130  return d_factory->NewUrl(name);
131  case dods_url4_c:
132  return d_factory->NewURL(name);
133  case dods_array_c:
134  return d_factory->NewArray(name);
135  case dods_structure_c:
136  return d_factory->NewStructure(name);
137  case dods_sequence_c:
138  return d_factory->NewSequence(name);
139  case dods_grid_c:
140  return d_factory->NewGrid(name);
141  case dods_group_c:
142  return d_factory->NewGroup(name);
143  default:
144  return 0;
145  }
146 }
147 
148 static bool is_valid_enum_value(const Type &t, long long value)
149 {
150  switch (t) {
151  case dods_int8_c:
152  return (value >= DODS_SCHAR_MIN && value <= DODS_SCHAR_MAX);
153  case dods_byte_c:
154  case dods_uint8_c:
155  return (value >= 0 && static_cast<unsigned long long>(value) <= DODS_UCHAR_MAX);
156  case dods_int16_c:
157  return (value >= DODS_SHRT_MIN && value <= DODS_SHRT_MAX);
158  case dods_uint16_c:
159  return (value >= 0 && static_cast<unsigned long long>(value) <= DODS_USHRT_MAX);
160  case dods_int32_c:
161  return (value >= DODS_INT_MIN && value <= DODS_INT_MAX);
162  case dods_uint32_c:
163  return (value >= 0 && static_cast<unsigned long long>(value) <= DODS_UINT_MAX);
164  case dods_int64_c:
165  return (value >= DODS_LLONG_MIN && value <= DODS_LLONG_MAX);
166  case dods_uint64_c:
167  return (value >= 0 && static_cast<unsigned long long>(value) <= DODS_ULLONG_MAX);
168  default:
169  return false;
170  }
171 }
172 
/**
 * Test whether two C strings differ.
 *
 * @param name The string to test (an element name in this file).
 * @param tag The string to compare it with.
 * @return True when the strings are different, false when they are equal.
 */
static bool is_not(const char *name, const char *tag)
{
    const bool same = (strcmp(name, tag) == 0);
    return !same;
}
177 
178 void D4ParserSax2::set_state(D4ParserSax2::ParseState state)
179 {
180  s.push(state);
181 }
182 
183 D4ParserSax2::ParseState D4ParserSax2::get_state() const
184 {
185  return s.top();
186 }
187 
188 void D4ParserSax2::pop_state()
189 {
190  s.pop();
191 }
192 
197 void D4ParserSax2::transfer_xml_attrs(const xmlChar **attributes, int nb_attributes)
198 {
199  if (!xml_attrs.empty())
200  xml_attrs.clear(); // erase old attributes
201 
202  unsigned int index = 0;
203  for (int i = 0; i < nb_attributes; ++i, index += 5) {
204  // Make a value using the attribute name and the prefix, namespace URI
205  // and the value. The prefix might be null.
206  xml_attrs.insert(map<string, XMLAttribute>::value_type(string((const char *) attributes[index]),
207  XMLAttribute(attributes + index + 1)));
208 
209  DBG(cerr << "Attribute '" << (const char *)attributes[index] << "': "
210  << xml_attrs[(const char *)attributes[index]].value << endl);
211  }
212 }
213 
214 void D4ParserSax2::transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces)
215 {
216  for (int i = 0; i < nb_namespaces; ++i) {
217  // make a value with the prefix and namespace URI. The prefix might be
218  // null.
219  namespace_table.insert(
220  map<string, string>::value_type(namespaces[i * 2] != 0 ? (const char *) namespaces[i * 2] : "",
221  (const char *) namespaces[i * 2 + 1]));
222  }
223 }
224 
229 bool D4ParserSax2::check_required_attribute(const string & attr)
230 {
231  map<string, XMLAttribute>::iterator i = xml_attrs.find(attr);
232  if (i == xml_attrs.end())
233  ddx_fatal_error(this, "Required attribute '%s' not found.", attr.c_str());
234  return true;
235 }
236 
242 bool D4ParserSax2::check_attribute(const string & attr)
243 {
244  return (xml_attrs.find(attr) != xml_attrs.end());
245 }
246 
255 void D4ParserSax2::process_attribute_helper(const xmlChar **attrs, int nb_attributes)
256 {
257  // These methods set the state to parser_error if a problem is found.
258  transfer_xml_attrs(attrs, nb_attributes);
259 
260  bool error = !(check_required_attribute(string("name")) && check_required_attribute(string("type")));
261  if (error)
262  return;
263 
264  if (xml_attrs["type"].value == "Container") {
265  set_state(inside_attribute_container);
266 
267  AttrTable *child;
268  AttrTable *parent = at_stack.top();
269 
270  child = parent->append_container(xml_attrs["name"].value);
271  at_stack.push(child); // save.
272  DBG2(cerr << "Pushing at" << endl);
273  }
274  else if (xml_attrs["type"].value == "OtherXML") {
275  set_state(inside_other_xml_attribute);
276 
277  dods_attr_name = xml_attrs["name"].value;
278  dods_attr_type = xml_attrs["type"].value;
279  }
280  else {
281  set_state(inside_attribute);
282 
283  dods_attr_name = xml_attrs["name"].value;
284  dods_attr_type = xml_attrs["type"].value;
285  }
286 }
287 
292 void D4ParserSax2::process_enum_def_helper(const xmlChar **attrs, int nb_attributes)
293 {
294  // These methods set the state to parser_error if a problem is found.
295  transfer_xml_attrs(attrs, nb_attributes);
296 
297  bool error = !(check_required_attribute(string("name"))
298  && check_required_attribute(string("basetype")));
299  if (error)
300  return;
301 
302  Type t = get_type(xml_attrs["basetype"].value.c_str());
303  if (!is_integer_type(t)) {
304  ddx_fatal_error(this, "Error: The Enumeration '%s' must have an integer type, instead the type '%s' was used.",
305  xml_attrs["name"].value.c_str(), xml_attrs["basetype"].value.c_str());
306  // So that the parse can continue, make the type UInt64
307  t = dods_uint64_c;
308  }
309 
310  d_enum_def = new D4EnumDef(xml_attrs["name"].value, t);
311 
312  set_state(inside_enum_def);
313 }
314 
319 void D4ParserSax2::process_enum_const_helper(const xmlChar **attrs, int nb_attributes)
320 {
321  // These methods set the state to parser_error if a problem is found.
322  transfer_xml_attrs(attrs, nb_attributes);
323 
324  bool error = !(check_required_attribute(string("name"))
325  && check_required_attribute(string("value")));
326  if (error)
327  return;
328 
329  istringstream iss(xml_attrs["value"].value);
330  long long value = 0;
331  iss >> skipws >> value;
332  if (iss.fail() || iss.bad()) {
333  ddx_fatal_error(this, "Error: Expected an integer value for an Enumeration constant, got '%s' instead.",
334  xml_attrs["value"].value.c_str());
335  }
336  else if (!is_valid_enum_value(d_enum_def->get_type(), value))
337  ddx_fatal_error(this, "Error: In an Enumeration constant, the value '%s' cannot fit in a variable of type '%s'.",
338  xml_attrs["value"].value.c_str(), type_name(d_enum_def->get_type()).c_str());
339 
340  else {
341  d_enum_def->add_value(xml_attrs["name"].value, value);
342  }
343 
344  set_state(inside_enum_const);
345 }
346 
350 void D4ParserSax2::process_dimension(const xmlChar **attrs, int nb_attributes)
351 {
352  transfer_xml_attrs(attrs, nb_attributes);
353  if (check_required_attribute(string("size"))) {
354  set_state(inside_dimension);
355  Array *ap = dynamic_cast<Array *>(bt_stack.top());
356  if (!ap) {
357  ddx_fatal_error(this, "Parse error: Expected an array variable.");
358  return;
359  }
360 
361  ap->append_dim(atoi(xml_attrs["size"].value.c_str()), xml_attrs["name"].value);
362  }
363 }
364 
371 inline bool D4ParserSax2::process_attribute(const char *name, const xmlChar **attrs, int nb_attributes)
372 {
373  if (strcmp(name, "Attribute") == 0) {
374  process_attribute_helper(attrs, nb_attributes);
375  // next state: inside_attribtue or inside_attribute_container
376  return true;
377  }
378 
379  return false;
380 }
381 
387 inline bool D4ParserSax2::process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes)
388 {
389  if (strcmp(name, "Enumeration") == 0) {
390  process_enum_def_helper(attrs, nb_attributes);
391  // next state: inside_enum_def
392  return true;
393  }
394 
395  return false;
396 }
397 
398 inline bool D4ParserSax2::process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes)
399 {
400  if (strcmp(name, "EnumConst") == 0) {
401  process_enum_const_helper(attrs, nb_attributes);
402  // next state: inside_enum_const
403  return true;
404  }
405 
406  return false;
407 }
408 
414 inline bool D4ParserSax2::process_variable(const char *name, const xmlChar **attrs, int nb_attributes)
415 {
416  Type t = get_type(name);
417  if (is_simple_type(t)) {
418  process_variable_helper(t, inside_simple_type, attrs, nb_attributes);
419  return true;
420  }
421  else if (strcmp(name, "Structure") == 0) {
422  process_variable_helper(dods_structure_c, inside_structure, attrs, nb_attributes);
423  return true;
424  }
425  else if (strcmp(name, "Sequence") == 0) {
426  process_variable_helper(dods_sequence_c, inside_sequence, attrs, nb_attributes);
427  return true;
428  }
429 
430  return false;
431 }
432 
440 void D4ParserSax2::process_variable_helper(Type t, ParseState s, const xmlChar **attrs, int nb_attributes)
441 {
442  transfer_xml_attrs(attrs, nb_attributes);
443 
444  set_state(s);
445 
446  // TODO Arrays in DAP2 were not required to have names. DAP4 is going to
447  // need a different parsing logic, but we'll come to that...
448  if (check_required_attribute("name")) { // throws on error/false
449  BaseType *btp = factory(t, xml_attrs["name"].value);
450  if (!btp)
451  ddx_fatal_error(this, "Internal parser error; could not instantiate the variable '%s'.",
452  xml_attrs["name"].value.c_str());
453 
454  // Once we make the new variable, we not only load it on to the
455  // BaseType stack, we also load its AttrTable on the AttrTable stack.
456  // The attribute processing software always operates on the AttrTable
457  // at the top of the AttrTable stack (at_stack).
458  bt_stack.push(btp);
459  at_stack.push(&btp->get_attr_table());
460  }
461 }
462 
463 void D4ParserSax2::finish_variable(const char *tag, Type t, const char *expected)
464 {
465  if (strcmp(tag, expected) != 0) {
466  D4ParserSax2::ddx_fatal_error(this, "Expected an end tag for a %s; found '%s' instead.", expected, tag);
467  return;
468  }
469 
470  pop_state();
471 
472  BaseType *btp = bt_stack.top();
473 
474  bt_stack.pop();
475  at_stack.pop();
476 
477  if (btp->type() != t) {
478  D4ParserSax2::ddx_fatal_error(this, "Internal error: Expected a %s variable.", expected);
479  return;
480  }
481  // Once libxml2 validates, this can go away. 05/30/03 jhrg
482  if (t == dods_array_c && dynamic_cast<Array *>(btp)->dimensions() == 0) {
483  D4ParserSax2::ddx_fatal_error(this, "No dimension element included in the Array '%s'.", btp->name().c_str());
484  return;
485  }
486 
487  BaseType *parent = bt_stack.top();
488 
489  if (!(parent->is_vector_type() || parent->is_constructor_type())) {
490  D4ParserSax2::ddx_fatal_error(this, "Tried to add the array variable '%s' to a non-constructor type (%s %s).",
491  tag, bt_stack.top()->type_name().c_str(), bt_stack.top()->name().c_str());
492  return;
493  }
494 
495  parent->add_var(btp);
496 }
497 
509 {
510  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
511  parser->error_msg = "";
512  parser->char_data = "";
513 
514  parser->set_state(parser_start);
515 
516  DBG2(cerr << "Parser state: " << states[parser->get_state()] << endl);
517 }
518 
522 {
523  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
524 
525  DBG2(cerr << "Ending state == " << states[parser->get_state()] << endl);
526 
527  if (parser->get_state() != parser_start)
528  D4ParserSax2::ddx_fatal_error(parser, "The document contained unbalanced tags.");
529 
530  // If we've found any sort of error, don't make the DMR; intern() will
531  // take care of the error.
532  if (parser->get_state() == parser_error)
533  return;
534 
535  // TODO Decide to remove the outer Group or leave it in place and
536  // modify print_xml_writer() so that the DMR is correct.
537 
538  parser->dds->add_var_nocopy(parser->bt_stack.top());
539  parser->bt_stack.pop();
540 }
541 
542 void D4ParserSax2::ddx_start_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI,
543  int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int /*nb_defaulted*/,
544  const xmlChar **attributes)
545 {
546  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
547  const char *localname = (const char *) l;
548 
549  DBG2(cerr << "start element: " << localname << ", state: " << states[parser->get_state()] << endl);
550 
551  switch (parser->get_state()) {
552  case parser_start:
553  if (strcmp(localname, "Group") == 0) {
554 
555  parser->set_state(inside_group);
556 
557  parser->root_ns = URI ? (const char *) URI : "";
558  parser->transfer_xml_attrs(attributes, nb_attributes);
559 
560  // Set values in/for the DDS
561  if (parser->check_required_attribute(string("name")))
562  parser->dds->set_dataset_name(parser->xml_attrs["name"].value);
563 
564  if (parser->check_attribute("dapVersion"))
565  parser->dds->set_dap_version(parser->xml_attrs["dapVersion"].value);
566 
567  // FIXME no way to record DMR version information
568 #if 0
569  if (parser->check_attribute("dmrVersion"))
570  parser->dds->set_dap_version(parser->xml_attrs["dmrVersion"].value);
571 #endif
572  if (parser->check_attribute("base"))
573  parser->dds->set_request_xml_base(parser->xml_attrs["base"].value);
574 
575  if (!parser->root_ns.empty())
576  parser->dds->set_namespace(parser->root_ns);
577 
578  // Set name of the Group; push on stack
579  BaseType *btp = parser->factory(dods_group_c, "root");
580  parser->bt_stack.push(btp);
581  parser->at_stack.push(&btp->get_attr_table());
582  }
583  else
585  "Expected DMR to start with a Group element; found '%s' instead.", localname);
586  break;
587 
588  // The state 'inside_group' means we have just parsed the start
589  // of a Group element, but none of the child elements
590  case inside_group:
591  // TODO Add Dimension and Group
592  if (parser->process_attribute(localname, attributes, nb_attributes))
593  break;
594  else if (parser->process_variable(localname, attributes, nb_attributes))
595  break;
596  else if (parser->process_enum_def(localname, attributes, nb_attributes))
597  break;
598  else
600  "Expected an Attribute, or variable element; found '%s' instead.", localname);
601  break;
602 
603  case inside_attribute_container:
604  if (parser->process_attribute(localname, attributes, nb_attributes))
605  break;
606  else
607  D4ParserSax2::ddx_fatal_error(parser, "Expected an Attribute or Alias element; found '%s' instead.",
608  localname);
609  break;
610 
611  case inside_attribute:
612  if (parser->process_attribute(localname, attributes, nb_attributes))
613  break;
614  else if (strcmp(localname, "value") == 0)
615  parser->set_state(inside_attribute_value);
616  else
617  ddx_fatal_error(parser, "Expected an 'Attribute', 'Alias' or 'value' element; found '%s' instead.",
618  localname);
619  break;
620 
621  case inside_attribute_value:
622  // FIXME
623  break;
624 
625  case inside_other_xml_attribute:
626  DBGN(cerr << endl << "\t inside_other_xml_attribute: " << localname << endl);
627 
628  parser->other_xml_depth++;
629 
630  // Accumulate the elements here
631 
632  parser->other_xml.append("<");
633  if (prefix) {
634  parser->other_xml.append((const char *) prefix);
635  parser->other_xml.append(":");
636  }
637  parser->other_xml.append(localname);
638 
639  if (nb_namespaces != 0) {
640  parser->transfer_xml_ns(namespaces, nb_namespaces);
641 
642  for (map<string, string>::iterator i = parser->namespace_table.begin();
643  i != parser->namespace_table.end(); ++i) {
644  parser->other_xml.append(" xmlns");
645  if (!i->first.empty()) {
646  parser->other_xml.append(":");
647  parser->other_xml.append(i->first);
648  }
649  parser->other_xml.append("=\"");
650  parser->other_xml.append(i->second);
651  parser->other_xml.append("\"");
652  }
653  }
654 
655  if (nb_attributes != 0) {
656  parser->transfer_xml_attrs(attributes, nb_attributes);
657  for (XMLAttrMap::iterator i = parser->xml_attr_begin(); i != parser->xml_attr_end(); ++i) {
658  parser->other_xml.append(" ");
659  if (!i->second.prefix.empty()) {
660  parser->other_xml.append(i->second.prefix);
661  parser->other_xml.append(":");
662  }
663  parser->other_xml.append(i->first);
664  parser->other_xml.append("=\"");
665  parser->other_xml.append(i->second.value);
666  parser->other_xml.append("\"");
667  }
668  }
669 
670  parser->other_xml.append(">");
671  break;
672 
673  case inside_enum_def:
674  // process an EnumConst element
675  if (parser->process_enum_const(localname, attributes, nb_attributes))
676  break;
677  else
678  ddx_fatal_error(parser, "Expected an 'EnumConst' element; found '%s' instead.", localname);
679  break;
680 
681  case inside_enum_const:
682  // Nothing to do; this element has no content
683  break;
684 
685  case inside_simple_type:
686  if (parser->process_attribute(localname, attributes, nb_attributes))
687  break;
688  else
689  ddx_fatal_error(parser, "Expected an 'Attribute' or 'Alias' element; found '%s' instead.", localname);
690  break;
691 
692  case inside_array:
693  if (parser->process_attribute(localname, attributes, nb_attributes))
694  break;
695  else if (is_not(localname, "Array") && parser->process_variable(localname, attributes, nb_attributes))
696  break;
697  else if (strcmp(localname, "dimension") == 0) {
698  parser->process_dimension(attributes, nb_attributes);
699  // next state: inside_dimension
700  }
701  else
702  ddx_fatal_error(parser, "Expected an 'Attribute' or 'Alias' element; found '%s' instead.", localname);
703  break;
704 
705  case inside_dimension:
706  ddx_fatal_error(parser,
707  "Internal parser error; unexpected state, inside dimension while processing element '%s'.",
708  localname);
709  break;
710 
711  case inside_structure:
712  if (parser->process_attribute(localname, attributes, nb_attributes))
713  break;
714  else if (parser->process_variable(localname, attributes, nb_attributes))
715  break;
716  else
718  "Expected an Attribute, Alias or variable element; found '%s' instead.", localname);
719  break;
720 
721  case inside_sequence:
722  if (parser->process_attribute(localname, attributes, nb_attributes))
723  break;
724  else if (parser->process_variable(localname, attributes, nb_attributes))
725  break;
726  else
728  "Expected an Attribute, Alias or variable element; found '%s' instead.", localname);
729  break;
730 
731  case inside_grid:
732  if (parser->process_attribute(localname, attributes, nb_attributes))
733  break;
734  else if (strcmp(localname, "Array") == 0)
735  parser->process_variable_helper(dods_array_c, inside_array, attributes, nb_attributes);
736  else if (strcmp(localname, "Map") == 0)
737  parser->process_variable_helper(dods_array_c, inside_map, attributes, nb_attributes);
738  else
740  "Expected an Attribute, Alias or variable element; found '%s' instead.", localname);
741  break;
742 
743  case inside_map:
744  if (parser->process_attribute(localname, attributes, nb_attributes))
745  break;
746  else if (is_not(localname, "Array") && is_not(localname, "Sequence") && is_not(localname, "Grid")
747  && parser->process_variable(localname, attributes, nb_attributes))
748  break;
749  else if (strcmp(localname, "dimension") == 0) {
750  parser->process_dimension(attributes, nb_attributes);
751  // next state: inside_dimension
752  }
753  else
754  ddx_fatal_error(parser,
755  "Expected an 'Attribute', 'Alias', variable or 'dimension' element; found '%s' instead.",
756  localname);
757  break;
758 
759  case parser_unknown:
760  // *** Never used? If so remove/error
761  parser->set_state(parser_unknown);
762  break;
763 
764  case parser_error:
765  break;
766  }
767 
768  DBGN(cerr << " ... " << states[parser->get_state()] << endl);
769 }
770 
771 void D4ParserSax2::ddx_end_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI)
772 {
773  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
774  const char *localname = (const char *) l;
775 
776  DBG2(cerr << "End element " << localname << " (state "
777  << states[parser->get_state()] << ")" << endl);
778 
779  switch (parser->get_state()) {
780  case parser_start:
781  ddx_fatal_error(parser,
782  "Internal parser error; unexpected state, inside start state while processing element '%s'.",
783  localname);
784  break;
785 
786  case inside_group:
787  if (strcmp(localname, "Group") == 0)
788  parser->pop_state();
789  else
790  D4ParserSax2::ddx_fatal_error(parser, "Expected an end Group tag; found '%s' instead.", localname);
791  break;
792 
793  case inside_attribute_container:
794  if (strcmp(localname, "Attribute") == 0) {
795  parser->pop_state();
796  parser->at_stack.pop(); // pop when leaving a container.
797  }
798  else
799  D4ParserSax2::ddx_fatal_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
800  break;
801 
802  case inside_attribute:
803  if (strcmp(localname, "Attribute") == 0)
804  parser->pop_state();
805  else
806  D4ParserSax2::ddx_fatal_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
807  break;
808 
809  case inside_attribute_value:
810  if (strcmp(localname, "value") == 0) {
811  parser->pop_state();
812  AttrTable *atp = parser->at_stack.top();
813  atp->append_attr(parser->dods_attr_name, parser->dods_attr_type, parser->char_data);
814  parser->char_data = ""; // Null this after use.
815  }
816  else
817  D4ParserSax2::ddx_fatal_error(parser, "Expected an end value tag; found '%s' instead.", localname);
818 
819  break;
820 
821  case inside_other_xml_attribute: {
822  if (strcmp(localname, "Attribute") == 0 && parser->root_ns == (const char *) URI) {
823 
824  DBGN(cerr << endl << "\t Popping the 'inside_other_xml_attribute' state"
825  << endl);
826 
827  parser->pop_state();
828 
829  AttrTable *atp = parser->at_stack.top();
830  atp->append_attr(parser->dods_attr_name, parser->dods_attr_type, parser->other_xml);
831 
832  parser->other_xml = ""; // Null this after use.
833  }
834  else {
835  DBGN(cerr << endl << "\t inside_other_xml_attribute: " << localname
836  << ", depth: " << parser->other_xml_depth << endl);
837  if (parser->other_xml_depth == 0)
839  "Expected an OtherXML attribute to end! Instead I found '%s'", localname);
840  parser->other_xml_depth--;
841 
842  parser->other_xml.append("</");
843  if (prefix) {
844  parser->other_xml.append((const char *) prefix);
845  parser->other_xml.append(":");
846  }
847  parser->other_xml.append(localname);
848  parser->other_xml.append(">");
849  }
850  break;
851  }
852 
853  case inside_enum_def:
854  if (strcmp(localname, "Enumeration") == 0) {
855  BaseType *btp = parser->bt_stack.top();
856  if (!btp || btp->type() != dods_group_c)
857  D4ParserSax2::ddx_fatal_error(parser, "Expected a Group to be the current item, while finishing up an %s.", localname);
858  else {
859  D4Group *g = static_cast<D4Group*>(btp);
860  g->add_enumeration_nocopy(parser->d_enum_def);
861  parser->pop_state();
862  }
863  }
864  else {
865  D4ParserSax2::ddx_fatal_error(parser, "Expected an end Enumeration tag; found '%s' instead.", localname);
866  }
867  break;
868 
869  case inside_enum_const:
870  if (strcmp(localname, "EnumConst") == 0)
871  parser->pop_state();
872  else
873  D4ParserSax2::ddx_fatal_error(parser, "Expected an end EnumConst tag; found '%s' instead.", localname);
874  break;
875 
876  case inside_simple_type:
877  if (is_simple_type(get_type(localname))) {
878  parser->pop_state();
879  BaseType *btp = parser->bt_stack.top();
880  parser->bt_stack.pop();
881  parser->at_stack.pop();
882 
883  BaseType *parent = parser->bt_stack.top();
884 
885  // NB: This works because we seed the stack with a dummy
886  // structure instance at the start of the parse. When the
887  // parse is complete the variables in that structure will
888  // be transferred to the DDS. Or maybe someday we will really
889  // use a Structure instance in DDS...
890  if (parent->is_vector_type() || parent->is_constructor_type())
891  parent->add_var(btp);
892  else
894  "Tried to add the simple-type variable '%s' to a non-constructor type (%s %s).", localname,
895  parser->bt_stack.top()->type_name().c_str(), parser->bt_stack.top()->name().c_str());
896  }
897  else
898  D4ParserSax2::ddx_fatal_error(parser, "Expected an end tag for a simple type; found '%s' instead.",
899  localname);
900  break;
901 
902  case inside_array:
903  parser->finish_variable(localname, dods_array_c, "Array");
904  break;
905 
906  case inside_dimension:
907  if (strcmp(localname, "dimension") == 0)
908  parser->pop_state();
909  else
910  D4ParserSax2::ddx_fatal_error(parser, "Expected an end dimension tag; found '%s' instead.", localname);
911  break;
912 
913  case inside_structure:
914  parser->finish_variable(localname, dods_structure_c, "Structure");
915  break;
916 
917  case inside_sequence:
918  parser->finish_variable(localname, dods_sequence_c, "Sequence");
919  break;
920 
921  case inside_grid:
922  parser->finish_variable(localname, dods_grid_c, "Grid");
923  break;
924 
925  case inside_map:
926  parser->finish_variable(localname, dods_array_c, "Map");
927  break;
928 
929  case parser_unknown:
930  parser->pop_state();
931  break;
932 
933  case parser_error:
934  break;
935  }
936 
937  DBGN(cerr << " ... " << states[parser->get_state()] << endl);
938 }
939 
943 void D4ParserSax2::ddx_get_characters(void * p, const xmlChar * ch, int len)
944 {
945  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
946 
947  switch (parser->get_state()) {
948  case inside_attribute_value:
949  parser->char_data.append((const char *) (ch), len);
950  DBG2(cerr << "Characters: '" << parser->char_data << "'" << endl);
951  break;
952 
953  case inside_other_xml_attribute:
954  parser->other_xml.append((const char *) (ch), len);
955  DBG2(cerr << "Other XML Characters: '" << parser->other_xml << "'" << endl);
956  break;
957 
958  default:
959  break;
960  }
961 }
962 
967 void D4ParserSax2::ddx_ignoreable_whitespace(void *p, const xmlChar *ch, int len)
968 {
969  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
970 
971  switch (parser->get_state()) {
972  case inside_other_xml_attribute:
973  parser->other_xml.append((const char *) (ch), len);
974  break;
975 
976  default:
977  break;
978  }
979 }
980 
986 void D4ParserSax2::ddx_get_cdata(void *p, const xmlChar *value, int len)
987 {
988  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
989 
990  switch (parser->get_state()) {
991  case inside_other_xml_attribute:
992  parser->other_xml.append((const char *) (value), len);
993  break;
994 
995  case parser_unknown:
996  break;
997 
998  default:
999  D4ParserSax2::ddx_fatal_error(parser, "Found a CData block but none are allowed by DAP.");
1000 
1001  break;
1002  }
1003 }
1004 
1009 xmlEntityPtr D4ParserSax2::ddx_get_entity(void *, const xmlChar * name)
1010 {
1011  return xmlGetPredefinedEntity(name);
1012 }
1013 
1024 void D4ParserSax2::ddx_fatal_error(void * p, const char *msg, ...)
1025 {
1026  va_list args;
1027  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1028 
1029  parser->set_state(parser_error);
1030 
1031  va_start(args, msg);
1032  char str[1024];
1033  vsnprintf(str, 1024, msg, args);
1034  va_end(args);
1035 
1036  int line = xmlSAX2GetLineNumber(parser->ctxt);
1037 
1038  parser->error_msg += "At line " + long_to_string(line) + ": ";
1039  parser->error_msg += string(str) + string("\n");
1040 }
1041 
1043 
1044 void D4ParserSax2::cleanup_parse(xmlParserCtxtPtr & context) const
1045 {
1046  if (!context->wellFormed) {
1047  context->sax = NULL;
1048  xmlFreeParserCtxt(context);
1049  throw D4ParseError(string("\nThe DDX is not a well formed XML document.\n") + error_msg);
1050  }
1051 
1052  if (!context->valid) {
1053  context->sax = NULL;
1054  xmlFreeParserCtxt(context);
1055  throw D4ParseError(string("\nThe DDX is not a valid document.\n") + error_msg);
1056  }
1057 
1058  if (get_state() == parser_error) {
1059  context->sax = NULL;
1060  xmlFreeParserCtxt(context);
1061  throw D4ParseError(string("\nError parsing DMR response.\n") + error_msg);
1062  }
1063 
1064  context->sax = NULL;
1065  xmlFreeParserCtxt(context);
1066 }
1067 
1084 void D4ParserSax2::intern(istream &f, DDS *dest_dds)
1085 {
1086  // Code example from libxml2 docs re: read from a stream.
1087 
1088  if (!f.good())
1089  throw InternalErr(__FILE__, __LINE__, "Input stream not open or read error");
1090 
1091  const int size = 1024;
1092  char chars[size];
1093 
1094  f.getline(chars, size);
1095  int res = f.gcount();
1096  if (res > 0) {
1097 
1098  DBG(cerr << "line: (" << res << "): " << chars << endl);
1099  xmlParserCtxtPtr context = xmlCreatePushParserCtxt(NULL, NULL, chars, res - 1, "stream");
1100 
1101  ctxt = context; // need ctxt for error messages
1102  dds = dest_dds; // dump values here
1103  d_factory = dynamic_cast<D4BaseTypeFactory*>(dds->get_factory());
1104  if (!d_factory)
1105  throw InternalErr(__FILE__, __LINE__, "Invalid factory class");
1106 
1107  xmlSAXHandler ddx_sax_parser;
1108  memset(&ddx_sax_parser, 0, sizeof(xmlSAXHandler));
1109 
1110  ddx_sax_parser.getEntity = &D4ParserSax2::ddx_get_entity;
1111  ddx_sax_parser.startDocument = &D4ParserSax2::ddx_start_document;
1112  ddx_sax_parser.endDocument = &D4ParserSax2::ddx_end_document;
1113  ddx_sax_parser.characters = &D4ParserSax2::ddx_get_characters;
1114  ddx_sax_parser.ignorableWhitespace = &D4ParserSax2::ddx_ignoreable_whitespace;
1115  ddx_sax_parser.cdataBlock = &D4ParserSax2::ddx_get_cdata;
1116  ddx_sax_parser.warning = &D4ParserSax2::ddx_fatal_error;
1117  ddx_sax_parser.error = &D4ParserSax2::ddx_fatal_error;
1118  ddx_sax_parser.fatalError = &D4ParserSax2::ddx_fatal_error;
1119  ddx_sax_parser.initialized = XML_SAX2_MAGIC;
1120  ddx_sax_parser.startElementNs = &D4ParserSax2::ddx_start_element;
1121  ddx_sax_parser.endElementNs = &D4ParserSax2::ddx_end_element;
1122 
1123  context->sax = &ddx_sax_parser;
1124  context->userData = this;
1125  context->validate = true;
1126 
1127  f.getline(chars, size);
1128  while ((f.gcount() > 0)) {
1129  DBG(cerr << "line: (" << f.gcount() << "): " << chars << endl);
1130  xmlParseChunk(ctxt, chars, f.gcount() - 1, 0);
1131  f.getline(chars, size);
1132  }
1133  // This call ends the parse: The fourth argument of xmlParseChunk is
1134  // the bool 'terminate.'
1135  xmlParseChunk(ctxt, chars, 0, 1);
1136 
1137  cleanup_parse(context);
1138  }
1139 }
1140 
1152 void D4ParserSax2::intern(const string &document, DDS *dest_dds)
1153 {
1154  istringstream iss(document);
1155  intern(iss, dest_dds);
1156 #if 0
1157  // Create the context pointer explicitly so that we can store a pointer
1158  // to it in the DDXParserDAP4 instance. This provides a way to generate our
1159  // own error messages *with* line numbers. The messages are pretty
1160  // meaningless otherwise. This means that we use an interface from the
1161  // 'parser internals' header, and not the 'parser' header. However, this
1162  // interface is also used in one of the documented examples, so it's
1163  // probably pretty stable. 06/02/03 jhrg
1164  xmlParserCtxtPtr context = xmlCreateFileParserCtxt(document.c_str());
1165  if (!context)
1166  throw D4ParseError(string("Could not initialize the parser with the file: '") + document + string("'."));
1167 
1168  dds = dest_dds; // dump values here
1169  ctxt = context; // need ctxt for error messages
1170 
1171  xmlSAXHandler ddx_sax_parser;
1172  memset(&ddx_sax_parser, 0, sizeof(xmlSAXHandler));
1173 
1174  ddx_sax_parser.getEntity = &D4ParserSax2::ddx_get_entity;
1175  ddx_sax_parser.startDocument = &D4ParserSax2::ddx_start_document;
1176  ddx_sax_parser.endDocument = &D4ParserSax2::ddx_end_document;
1177  ddx_sax_parser.characters = &D4ParserSax2::ddx_get_characters;
1178  ddx_sax_parser.ignorableWhitespace = &D4ParserSax2::ddx_ignoreable_whitespace;
1179  ddx_sax_parser.cdataBlock = &D4ParserSax2::ddx_get_cdata;
1180  ddx_sax_parser.warning = &D4ParserSax2::ddx_fatal_error;
1181  ddx_sax_parser.error = &D4ParserSax2::ddx_fatal_error;
1182  ddx_sax_parser.fatalError = &D4ParserSax2::ddx_fatal_error;
1183  ddx_sax_parser.initialized = XML_SAX2_MAGIC;
1184  ddx_sax_parser.startElementNs = &D4ParserSax2::ddx_start_element;
1185  ddx_sax_parser.endElementNs = &D4ParserSax2::ddx_end_element;
1186 
1187  context->sax = &ddx_sax_parser;
1188  context->userData = this;
1189  context->validate = false;
1190 
1191  xmlParseDocument(context);
1192 
1193  cleanup_parse(context);
1194 #endif
1195 }
1196 
1197 } // namespace libdap