8 #ifndef ORCUS_SAX_PARSER_HPP 9 #define ORCUS_SAX_PARSER_HPP 11 #include "sax_parser_base.hpp" 29 template<
typename _Handler,
typename _Config = sax_parser_default_config>
33 typedef _Handler handler_type;
34 typedef _Config config_type;
36 sax_parser(
const char* content,
const size_t size, handler_type& handler);
50 void element_open(
const char* begin_pos);
51 void element_close(
const char* begin_pos);
53 void declaration(
const char* name_check);
60 handler_type& m_handler;
63 template<
typename _Handler,
typename _Config>
65 const char* content,
const size_t size, handler_type& handler) :
71 template<
typename _Handler,
typename _Config>
72 sax_parser<_Handler,_Config>::~sax_parser()
76 template<
typename _Handler,
typename _Config>
77 void sax_parser<_Handler,_Config>::parse()
85 assert(m_buffer_pos == 0);
88 template<
typename _Handler,
typename _Config>
89 void sax_parser<_Handler,_Config>::header()
94 if (!has_char() || cur_char() !=
'<')
95 throw sax::malformed_xml_error(
"xml file must begin with '<'.", offset());
97 if (config_type::baseline_version >= 11)
101 if (next_char_checked() !=
'?')
102 throw sax::malformed_xml_error(
"xml file must begin with '<?'.", offset());
108 template<
typename _Handler,
typename _Config>
109 void sax_parser<_Handler,_Config>::body()
113 if (cur_char() ==
'<')
116 if (!m_root_elem_open)
120 else if (m_nest_level)
128 template<
typename _Handler,
typename _Config>
129 void sax_parser<_Handler,_Config>::element()
131 assert(cur_char() ==
'<');
132 const char* pos = mp_char;
133 char c = next_char_checked();
143 declaration(
nullptr);
146 if (!is_alpha(c) && c !=
'_')
147 throw sax::malformed_xml_error(
"expected an alphabet.", offset());
152 template<
typename _Handler,
typename _Config>
153 void sax_parser<_Handler,_Config>::element_open(
const char* begin_pos)
155 assert(is_alpha(cur_char()) || cur_char() ==
'_');
157 sax::parser_element elem;
158 element_name(elem, begin_pos);
167 if (next_and_char() !=
'>')
168 throw sax::malformed_xml_error(
"expected '/>' to self-close the element.", offset());
170 elem.end_pos = mp_char;
171 m_handler.start_element(elem);
173 m_handler.end_element(elem);
175 m_root_elem_open =
false;
176 #if ORCUS_DEBUG_SAX_PARSER 177 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"' (self-closing)" << endl;
185 elem.end_pos = mp_char;
187 m_handler.start_element(elem);
189 #if ORCUS_DEBUG_SAX_PARSER 190 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
199 template<
typename _Handler,
typename _Config>
200 void sax_parser<_Handler,_Config>::element_close(
const char* begin_pos)
202 assert(cur_char() ==
'/');
205 sax::parser_element elem;
206 element_name(elem, begin_pos);
208 if (cur_char() !=
'>')
209 throw sax::malformed_xml_error(
"expected '>' to close the element.", offset());
211 elem.end_pos = mp_char;
213 m_handler.end_element(elem);
214 #if ORCUS_DEBUG_SAX_PARSER 215 cout <<
"element_close: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
218 m_root_elem_open =
false;
221 template<
typename _Handler,
typename _Config>
222 void sax_parser<_Handler,_Config>::special_tag()
224 assert(cur_char() ==
'!');
226 size_t len = remains();
228 throw sax::malformed_xml_error(
"special tag too short.", offset());
230 switch (next_and_char())
235 if (next_and_char() !=
'-')
236 throw sax::malformed_xml_error(
"comment expected.", offset());
240 throw sax::malformed_xml_error(
"malformed comment.", offset());
249 expects_next(
"CDATA[", 6);
257 expects_next(
"OCTYPE", 6);
264 throw sax::malformed_xml_error(
"failed to parse special tag.", offset());
268 template<
typename _Handler,
typename _Config>
269 void sax_parser<_Handler,_Config>::declaration(
const char* name_check)
271 assert(cur_char() ==
'?');
277 #if ORCUS_DEBUG_SAX_PARSER 278 cout <<
"sax_parser::declaration: start name='" << decl_name <<
"'" << endl;
281 if (name_check && decl_name != name_check)
283 std::ostringstream os;
284 os <<
"declaration name of '" << name_check <<
"' was expected, but '" << decl_name <<
"' was found instead.";
285 throw sax::malformed_xml_error(os.str(), offset());
288 m_handler.start_declaration(decl_name);
292 while (cur_char_checked() !=
'?')
297 if (next_char_checked() !=
'>')
298 throw sax::malformed_xml_error(
"declaration must end with '?>'.", offset());
300 m_handler.end_declaration(decl_name);
303 #if ORCUS_DEBUG_SAX_PARSER 304 cout <<
"sax_parser::declaration: end name='" << decl_name <<
"'" << endl;
308 template<
typename _Handler,
typename _Config>
309 void sax_parser<_Handler,_Config>::cdata()
311 size_t len = remains();
315 const char* p0 = mp_char;
316 size_t i = 0, match = 0;
317 for (
char c = cur_char(); i < len; ++i, c = next_and_char())
331 else if (c ==
'>' && match == 2)
334 size_t cdata_len = i - 2;
335 m_handler.characters(pstring(p0, cdata_len),
false);
342 throw sax::malformed_xml_error(
"malformed CDATA section.", offset());
345 template<
typename _Handler,
typename _Config>
346 void sax_parser<_Handler,_Config>::doctype()
349 sax::doctype_declaration param;
350 name(param.root_element);
354 size_t len = remains();
356 throw sax::malformed_xml_error(
"DOCTYPE section too short.", offset());
358 param.keyword = sax::doctype_declaration::keyword_type::dtd_private;
362 if (next_and_char() !=
'U' || next_and_char() !=
'B' || next_and_char() !=
'L' || next_and_char() !=
'I' || next_and_char() !=
'C')
363 throw sax::malformed_xml_error(
"malformed DOCTYPE section.", offset());
365 param.keyword = sax::doctype_declaration::keyword_type::dtd_public;
369 if (next_and_char() !=
'Y' || next_and_char() !=
'S' || next_and_char() !=
'T' || next_and_char() !=
'E' || next_and_char() !=
'M')
370 throw sax::malformed_xml_error(
"malformed DOCTYPE section.", offset());
375 has_char_throw(
"DOCTYPE section too short.");
378 value(param.fpi,
false);
380 has_char_throw(
"DOCTYPE section too short.");
382 has_char_throw(
"DOCTYPE section too short.");
384 if (cur_char() ==
'>')
387 #if ORCUS_DEBUG_SAX_PARSER 388 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"'" << endl;
390 m_handler.doctype(param);
396 value(param.uri,
false);
398 has_char_throw(
"DOCTYPE section too short.");
400 has_char_throw(
"DOCTYPE section too short.");
402 if (cur_char() !=
'>')
403 throw sax::malformed_xml_error(
"malformed DOCTYPE section - closing '>' expected but not found.", offset());
405 #if ORCUS_DEBUG_SAX_PARSER 406 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"' uri='" << param.uri <<
"'" << endl;
408 m_handler.doctype(param);
412 template<
typename _Handler,
typename _Config>
413 void sax_parser<_Handler,_Config>::characters()
415 const char* p0 = mp_char;
416 for (; has_char(); next())
418 if (cur_char() ==
'<')
421 if (cur_char() ==
'&')
424 cell_buffer& buf = get_cell_buffer();
426 buf.append(p0, mp_char-p0);
427 characters_with_encoded_char(buf);
429 m_handler.characters(pstring(),
false);
431 m_handler.characters(pstring(buf.get(), buf.size()),
true);
438 pstring val(p0, mp_char-p0);
439 m_handler.characters(val,
false);
443 template<
typename _Handler,
typename _Config>
444 void sax_parser<_Handler,_Config>::attribute()
446 sax::parser_attribute attr;
447 pstring attr_ns_name, attr_name, attr_value;
448 attribute_name(attr.ns, attr.name);
450 #if ORCUS_DEBUG_SAX_PARSER 451 std::ostringstream os;
452 os <<
"sax_parser::attribute: ns='" << attr.ns <<
"', name='" << attr.name <<
"'";
458 std::ostringstream os;
459 os <<
"Attribute must begin with 'name=..'. (ns='" << attr.ns <<
"', name='" << attr.name <<
"')";
460 throw sax::malformed_xml_error(os.str(), offset());
464 attr.transient = value(attr.value,
true);
469 #if ORCUS_DEBUG_SAX_PARSER 470 os <<
" value='" << attr.value <<
"'" << endl;
474 m_handler.attribute(attr);
Definition: sax_parser.hpp:15
static const uint8_t baseline_version
Definition: sax_parser.hpp:22
Definition: parser_base.hpp:35
Definition: base64.hpp:15
Definition: sax_parser.hpp:30
Definition: sax_parser_base.hpp:108