Commit ddbff8ff authored and committed by Braden McDaniel
Browse files

Add support for character data parsing (and reporting)

parent d19d1cd3
......@@ -51,6 +51,13 @@ namespace endoframe {
/// \param[in] name Element name.
///
void end_element(std::string name) {}
///
/// Called when character data is encountered.
///
/// This default implementation is a no-op: the data is discarded.
///
/// \param[in] chars Character data, received by value.
///
void characters(std::string chars) {}
};
template <typename ContentHandler>
......@@ -81,12 +88,10 @@ bool endoframe::xml::parse(std::istream & in, ContentHandler & content_handler)
try {
content_handler.start_document();
result =
phrase_parse(
pos, end,
x3::with<grammar::content_handler_tag>(
std::ref(content_handler))
[grammar::document],
x3::space);
parse(pos, end,
x3::with<grammar::content_handler_tag>(
std::ref(content_handler))
[grammar::document]);
content_handler.end_document();
} catch (const x3::expectation_failure<decltype(pos)> & ex)
{
......
......@@ -41,6 +41,19 @@ struct on_element_end {
struct e_tag_class : on_element_end {};
/// Success hook that hands parsed character data to the content handler
/// stored in the parse context under content_handler_tag.
struct on_char_data {
    /// Forwards the rule's attribute to the handler's `characters` member.
    ///
    /// The two iterator arguments (start and end of the match) are unused.
    ///
    /// \param[in,out] chars    Attribute produced by the rule; it is moved
    ///                         into the handler call.
    /// \param[in]     context  Parse context; must provide an entry for
    ///                         content_handler_tag.
    ///
    /// NOTE(review): the CharData grammar can match an empty run, in which
    /// case this presumably reports an empty `chars` to the handler --
    /// confirm whether handlers are meant to see such empty events.
    template <typename T, typename Iterator, typename Context>
    void on_success(const Iterator &, const Iterator &, T & chars,
                    const Context & context)
    {
        auto & handler =
            boost::spirit::x3::get<content_handler_tag>(context).get();
        handler.characters(std::move(chars));
    }
};
/// Rule ID type for the CharData rule; deriving from on_char_data gives
/// the rule that struct's on_success hook.
struct char_data_class : on_char_data {};
const boost::spirit::x3::rule<class document> document = "document";
const boost::spirit::x3::rule<class prolog> prolog = "prolog";
const boost::spirit::x3::rule<class xml_decl> xml_decl = "XMLDecl";
......@@ -60,22 +73,28 @@ const boost::spirit::x3::rule<class attribute_, xml::attribute> attribute =
const boost::spirit::x3::rule<class name, std::string> name = "Name";
const boost::spirit::x3::rule<class att_value, std::string> att_value =
"AttValue";
const boost::spirit::x3::rule<char_data_class, std::string> char_data = "CharData";
#ifdef __GNUG__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wparentheses"
#endif
const auto version_info_def =
boost::spirit::x3::lit("version") > '='
> (boost::spirit::x3::lit(R"("1.0")") | "'1.0'")
/// Whitespace run: one or more x3::space characters. The match is wrapped
/// in omit[], so it contributes nothing to the enclosing attribute.
/// NOTE(review): presumably models the XML `S` production -- confirm that
/// the character set accepted by x3::space is appropriate there.
const auto s
= boost::spirit::x3::omit[+boost::spirit::x3::space]
;
/// An '=' sign with optional whitespace on either side.
/// NOTE(review): the leading `-s` cannot fail and is followed by the
/// expectation operator `>`, so a missing '=' presumably raises
/// x3::expectation_failure instead of a plain mismatch -- confirm this is
/// what every user of `eq` wants.
const auto eq
= -s > '=' > -s
;
const auto version_info_def
= s > "version" > eq > (boost::spirit::x3::lit(R"("1.0")") | "'1.0'")
;
const auto encoding_decl_def
= boost::spirit::x3::lit("encoding") > '='
> ( boost::spirit::x3::lexeme['\'' >> enc_name >> '\'']
| boost::spirit::x3::lexeme['"' >> enc_name >> '"']
)
= s > "encoding" > eq > ( boost::spirit::x3::lexeme['\'' >> enc_name >> '\'']
| boost::spirit::x3::lexeme['"' >> enc_name >> '"'] )
;
const auto enc_name_def
......@@ -84,11 +103,15 @@ const auto enc_name_def
;
const auto xml_decl_def
= boost::spirit::x3::lit("<?xml") > version_info > -encoding_decl > "?>"
= boost::spirit::x3::lit("<?xml") > version_info > -encoding_decl > -s > "?>"
;
const auto misc
= s
;
const auto prolog_def
= -xml_decl
= -xml_decl >> *misc
;
const auto name_start_char
......@@ -168,23 +191,28 @@ const auto att_value_def
;
const auto attribute_def
= name > '=' > att_value
= name > eq > att_value
;
const auto elem_start_def
= boost::spirit::x3::lexeme['<' >> name] >> *attribute
= boost::spirit::x3::lexeme['<' >> name] >> *(s >> attribute)
;
const auto empty_elem_tag_def
= elem_start >> "/>"
= elem_start >> -s >> "/>"
;
const auto s_tag_def
= elem_start >> '>'
= elem_start >> -s >> '>'
;
/// Character data: a (possibly empty) run of characters other than '<'
/// and '&', followed by an optional literal "]]>".
/// NOTE(review): the repetition only excludes '<' and '&', so it also
/// consumes ']' and '>' and can stop only at '<', '&' or end of input.
/// The optional "]]>" that follows therefore looks unable to ever match
/// anything. If the intent was to keep "]]>" out of character data, this
/// definition needs revisiting -- confirm against the XML CharData rule.
const auto char_data_def
= *(boost::spirit::x3::char_ - boost::spirit::x3::char_("<&"))
>> -boost::spirit::x3::lit("]]>")
;
const auto content
= *(element | reference)
= -char_data > *((element | reference) >> -char_data)
;
const auto e_tag_def
......@@ -216,6 +244,7 @@ BOOST_SPIRIT_DEFINE(
s_tag,
elem_start,
e_tag,
char_data,
name,
attribute,
att_value
......
......@@ -5,6 +5,18 @@
#include <boost/variant.hpp>
#include <endoframe/xml.h>
// Checks that the XMLDecl rule, used on its own, accepts a declaration
// carrying both a version and an encoding and consumes the whole input.
BOOST_AUTO_TEST_CASE(xml_decl)
{
namespace x3 = boost::spirit::x3;
const std::string decl = R"(<?xml version="1.0" encoding="UTF-8"?>)";
auto pos = decl.begin();
// Plain parse (no skipper argument): whitespace has to be matched by the
// grammar itself.
auto result = x3::parse(pos, decl.end(),
endoframe::xml::grammar::xml_decl);
BOOST_TEST_REQUIRE(result == true);
// `pos` is advanced by parse; reaching end() means nothing was left over.
BOOST_TEST((pos == decl.end()) == true);
}
const std::string good_names[] = {
"my_name",
"my-name",
......@@ -22,8 +34,7 @@ BOOST_DATA_TEST_CASE(name_good, good_names)
auto pos = sample.begin();
std::string parse_result;
auto result =
phrase_parse(pos, sample.end(),
endoframe::xml::grammar::name, x3::space, parse_result);
parse(pos, sample.end(), endoframe::xml::grammar::name, parse_result);
BOOST_TEST_REQUIRE(result == true);
BOOST_TEST_REQUIRE((pos == sample.end()) == true);
BOOST_TEST(parse_result == sample);
......@@ -42,7 +53,7 @@ BOOST_DATA_TEST_CASE(name_bad, bad_names)
auto pos = sample.begin();
std::string parse_result;
phrase_parse(pos, sample.end(), endoframe::xml::grammar::name, x3::space, parse_result);
parse(pos, sample.end(), endoframe::xml::grammar::name, parse_result);
BOOST_TEST_REQUIRE((pos == sample.end()) == false);
}
......@@ -70,8 +81,8 @@ BOOST_DATA_TEST_CASE(char_ref_good, good_char_refs)
auto pos = sample.test_val.begin();
std::uint32_t parse_result;
auto result =
phrase_parse(pos, sample.test_val.end(),
endoframe::xml::grammar::char_ref, x3::space, parse_result);
parse(pos, sample.test_val.end(),
endoframe::xml::grammar::char_ref, parse_result);
BOOST_TEST_REQUIRE(result == true);
BOOST_TEST_REQUIRE((pos == sample.test_val.end()) == true);
BOOST_TEST(parse_result == sample.expect);
......@@ -101,8 +112,8 @@ BOOST_DATA_TEST_CASE(entity_ref_good, good_entity_refs)
auto pos = sample.test_val.begin();
std::string parse_result;
auto result =
phrase_parse(pos, sample.test_val.end(),
endoframe::xml::grammar::entity_ref, x3::space, parse_result);
parse(pos, sample.test_val.end(),
endoframe::xml::grammar::entity_ref, parse_result);
BOOST_TEST_REQUIRE(result == true);
BOOST_TEST_REQUIRE((pos == sample.test_val.end()) == true);
BOOST_TEST(parse_result == sample.expect);
......@@ -146,8 +157,8 @@ BOOST_DATA_TEST_CASE(reference_good, good_references)
auto pos = sample.test_val.begin();
boost::variant<std::string, std::uint32_t> parse_result;
auto result =
phrase_parse(pos, sample.test_val.end(),
endoframe::xml::grammar::reference, x3::space, parse_result);
parse(pos, sample.test_val.end(),
endoframe::xml::grammar::reference, parse_result);
BOOST_TEST_REQUIRE(result == true);
BOOST_TEST_REQUIRE((pos == sample.test_val.end()) == true);
BOOST_TEST(parse_result == sample.expect);
......@@ -179,9 +190,8 @@ BOOST_DATA_TEST_CASE(att_value_good, good_att_values)
auto pos = sample.test_val.begin();
std::string parse_result;
auto result =
phrase_parse(pos, sample.test_val.end(),
endoframe::xml::grammar::att_value, x3::space,
parse_result);
parse(pos, sample.test_val.end(),
endoframe::xml::grammar::att_value, parse_result);
BOOST_TEST_REQUIRE(result == true);
BOOST_TEST_REQUIRE((pos == sample.test_val.end()) == true);
BOOST_TEST(parse_result == sample.expect);
......@@ -201,7 +211,10 @@ static std::ostream & operator<<(std::ostream & out,
}
const good_attribute_sample good_attributes[] = {
{ "foo=\"abc&#123;abc\"", { "foo", "abc&#123;abc" } }
{ "foo=\"abc&#123;abc\"", { "foo", "abc&#123;abc" } },
{ "foo =\"abc&#123;abc\"", { "foo", "abc&#123;abc" } },
{ "foo= \"abc&#123;abc\"", { "foo", "abc&#123;abc" } },
{ "foo = \"abc&#123;abc\"", { "foo", "abc&#123;abc" } }
};
BOOST_DATA_TEST_CASE(attribute_good, good_attributes)
......@@ -211,9 +224,9 @@ BOOST_DATA_TEST_CASE(attribute_good, good_attributes)
auto pos = sample.test_val.begin();
endoframe::xml::attribute parse_result;
auto result =
phrase_parse(pos, sample.test_val.end(),
endoframe::xml::grammar::attribute, x3::space,
parse_result);
parse(pos, sample.test_val.end(),
endoframe::xml::grammar::attribute,
parse_result);
BOOST_TEST_REQUIRE(result == true);
BOOST_TEST_REQUIRE((pos == sample.test_val.end()) == true);
BOOST_TEST(parse_result.name == sample.expect.name);
......@@ -222,7 +235,6 @@ BOOST_DATA_TEST_CASE(attribute_good, good_attributes)
BOOST_AUTO_TEST_CASE(start_and_end_document)
{
namespace x3 = boost::spirit::x3;
namespace xml = endoframe::xml;
struct content_handler : xml::content_handler {
......@@ -241,8 +253,9 @@ BOOST_AUTO_TEST_CASE(start_and_end_document)
}
};
const std::string doc = R"(
<?xml version="1.0" encoding="UTF-8"?> <element> </element>
const std::string doc =
R"(<?xml version="1.0" encoding="UTF-8"?>
<element> </element>
)";
std::istringstream in{doc};
......@@ -254,7 +267,6 @@ BOOST_AUTO_TEST_CASE(start_and_end_document)
BOOST_AUTO_TEST_CASE(start_and_end_element)
{
namespace x3 = boost::spirit::x3;
namespace xml = endoframe::xml;
struct content_handler : xml::content_handler {
......@@ -281,8 +293,9 @@ BOOST_AUTO_TEST_CASE(start_and_end_element)
}
};
const std::string doc = R"(
<?xml version="1.0" encoding="UTF-8"?> <element name1="val1" name2="val2"> </element>
const std::string doc =
R"(<?xml version="1.0" encoding="UTF-8"?>
<element name1="val1" name2="val2"> </element>
)";
std::istringstream in{doc};
......@@ -291,3 +304,28 @@ BOOST_AUTO_TEST_CASE(start_and_end_element)
BOOST_TEST_REQUIRE(result == true);
BOOST_TEST(doc_handler.end_element_called == true);
}
// Checks that text between a start tag and an end tag reaches the
// handler's characters() unchanged, surrounding spaces included.
BOOST_AUTO_TEST_CASE(char_data)
{
namespace xml = endoframe::xml;
// Handler that records whether characters() ran and checks its argument.
struct content_handler : xml::content_handler {
bool characters_called = false;
void characters(std::string chars) {
characters_called = true;
// The spaces around "foo" are expected to be part of the reported data.
BOOST_TEST(chars == " foo ");
}
};
const std::string doc =
R"(<?xml version="1.0" encoding="UTF-8"?>
<element> foo </element>
)";
std::istringstream in{doc};
content_handler doc_handler;
auto result = xml::parse(in, doc_handler);
BOOST_TEST_REQUIRE(result == true);
// Guards against the parse succeeding without ever reporting the text.
BOOST_TEST(doc_handler.characters_called == true);
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment