00001 #ifndef H_XML
00002 #define H_XML
00003
00004 #include <iostream>
00005 #include <fstream>
00006 #include <sstream>
00007 #include <vector>
00008 #include <map>
00009 #include <xstring.h>
00010
00016 class xml_element
00017 {
00018 typedef xml_element& reference;
00019 typedef xml_element* pointer;
00020 typedef std::vector<xml_element*> child_vec;
00021 typedef std::map<xstring,xstring> attr_map;
00022
00023 xstring m_Type;
00024 child_vec m_Children;
00025 attr_map m_Attributes;
00026 xstring m_Content;
00027
00033 xml_element(const xml_element&) {}
00034 xml_element& operator= (const xml_element&) { return *this; }
00035 public:
00036 xml_element(const xstring& type="") : m_Type(type) {}
00037 ~xml_element()
00038 {
00039 for(iterator b=begin();b!=end();++b) delete *b;
00040 }
00041
00042 void set_type(const xstring& type) { m_Type=type; }
00043 const xstring& get_type() const { return m_Type; }
00044
00045 int get_child_count() const { return int(m_Children.size()); }
00046 void add_child(pointer p) { m_Children.push_back(p); }
00047
00048 xml_element* add_child(const xstring& type)
00049 {
00050 xml_element* child=new xml_element(type);
00051 add_child(child);
00052 return child;
00053 }
00054
00057 void remove(pointer child, bool delete_child)
00058 {
00059 for(iterator it=begin();it!=end();++it)
00060 {
00061 xml_element* c=*it;
00062 if (c==child) { m_Children.erase(it); break; }
00063 }
00064 if (delete_child) delete child;
00065 }
00066
00068 xml_element* find_child(const xstring& type)
00069 {
00070 for(iterator it=begin();it!=end();++it)
00071 {
00072 xml_element* c=*it;
00073 if (c->get_type()==type) return c;
00074 }
00075 return 0;
00076 }
00077
00078 bool has_attribute(const xstring& name) const { return m_Attributes.count(name)>0; }
00079 void set_attribute(const xstring& name, const xstring& value) { m_Attributes[name]=value; }
00080 xstring get_attribute(const xstring& name) const
00081 {
00082 attr_map::const_iterator it=m_Attributes.find(name);
00083 if (it==m_Attributes.end()) return "";
00084 return it->second;
00085 }
00086
00087 typedef child_vec::iterator iterator;
00088 typedef child_vec::const_iterator const_iterator;
00089 iterator begin() { return m_Children.begin(); }
00090 iterator end() { return m_Children.end(); }
00091 const_iterator begin() const { return m_Children.begin(); }
00092 const_iterator end() const { return m_Children.end(); }
00093
00094 typedef attr_map::const_iterator attr_iterator;
00095 attr_iterator attr_begin() const { return m_Attributes.begin(); }
00096 attr_iterator attr_end() const { return m_Attributes.end(); }
00097
00098 void print(std::ostream& os=std::cout, int indent=0, bool packed=false) const
00099 {
00100 xstring spaces=packed?xstring(""):xstring(indent,' ');
00101 xstring eol=packed?xstring(""):xstring("\n");
00102 os << spaces << "<" << get_type();
00103 for(attr_iterator it=attr_begin();it!=attr_end();++it)
00104 os << " " << it->first << "=\"" << it->second << "\"";
00105 if (get_child_count()==0 && m_Content.empty()) { os << "/>" << eol; return; }
00106 os << ">" << eol;
00107 if (!m_Content.empty()) os << spaces << m_Content << eol;
00108 for(const_iterator ci=begin();ci!=end();++ci)
00109 (*ci)->print(os,indent+2,packed);
00110 os << spaces << "</" << get_type() << ">" << eol;
00111 }
00112
00113 xstring print(bool packed)
00114 {
00115 std::ostringstream os;
00116 print(os,0,packed);
00117 return xstring(os.str());
00118 }
00119 };
00120
00121 class xml_parser
00122 {
00123 public:
00124 xml_parser() : m_InQuotes(false), m_LineNumber(1) {}
00125 private:
00126 enum Token { LTAG, RTAG, EQ, QUOTES, SLASH, IDENT, TEXT, QUESTION, XEOF };
00127
00128 bool m_InQuotes;
00129 int m_LineNumber;
00130
00131 static bool is_white_space(char c)
00132 {
00133 return (c<=32);
00134 }
00135
00136 static bool quotes_pred(char c)
00137 {
00138 return c=='"';
00139 }
00140
00141 static bool not_alnum(char c)
00142 {
00143 return ((c<'A' || c>'Z') && (c<'a' || c>'z') && (c<'0' || c>'9'));
00144 }
00145
00146 static bool is_question(char c)
00147 {
00148 return (c=='?');
00149 }
00150
00151 Token analyze(std::istream& is, xstring& token_text)
00152 {
00153 char ch=' ';
00154 while (!is.eof() && is_white_space(ch))
00155 {
00156 ch=is.get();
00157 if (ch=='\n') ++m_LineNumber;
00158 }
00159 token_text="";
00160 if (is.eof()) return XEOF;
00161 token_text=xstring(1,ch);
00162 if (ch=='"')
00163 {
00164 m_InQuotes=!m_InQuotes;
00165 return QUOTES;
00166 }
00167 if (m_InQuotes)
00168 {
00169 token_text+=read_until(is,quotes_pred);
00170 return TEXT;
00171 }
00172 if (ch=='<') { return LTAG; }
00173 if (ch=='>') { return RTAG; }
00174 if (ch=='=') { return EQ; }
00175 if (ch=='/') { return SLASH; }
00176 if (ch=='?') { return QUESTION; }
00177 token_text+=read_until(is,not_alnum);
00178 return IDENT;
00179 }
00180
00181 template<class PRED>
00182 xstring read_until(std::istream& is, PRED p)
00183 {
00184 xstring res;
00185 while (!is.eof())
00186 {
00187 char ch=is.peek();
00188 if (p(ch)) return res;
00189 ch=is.get();
00190 res+=xstring(1,ch);
00191 }
00192 return res;
00193 }
00194
00195 #define SYNTAX_ERROR throw "Syntax Error"
00196 #define EXPECT(t) { token=analyze(is,last); if (token!=t) SYNTAX_ERROR; }
00197
00198 void parse_element(std::istream& is, xml_element* parent)
00199 {
00200 xstring last;
00201 Token token;
00202 while (true)
00203 {
00204 token=analyze(is,last);
00205 if (token==XEOF) return;
00206 if (token==LTAG)
00207 {
00208 token=analyze(is,last);
00209 if (token==QUESTION)
00210 {
00211 read_until(is,is_question);
00212 EXPECT(QUESTION);
00213 EXPECT(RTAG);
00214 continue;
00215 }
00216 if (token==SLASH)
00217 {
00218 if (!parent) SYNTAX_ERROR;
00219 EXPECT(IDENT);
00220 if (last != parent->get_type()) SYNTAX_ERROR;
00221 EXPECT(RTAG);
00222 return;
00223 }
00224 else
00225 if (token==IDENT)
00226 {
00227 xml_element* child=new xml_element;
00228 child->set_type(last);
00229 parent->add_child(child);
00230 while (true)
00231 {
00232 token=analyze(is,last);
00233 if (token==XEOF) return;
00234 if (token==IDENT)
00235 {
00236 xstring attr_value,attr_name=last;
00237 EXPECT(EQ);
00238 token=analyze(is,last);
00239 if (token==QUOTES)
00240 {
00241 token=analyze(is,last);
00242 if (token==TEXT)
00243 {
00244 attr_value=last;
00245 EXPECT(QUOTES);
00246 }
00247 else
00248 if (token!=QUOTES) SYNTAX_ERROR;
00249 }
00250 else
00251 {
00252 attr_value=last;
00253 }
00254 child->set_attribute(attr_name,attr_value);
00255 }
00256 else
00257 if (token==SLASH)
00258 {
00259 EXPECT(RTAG);
00260 break;
00261 }
00262 else
00263 if (token==RTAG)
00264 {
00265 parse_element(is,child);
00266 break;
00267 }
00268 }
00269 }
00270 }
00271 }
00272 }
00273
00274 public:
00275
00276 xml_element* parse(std::istream& is)
00277 {
00278 if (is.fail()) return 0;
00279 xml_element* root=new xml_element;
00280 try
00281 {
00282 parse_element(is,root);
00283 int n=root->get_child_count();
00284 if (n>1) throw "Error: Multiple root nodes";
00285 if (n==0) throw "Error: No root node";
00286 xml_element* new_root=*(root->begin());
00287 root->remove(new_root,false);
00288 delete root;
00289 root=new_root;
00290 } catch (const char* msg)
00291 {
00292 std::cerr << "Line " << m_LineNumber << " - " << msg << std::endl;
00293 delete root; root=0;
00294 }
00295 return root;
00296 }
00297 };
00298
00299 inline xml_element* load_xml_from_file(const char* filename)
00300 {
00301 std::ifstream fin(filename);
00302 return xml_parser().parse(fin);
00303 }
00304
00305 inline xml_element* load_xml_from_text(const xstring& text)
00306 {
00307 std::istringstream is(text);
00308 return xml_parser().parse(is);
00309 }
00310
00311 inline xstring get_xml_text(xml_element* root)
00312 {
00313 std::ostringstream os;
00314 root->print(os,0,false);
00315 return xstring(os.str());
00316 }
00317
00318 inline xstring get_xml_text(xml_element& root)
00319 {
00320 return get_xml_text(&root);
00321 }
00322
00323 inline xml_element* clone_element(xml_element* root)
00324 {
00325 return load_xml_from_text(get_xml_text(root));
00326 }
00327
00328 #endif // H_XML
00329