2013-05-20 17:25:51 +00:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <ctype.h>
|
|
|
|
|
2014-03-31 20:46:58 +00:00
|
|
|
#include "xml.h"
|
2013-05-20 17:25:51 +00:00
|
|
|
|
2014-04-02 13:03:30 +00:00
|
|
|
static __inline__ int /* like getc(), but do some smart buffering */
|
2013-05-20 17:25:51 +00:00
|
|
|
xmlparser_getnext(XMLParser *x) {
|
2014-06-27 13:42:53 +00:00
|
|
|
return fgetc(x->fp);
|
|
|
|
#if 0
|
2013-05-20 17:25:51 +00:00
|
|
|
if(x->readoffset >= x->readlastbytes) {
|
|
|
|
x->readoffset = 0;
|
|
|
|
if(!(x->readlastbytes = fread(x->readbuf, 1, sizeof(x->readbuf), x->fp)))
|
|
|
|
return EOF; /* 0 bytes read, assume EOF */
|
|
|
|
}
|
|
|
|
return (int)x->readbuf[x->readoffset++];
|
2014-06-27 13:42:53 +00:00
|
|
|
#endif
|
2013-05-20 17:25:51 +00:00
|
|
|
}
|
|
|
|
|
2014-04-02 13:03:30 +00:00
|
|
|
static __inline__ void
|
2014-03-31 20:46:58 +00:00
|
|
|
xmlparser_parseattrs(XMLParser *x) {
|
|
|
|
size_t namelen = 0, valuelen;
|
|
|
|
int c, endsep, endname = 0;
|
2013-05-20 17:25:51 +00:00
|
|
|
|
|
|
|
while((c = xmlparser_getnext(x)) != EOF) {
|
2014-03-31 20:46:58 +00:00
|
|
|
if(isspace(c)) { /* TODO: simplify endname ? */
|
2014-05-08 14:59:03 +00:00
|
|
|
if(namelen)
|
2013-05-20 17:25:51 +00:00
|
|
|
endname = 1;
|
2014-06-28 13:27:29 +00:00
|
|
|
continue;
|
2013-05-20 17:25:51 +00:00
|
|
|
}
|
2014-05-08 14:59:03 +00:00
|
|
|
if(c == '?')
|
|
|
|
; /* ignore */
|
2014-03-31 20:46:58 +00:00
|
|
|
else if(c == '=') {
|
2013-05-20 17:25:51 +00:00
|
|
|
x->name[namelen] = '\0';
|
|
|
|
} else if(namelen && ((endname && isalpha(c)) || (c == '>' || c == '/'))) {
|
|
|
|
/* attribute without value */
|
|
|
|
x->name[namelen] = '\0';
|
|
|
|
if(x->xmlattrstart)
|
|
|
|
x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
|
|
|
|
if(x->xmlattr)
|
|
|
|
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, "", 0);
|
|
|
|
if(x->xmlattrend)
|
|
|
|
x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
|
|
|
|
endname = 0;
|
|
|
|
x->name[0] = c;
|
|
|
|
namelen = 1;
|
|
|
|
} else if(namelen && (c == '\'' || c == '"')) {
|
|
|
|
/* attribute with value */
|
2014-03-31 20:46:58 +00:00
|
|
|
endsep = c; /* c is end separator */
|
|
|
|
if(x->xmlattrstart)
|
|
|
|
x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
|
|
|
|
for(valuelen = 0; (c = xmlparser_getnext(x)) != EOF;) {
|
|
|
|
if(c == '&' && x->xmlattrentity) { /* entities */
|
|
|
|
x->data[valuelen] = '\0';
|
|
|
|
/* call data function with data before entity if there is data */
|
|
|
|
if(valuelen && x->xmlattr)
|
|
|
|
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
|
|
|
|
x->data[0] = c;
|
|
|
|
valuelen = 1;
|
|
|
|
while((c = xmlparser_getnext(x)) != EOF) {
|
|
|
|
if(c == endsep)
|
|
|
|
break;
|
|
|
|
if(valuelen < sizeof(x->data) - 1)
|
|
|
|
x->data[valuelen++] = c;
|
2014-05-08 14:59:03 +00:00
|
|
|
else {
|
|
|
|
/* TODO: entity too long? this should be very strange. */
|
2014-03-31 20:46:58 +00:00
|
|
|
x->data[valuelen] = '\0';
|
|
|
|
if(x->xmlattr)
|
|
|
|
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
|
|
|
|
valuelen = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if(c == ';') {
|
|
|
|
x->data[valuelen] = '\0';
|
|
|
|
x->xmlattrentity(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
|
|
|
|
valuelen = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if(c != endsep) {
|
|
|
|
if(valuelen < sizeof(x->data) - 1) {
|
|
|
|
x->data[valuelen++] = c;
|
|
|
|
} else {
|
|
|
|
x->data[valuelen] = '\0';
|
|
|
|
if(x->xmlattr)
|
|
|
|
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
|
|
|
|
x->data[0] = c;
|
|
|
|
valuelen = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if(c == endsep) {
|
|
|
|
x->data[valuelen] = '\0';
|
|
|
|
if(x->xmlattr)
|
|
|
|
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
|
|
|
|
if(x->xmlattrend)
|
|
|
|
x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2013-05-20 17:25:51 +00:00
|
|
|
namelen = 0;
|
|
|
|
endname = 0;
|
|
|
|
} else if(namelen < sizeof(x->name) - 1)
|
|
|
|
x->name[namelen++] = c;
|
|
|
|
if(c == '>') {
|
|
|
|
break;
|
|
|
|
} else if(c == '/') {
|
2014-03-31 20:46:58 +00:00
|
|
|
x->isshorttag = 1;
|
2013-05-20 17:25:51 +00:00
|
|
|
namelen = 0;
|
|
|
|
x->name[0] = '\0';
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-04-02 13:03:30 +00:00
|
|
|
static __inline__ void
|
2013-05-20 17:25:51 +00:00
|
|
|
xmlparser_parsecomment(XMLParser *x) {
|
|
|
|
size_t datalen = 0, i = 0;
|
|
|
|
int c;
|
|
|
|
|
|
|
|
if(x->xmlcommentstart)
|
|
|
|
x->xmlcommentstart(x);
|
|
|
|
while((c = xmlparser_getnext(x)) != EOF) {
|
|
|
|
if(c == '-' && i < 2)
|
|
|
|
i++;
|
|
|
|
else if(c == '>') {
|
2014-03-31 20:46:58 +00:00
|
|
|
if(i == 2) { /* -- */
|
2013-05-20 17:25:51 +00:00
|
|
|
if(datalen >= 2) {
|
|
|
|
datalen -= 2;
|
|
|
|
x->data[datalen] = '\0';
|
|
|
|
if(x->xmlcomment)
|
|
|
|
x->xmlcomment(x, x->data, datalen);
|
|
|
|
}
|
|
|
|
if(x->xmlcommentend)
|
|
|
|
x->xmlcommentend(x);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
i = 0;
|
|
|
|
}
|
2014-05-08 14:59:03 +00:00
|
|
|
/* || (c == '-' && d >= sizeof(x->data) - 4)) { */
|
2014-06-27 13:42:53 +00:00
|
|
|
/* TODO: what if the end has --, and it's cut on the boundary, test this. */
|
2014-05-08 14:59:03 +00:00
|
|
|
if(datalen < sizeof(x->data) - 1)
|
2013-05-20 17:25:51 +00:00
|
|
|
x->data[datalen++] = c;
|
2014-03-31 20:46:58 +00:00
|
|
|
else {
|
2013-05-20 17:25:51 +00:00
|
|
|
x->data[datalen] = '\0';
|
|
|
|
if(x->xmlcomment)
|
|
|
|
x->xmlcomment(x, x->data, datalen);
|
|
|
|
x->data[0] = c;
|
|
|
|
datalen = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-03-31 20:46:58 +00:00
|
|
|
/* TODO:
|
|
|
|
* <test><![CDATA[1234567dddd8]]]>
|
|
|
|
*
|
|
|
|
* with x->data of sizeof(15) gives 2 ] at end of cdata, should be 1
|
|
|
|
* test comment function too for similar bug?
|
|
|
|
*
|
|
|
|
*/
|
2014-04-02 13:03:30 +00:00
|
|
|
static __inline__ void
|
2013-05-20 17:25:51 +00:00
|
|
|
xmlparser_parsecdata(XMLParser *x) {
|
|
|
|
size_t datalen = 0, i = 0;
|
|
|
|
int c;
|
|
|
|
|
|
|
|
if(x->xmlcdatastart)
|
|
|
|
x->xmlcdatastart(x);
|
|
|
|
while((c = xmlparser_getnext(x)) != EOF) {
|
|
|
|
if(c == ']' && i < 2) {
|
|
|
|
i++;
|
|
|
|
} else if(c == '>') {
|
2014-03-31 20:46:58 +00:00
|
|
|
if(i == 2) { /* ]] */
|
2013-05-20 17:25:51 +00:00
|
|
|
if(datalen >= 2) {
|
|
|
|
datalen -= 2;
|
|
|
|
x->data[datalen] = '\0';
|
2014-03-31 20:46:58 +00:00
|
|
|
if(x->xmlcdata && datalen)
|
2013-05-20 17:25:51 +00:00
|
|
|
x->xmlcdata(x, x->data, datalen);
|
|
|
|
}
|
|
|
|
if(x->xmlcdataend)
|
|
|
|
x->xmlcdataend(x);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
i = 0;
|
|
|
|
}
|
2014-06-27 13:42:53 +00:00
|
|
|
/* TODO: what if the end has ]>, and it's cut on the boundary */
|
2014-05-08 14:59:03 +00:00
|
|
|
if(datalen < sizeof(x->data) - 1) {
|
2013-05-20 17:25:51 +00:00
|
|
|
x->data[datalen++] = c;
|
|
|
|
} else {
|
|
|
|
x->data[datalen] = '\0';
|
|
|
|
if(x->xmlcdata)
|
|
|
|
x->xmlcdata(x, x->data, datalen);
|
|
|
|
x->data[0] = c;
|
|
|
|
datalen = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-06-27 13:42:53 +00:00
|
|
|
void
|
|
|
|
xmlparser_init(XMLParser *x, FILE *fp) {
|
|
|
|
memset(x, 0, sizeof(XMLParser));
|
|
|
|
x->fp = fp;
|
|
|
|
}
|
|
|
|
|
2013-05-20 17:25:51 +00:00
|
|
|
void
|
2014-03-31 20:46:58 +00:00
|
|
|
xmlparser_parse(XMLParser *x) {
|
|
|
|
int c, ispi;
|
|
|
|
size_t datalen, tagdatalen, taglen;
|
2013-05-20 17:25:51 +00:00
|
|
|
|
2014-03-31 20:46:58 +00:00
|
|
|
while((c = xmlparser_getnext(x)) != EOF && c != '<'); /* skip until < */
|
|
|
|
|
|
|
|
while(c != EOF) {
|
|
|
|
if(c == '<') { /* parse tag */
|
|
|
|
if((c = xmlparser_getnext(x)) == EOF)
|
|
|
|
return;
|
|
|
|
x->tag[0] = '\0';
|
|
|
|
x->taglen = 0;
|
|
|
|
if(c == '!') { /* cdata and comments */
|
|
|
|
for(tagdatalen = 0; (c = xmlparser_getnext(x)) != EOF;) {
|
|
|
|
if(tagdatalen <= strlen("[CDATA[")) /* if(d < sizeof(x->data)) */
|
|
|
|
x->data[tagdatalen++] = c; /* TODO: prevent overflow */
|
|
|
|
if(c == '>')
|
|
|
|
break;
|
|
|
|
else if(c == '-' && tagdatalen == strlen("--") &&
|
|
|
|
(x->data[0] == '-')) { /* comment */
|
|
|
|
xmlparser_parsecomment(x);
|
|
|
|
break;
|
|
|
|
} else if(c == '[') {
|
|
|
|
if(tagdatalen == strlen("[CDATA[") &&
|
|
|
|
x->data[1] == 'C' && x->data[2] == 'D' &&
|
|
|
|
x->data[3] == 'A' && x->data[4] == 'T' &&
|
|
|
|
x->data[5] == 'A' && x->data[6] == '[') { /* cdata */
|
|
|
|
xmlparser_parsecdata(x);
|
|
|
|
break;
|
2014-04-02 20:27:29 +00:00
|
|
|
#if 0
|
2014-03-31 20:46:58 +00:00
|
|
|
} else {
|
2014-04-02 20:27:29 +00:00
|
|
|
/* TODO ? */
|
2014-03-31 20:46:58 +00:00
|
|
|
/* markup declaration section */
|
|
|
|
while((c = xmlparser_getnext(x)) != EOF && c != ']');
|
2014-04-02 20:27:29 +00:00
|
|
|
#endif
|
2014-03-31 20:46:58 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else { /* normal tag (open, short open, close), processing instruction. */
|
2013-05-20 17:25:51 +00:00
|
|
|
if(isspace(c))
|
2014-03-31 20:46:58 +00:00
|
|
|
while((c = xmlparser_getnext(x)) != EOF && isspace(c));
|
|
|
|
if(c == EOF)
|
|
|
|
return;
|
|
|
|
x->tag[0] = c;
|
|
|
|
ispi = (c == '?') ? 1 : 0;
|
|
|
|
x->isshorttag = ispi;
|
|
|
|
taglen = 1;
|
|
|
|
while((c = xmlparser_getnext(x)) != EOF) {
|
|
|
|
if(c == '/') /* TODO: simplify short tag? */
|
|
|
|
x->isshorttag = 1; /* short tag */
|
|
|
|
else if(c == '>' || isspace(c)) {
|
|
|
|
x->tag[taglen] = '\0';
|
|
|
|
if(x->tag[0] == '/') { /* end tag, starts with </ */
|
|
|
|
x->taglen = --taglen; /* len -1 because of / */
|
|
|
|
if(taglen && x->xmltagend)
|
|
|
|
x->xmltagend(x, &(x->tag)[1], x->taglen, 0);
|
|
|
|
} else {
|
|
|
|
x->taglen = taglen;
|
|
|
|
if(x->xmltagstart)
|
|
|
|
x->xmltagstart(x, x->tag, x->taglen); /* start tag */
|
|
|
|
if(isspace(c))
|
|
|
|
xmlparser_parseattrs(x);
|
|
|
|
if(x->xmltagstartparsed)
|
|
|
|
x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag);
|
|
|
|
}
|
|
|
|
if((x->isshorttag || ispi) && x->xmltagend) /* call tagend for shortform or processing instruction */
|
|
|
|
x->xmltagend(x, x->tag, x->taglen, 1);
|
|
|
|
break;
|
|
|
|
} else if(taglen < sizeof(x->tag) - 1)
|
|
|
|
x->tag[taglen++] = c;
|
|
|
|
}
|
|
|
|
}
|
2014-05-08 14:59:03 +00:00
|
|
|
} else {
|
|
|
|
/* parse data */
|
2014-03-31 20:46:58 +00:00
|
|
|
datalen = 0;
|
|
|
|
if(x->xmldatastart)
|
|
|
|
x->xmldatastart(x);
|
|
|
|
while((c = xmlparser_getnext(x)) != EOF) {
|
|
|
|
if(c == '&' && x->xmldataentity) {
|
|
|
|
if(datalen) {
|
|
|
|
x->data[datalen] = '\0';
|
|
|
|
x->xmldata(x, x->data, datalen);
|
|
|
|
}
|
|
|
|
x->data[0] = c;
|
|
|
|
datalen = 1;
|
|
|
|
while((c = xmlparser_getnext(x)) != EOF) {
|
|
|
|
if(c == '<')
|
|
|
|
break;
|
|
|
|
if(datalen < sizeof(x->data) - 1)
|
|
|
|
x->data[datalen++] = c;
|
|
|
|
if(isspace(c))
|
|
|
|
break;
|
|
|
|
else if(c == ';') {
|
|
|
|
x->data[datalen] = '\0';
|
|
|
|
x->xmldataentity(x, x->data, datalen);
|
|
|
|
datalen = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if(c != '<') {
|
|
|
|
if(datalen < sizeof(x->data) - 1) {
|
|
|
|
x->data[datalen++] = c;
|
|
|
|
} else {
|
|
|
|
x->data[datalen] = '\0';
|
|
|
|
if(x->xmldata)
|
|
|
|
x->xmldata(x, x->data, datalen);
|
|
|
|
x->data[0] = c;
|
|
|
|
datalen = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if(c == '<') {
|
2013-05-20 17:25:51 +00:00
|
|
|
x->data[datalen] = '\0';
|
2014-03-31 20:46:58 +00:00
|
|
|
if(x->xmldata && datalen)
|
|
|
|
x->xmldata(x, x->data, datalen);
|
|
|
|
if(x->xmldataend)
|
|
|
|
x->xmldataend(x);
|
2013-05-20 17:25:51 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|