2014-11-16 23:04:58 +00:00
|
|
|
#include <ctype.h>
|
2015-05-16 13:53:16 +00:00
|
|
|
#include <errno.h>
|
2015-07-29 10:58:06 +00:00
|
|
|
#include <stdint.h>
|
2013-05-20 17:25:51 +00:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
2014-11-16 23:04:58 +00:00
|
|
|
#include <string.h>
|
2015-05-16 13:53:16 +00:00
|
|
|
#include <unistd.h>
|
2013-05-20 17:25:51 +00:00
|
|
|
|
2014-03-31 20:46:58 +00:00
|
|
|
#include "xml.h"
|
2013-05-20 17:25:51 +00:00
|
|
|
|
2015-08-14 11:47:19 +00:00
|
|
|
/* Reader state for parsing from a file descriptor (see xml_getnext_fd). */
struct xml_context_fd {
	int    fd;          /* descriptor passed to read() */
	int    readerrno;   /* errno of a failed read(); non-zero stops reading */
	size_t nread;       /* number of valid bytes currently in buf */
	size_t offset;      /* index of the next byte in buf to hand out */
	char   buf[BUFSIZ]; /* read buffer */
};
|
|
|
|
|
|
|
|
/* Reader state for parsing from memory (see xml_getnext_string). */
struct xml_context_string {
	const char *str; /* next unread byte; reading stops at the NUL byte */
};
|
|
|
|
|
2015-05-16 13:53:16 +00:00
|
|
|
static int
|
2015-08-14 11:47:19 +00:00
|
|
|
xml_getnext_string(XMLParser *x)
|
2014-11-16 23:04:58 +00:00
|
|
|
{
|
2015-08-14 11:47:19 +00:00
|
|
|
struct xml_context_string *d = (struct xml_context_string *)x->getnext_data;
|
|
|
|
|
|
|
|
if (!*(d->str))
|
2014-11-16 23:04:58 +00:00
|
|
|
return EOF;
|
2015-08-14 11:47:19 +00:00
|
|
|
return (int)*(d->str++);
|
2015-05-16 13:53:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int /* like getc(), but do some smart buffering */
|
2015-08-14 11:47:19 +00:00
|
|
|
xml_getnext_fd(XMLParser *x)
|
2015-05-16 13:53:16 +00:00
|
|
|
{
|
2015-08-14 11:47:19 +00:00
|
|
|
struct xml_context_fd *d = (struct xml_context_fd *)x->getnext_data;
|
2015-05-16 13:53:16 +00:00
|
|
|
ssize_t r;
|
|
|
|
|
|
|
|
/* previous read error was set */
|
2015-08-14 11:47:19 +00:00
|
|
|
if (d->readerrno)
|
2014-11-16 23:04:58 +00:00
|
|
|
return EOF;
|
2015-05-16 13:53:16 +00:00
|
|
|
|
2015-08-14 11:47:19 +00:00
|
|
|
if (d->offset >= d->nread) {
|
|
|
|
d->offset = 0;
|
2015-05-16 13:53:16 +00:00
|
|
|
again:
|
2015-08-14 11:47:19 +00:00
|
|
|
r = read(d->fd, d->buf, sizeof(d->buf));
|
2015-07-28 19:56:46 +00:00
|
|
|
if (r == -1) {
|
|
|
|
if (errno == EINTR)
|
2015-05-16 13:53:16 +00:00
|
|
|
goto again;
|
2015-08-14 11:47:19 +00:00
|
|
|
d->readerrno = errno;
|
|
|
|
d->nread = 0;
|
2015-05-16 13:53:16 +00:00
|
|
|
return EOF;
|
2015-07-28 19:56:46 +00:00
|
|
|
} else if (!r) {
|
2015-05-16 13:53:16 +00:00
|
|
|
return EOF;
|
|
|
|
}
|
2015-08-14 11:47:19 +00:00
|
|
|
d->nread = r;
|
2013-05-20 17:25:51 +00:00
|
|
|
}
|
2015-08-14 11:47:19 +00:00
|
|
|
return (int)d->buf[d->offset++];
|
2015-05-16 13:53:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2015-08-14 11:47:19 +00:00
|
|
|
xml_getnext(XMLParser *x)
|
2015-05-16 13:53:16 +00:00
|
|
|
{
|
|
|
|
return x->getnext(x);
|
2013-05-20 17:25:51 +00:00
|
|
|
}
|
|
|
|
|
2015-08-06 19:10:02 +00:00
|
|
|
static void
|
2015-08-14 11:47:19 +00:00
|
|
|
xml_parseattrs(XMLParser *x)
|
2014-11-16 23:04:58 +00:00
|
|
|
{
|
2014-03-31 20:46:58 +00:00
|
|
|
size_t namelen = 0, valuelen;
|
|
|
|
int c, endsep, endname = 0;
|
2013-05-20 17:25:51 +00:00
|
|
|
|
2015-08-14 11:47:19 +00:00
|
|
|
while ((c = xml_getnext(x)) != EOF) {
|
2015-07-28 19:56:46 +00:00
|
|
|
if (isspace(c)) { /* TODO: simplify endname ? */
|
|
|
|
if (namelen)
|
2013-05-20 17:25:51 +00:00
|
|
|
endname = 1;
|
2014-06-28 13:27:29 +00:00
|
|
|
continue;
|
2013-05-20 17:25:51 +00:00
|
|
|
}
|
2015-07-28 19:56:46 +00:00
|
|
|
if (c == '?')
|
2014-05-08 14:59:03 +00:00
|
|
|
; /* ignore */
|
2015-07-28 19:56:46 +00:00
|
|
|
else if (c == '=') {
|
2013-05-20 17:25:51 +00:00
|
|
|
x->name[namelen] = '\0';
|
2015-07-28 19:56:46 +00:00
|
|
|
} else if (namelen && ((endname && isalpha(c)) || (c == '>' || c == '/'))) {
|
2013-05-20 17:25:51 +00:00
|
|
|
/* attribute without value */
|
|
|
|
x->name[namelen] = '\0';
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmlattrstart)
|
2013-05-20 17:25:51 +00:00
|
|
|
x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmlattr)
|
2013-05-20 17:25:51 +00:00
|
|
|
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, "", 0);
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmlattrend)
|
2013-05-20 17:25:51 +00:00
|
|
|
x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
|
|
|
|
endname = 0;
|
|
|
|
x->name[0] = c;
|
|
|
|
namelen = 1;
|
2015-07-28 19:56:46 +00:00
|
|
|
} else if (namelen && (c == '\'' || c == '"')) {
|
2013-05-20 17:25:51 +00:00
|
|
|
/* attribute with value */
|
2014-03-31 20:46:58 +00:00
|
|
|
endsep = c; /* c is end separator */
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmlattrstart)
|
2014-03-31 20:46:58 +00:00
|
|
|
x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
|
2015-08-14 11:47:19 +00:00
|
|
|
for (valuelen = 0; (c = xml_getnext(x)) != EOF;) {
|
2015-07-28 19:56:46 +00:00
|
|
|
if (c == '&') { /* entities */
|
2014-03-31 20:46:58 +00:00
|
|
|
x->data[valuelen] = '\0';
|
|
|
|
/* call data function with data before entity if there is data */
|
2015-07-28 19:56:46 +00:00
|
|
|
if (valuelen && x->xmlattr)
|
2014-03-31 20:46:58 +00:00
|
|
|
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
|
|
|
|
x->data[0] = c;
|
|
|
|
valuelen = 1;
|
2015-08-14 11:47:19 +00:00
|
|
|
while ((c = xml_getnext(x)) != EOF) {
|
2015-07-28 19:56:46 +00:00
|
|
|
if (c == endsep)
|
2014-03-31 20:46:58 +00:00
|
|
|
break;
|
2015-07-28 19:56:46 +00:00
|
|
|
if (valuelen < sizeof(x->data) - 1)
|
2014-03-31 20:46:58 +00:00
|
|
|
x->data[valuelen++] = c;
|
2014-05-08 14:59:03 +00:00
|
|
|
else {
|
|
|
|
/* TODO: entity too long? this should be very strange. */
|
2014-03-31 20:46:58 +00:00
|
|
|
x->data[valuelen] = '\0';
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmlattr)
|
2014-03-31 20:46:58 +00:00
|
|
|
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
|
|
|
|
valuelen = 0;
|
|
|
|
break;
|
|
|
|
}
|
2015-07-28 19:56:46 +00:00
|
|
|
if (c == ';') {
|
2014-03-31 20:46:58 +00:00
|
|
|
x->data[valuelen] = '\0';
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmlattrentity)
|
2015-05-16 13:54:39 +00:00
|
|
|
x->xmlattrentity(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
|
2014-03-31 20:46:58 +00:00
|
|
|
valuelen = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2015-07-28 19:56:46 +00:00
|
|
|
} else if (c != endsep) {
|
|
|
|
if (valuelen < sizeof(x->data) - 1) {
|
2014-03-31 20:46:58 +00:00
|
|
|
x->data[valuelen++] = c;
|
|
|
|
} else {
|
|
|
|
x->data[valuelen] = '\0';
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmlattr)
|
2014-03-31 20:46:58 +00:00
|
|
|
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
|
|
|
|
x->data[0] = c;
|
|
|
|
valuelen = 1;
|
|
|
|
}
|
|
|
|
}
|
2015-07-28 19:56:46 +00:00
|
|
|
if (c == endsep) {
|
2014-03-31 20:46:58 +00:00
|
|
|
x->data[valuelen] = '\0';
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmlattr)
|
2014-03-31 20:46:58 +00:00
|
|
|
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmlattrend)
|
2014-03-31 20:46:58 +00:00
|
|
|
x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2013-05-20 17:25:51 +00:00
|
|
|
namelen = 0;
|
|
|
|
endname = 0;
|
2015-07-28 19:56:46 +00:00
|
|
|
} else if (namelen < sizeof(x->name) - 1) {
|
2013-05-20 17:25:51 +00:00
|
|
|
x->name[namelen++] = c;
|
2015-05-16 13:53:16 +00:00
|
|
|
}
|
2015-07-28 19:56:46 +00:00
|
|
|
if (c == '>') {
|
2013-05-20 17:25:51 +00:00
|
|
|
break;
|
2015-07-28 19:56:46 +00:00
|
|
|
} else if (c == '/') {
|
2014-03-31 20:46:58 +00:00
|
|
|
x->isshorttag = 1;
|
2013-05-20 17:25:51 +00:00
|
|
|
namelen = 0;
|
|
|
|
x->name[0] = '\0';
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-08-06 19:10:02 +00:00
|
|
|
static void
|
2015-08-14 11:47:19 +00:00
|
|
|
xml_parsecomment(XMLParser *x)
|
2014-11-16 23:04:58 +00:00
|
|
|
{
|
2015-06-22 22:06:15 +00:00
|
|
|
static const char *end = "-->";
|
2013-05-20 17:25:51 +00:00
|
|
|
size_t datalen = 0, i = 0;
|
2015-06-22 22:06:15 +00:00
|
|
|
char tmp[4];
|
2013-05-20 17:25:51 +00:00
|
|
|
int c;
|
|
|
|
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmlcommentstart)
|
2013-05-20 17:25:51 +00:00
|
|
|
x->xmlcommentstart(x);
|
2015-08-14 11:47:19 +00:00
|
|
|
while ((c = xml_getnext(x)) != EOF) {
|
2015-07-28 19:56:46 +00:00
|
|
|
if (c == end[i]) {
|
|
|
|
if (end[++i] == '\0') { /* end */
|
2015-06-22 22:06:15 +00:00
|
|
|
x->data[datalen] = '\0';
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmlcomment)
|
2015-06-22 22:06:15 +00:00
|
|
|
x->xmlcomment(x, x->data, datalen);
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmlcommentend)
|
2013-05-20 17:25:51 +00:00
|
|
|
x->xmlcommentend(x);
|
2015-06-22 22:06:15 +00:00
|
|
|
return;
|
|
|
|
}
|
2015-07-28 19:56:46 +00:00
|
|
|
} else if (i) {
|
|
|
|
if (x->xmlcomment) {
|
2015-06-22 22:06:15 +00:00
|
|
|
x->data[datalen] = '\0';
|
2015-07-28 19:56:46 +00:00
|
|
|
if (datalen)
|
2015-06-22 22:06:15 +00:00
|
|
|
x->xmlcomment(x, x->data, datalen);
|
|
|
|
memcpy(tmp, end, i);
|
|
|
|
tmp[i] = '\0';
|
|
|
|
x->xmlcomment(x, tmp, i);
|
2013-05-20 17:25:51 +00:00
|
|
|
}
|
|
|
|
i = 0;
|
2015-06-22 22:06:15 +00:00
|
|
|
x->data[0] = c;
|
|
|
|
datalen = 1;
|
2015-07-28 19:56:46 +00:00
|
|
|
} else if (datalen < sizeof(x->data) - 1) {
|
2013-05-20 17:25:51 +00:00
|
|
|
x->data[datalen++] = c;
|
2015-06-22 22:06:15 +00:00
|
|
|
} else {
|
2013-05-20 17:25:51 +00:00
|
|
|
x->data[datalen] = '\0';
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmlcomment)
|
2013-05-20 17:25:51 +00:00
|
|
|
x->xmlcomment(x, x->data, datalen);
|
|
|
|
x->data[0] = c;
|
|
|
|
datalen = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-08-06 19:10:02 +00:00
|
|
|
static void
|
2015-08-14 11:47:19 +00:00
|
|
|
xml_parsecdata(XMLParser *x)
|
2014-11-16 23:04:58 +00:00
|
|
|
{
|
2015-06-20 22:18:44 +00:00
|
|
|
static const char *end = "]]>";
|
2013-05-20 17:25:51 +00:00
|
|
|
size_t datalen = 0, i = 0;
|
2015-06-22 21:56:43 +00:00
|
|
|
char tmp[4];
|
2013-05-20 17:25:51 +00:00
|
|
|
int c;
|
|
|
|
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmlcdatastart)
|
2013-05-20 17:25:51 +00:00
|
|
|
x->xmlcdatastart(x);
|
2015-08-14 11:47:19 +00:00
|
|
|
while ((c = xml_getnext(x)) != EOF) {
|
2015-07-28 19:56:46 +00:00
|
|
|
if (c == end[i]) {
|
|
|
|
if (end[++i] == '\0') { /* end */
|
2015-06-22 21:56:43 +00:00
|
|
|
x->data[datalen] = '\0';
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmlcdata)
|
2015-06-20 22:18:44 +00:00
|
|
|
x->xmlcdata(x, x->data, datalen);
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmlcdataend)
|
2015-06-22 22:06:15 +00:00
|
|
|
x->xmlcdataend(x);
|
2015-06-22 21:56:43 +00:00
|
|
|
return;
|
2015-06-20 22:18:44 +00:00
|
|
|
}
|
2015-07-28 19:56:46 +00:00
|
|
|
} else if (i) {
|
2015-06-22 22:06:15 +00:00
|
|
|
x->data[datalen] = '\0';
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmlcdata) {
|
|
|
|
if (datalen)
|
2015-06-22 22:06:15 +00:00
|
|
|
x->xmlcdata(x, x->data, datalen);
|
2015-06-22 21:56:43 +00:00
|
|
|
memcpy(tmp, end, i);
|
|
|
|
tmp[i] = '\0';
|
|
|
|
x->xmlcdata(x, tmp, i);
|
2013-05-20 17:25:51 +00:00
|
|
|
}
|
|
|
|
i = 0;
|
2015-06-22 21:56:43 +00:00
|
|
|
x->data[0] = c;
|
|
|
|
datalen = 1;
|
2015-07-28 19:56:46 +00:00
|
|
|
} else if (datalen < sizeof(x->data) - 1) {
|
2013-05-20 17:25:51 +00:00
|
|
|
x->data[datalen++] = c;
|
|
|
|
} else {
|
|
|
|
x->data[datalen] = '\0';
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmlcdata)
|
2013-05-20 17:25:51 +00:00
|
|
|
x->xmlcdata(x, x->data, datalen);
|
|
|
|
x->data[0] = c;
|
|
|
|
datalen = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-06-20 22:18:44 +00:00
|
|
|
/* Encode the Unicode code point cp as UTF-8.
 *
 * The encoded bytes are packed into *utf with the last byte of the sequence
 * in the least significant byte (e.g. U+20AC gives 0xe282ac).
 * Returns the number of bytes in the sequence (1-4). Returns 0 and stores 0
 * for the NUL code point and for values that cannot be encoded: surrogates
 * (U+D800..U+DFFF) and anything above U+10FFFF. */
int
xml_codepointtoutf8(uint32_t cp, uint32_t *utf)
{
	if (cp > 0x10ffff || (cp >= 0xd800 && cp <= 0xdfff)) {
		/* not a Unicode scalar value: no valid UTF-8 form exists */
		*utf = 0;
		return 0;
	}
	if (cp >= 0x10000) {
		/* 4 bytes: 11110aaa 10bbbbbb 10cccccc 10dddddd */
		*utf = 0xf0808080 | ((cp & 0x1c0000) << 6) |
		       ((cp & 0x3f000) << 4) | ((cp & 0xfc0) << 2) |
		       (cp & 0x3f);
		return 4;
	} else if (cp >= 0x00800) {
		/* 3 bytes: 1110aaaa 10bbbbbb 10cccccc */
		*utf = 0xe08080 |
		       ((cp & 0xf000) << 4) | ((cp & 0xfc0) << 2) |
		       (cp & 0x3f);
		return 3;
	} else if (cp >= 0x80) {
		/* 2 bytes: 110aaaaa 10bbbbbb */
		*utf = 0xc080 |
		       ((cp & 0x7c0) << 2) | (cp & 0x3f);
		return 2;
	}
	*utf = cp & 0xff;
	return *utf ? 1 : 0; /* 1 byte */
}
|
|
|
|
|
|
|
|
/* Convert one of the five predefined named entities to its character.
 *
 * e must be the complete entity including the leading '&' and the trailing
 * ';' (e.g. "&amp;"); the all-lowercase and all-uppercase spellings are
 * recognised. On a match the character and a NUL byte are written to buf.
 * Returns 1 on a match, 0 if e is not a known entity, or -1 if bufsiz is
 * smaller than 2. */
ssize_t
xml_namedentitytostr(const char *e, char *buf, size_t bufsiz)
{
	static const struct {
		const char *entity;
		int c;
	} entities[] = {
		{ .entity = "&amp;",  .c = '&'  },
		{ .entity = "&lt;",   .c = '<'  },
		{ .entity = "&gt;",   .c = '>'  },
		{ .entity = "&apos;", .c = '\'' },
		{ .entity = "&quot;", .c = '"'  },
		{ .entity = "&AMP;",  .c = '&'  },
		{ .entity = "&LT;",   .c = '<'  },
		{ .entity = "&GT;",   .c = '>'  },
		{ .entity = "&APOS;", .c = '\'' },
		{ .entity = "&QUOT;", .c = '"'  }
	};
	size_t i;

	/* buffer is too small */
	if (bufsiz < 2)
		return -1;

	/* doesn't start with &: can't match */
	if (*e != '&')
		return 0;

	for (i = 0; i < sizeof(entities) / sizeof(*entities); i++) {
		if (!strcmp(e, entities[i].entity)) {
			buf[0] = entities[i].c;
			buf[1] = '\0';
			return 1;
		}
	}
	return 0;
}
|
|
|
|
|
|
|
|
/* Convert a numeric character reference ("&#NNN;" or "&#xHHH;") to UTF-8.
 *
 * The digits must be followed by ';' or the end of the string. On success
 * the UTF-8 bytes and a NUL byte are written to buf.
 * Returns the number of UTF-8 bytes written (1-4), 0 if e is not a
 * well-formed numeric entity or names a value that cannot be encoded
 * (NUL, a surrogate, or above U+10FFFF), or -1 if bufsiz is smaller than 5. */
ssize_t
xml_numericentitytostr(const char *e, char *buf, size_t bufsiz)
{
	unsigned long l;
	uint32_t cp = 0;
	size_t b, len;
	char *end;
	int base = 10;

	/* buffer is too small */
	if (bufsiz < 5)
		return -1;

	/* not a numeric entity */
	if (!(e[0] == '&' && e[1] == '#'))
		return 0;

	/* e[1] == '#', numeric / hexadecimal entity */
	e += 2; /* skip "&#" */
	/* hex (16) or decimal (10) */
	if (*e == 'x') {
		base = 16;
		e++;
	}
	/* strtoul() would also accept leading whitespace, a sign and (for
	 * base 16) a "0x" prefix: require the number to start with a digit */
	if (base == 16) {
		if (!isxdigit((unsigned char)e[0]) ||
		    (e[0] == '0' && (e[1] == 'x' || e[1] == 'X')))
			return 0;
	} else if (!isdigit((unsigned char)e[0])) {
		return 0;
	}

	errno = 0;
	l = strtoul(e, &end, base);
	/* invalid value or not a well-formed entity */
	if (errno != 0 || (*end != '\0' && *end != ';'))
		return 0;
	/* check the range before narrowing to uint32_t, so that a large
	 * value cannot wrap around to a valid code point */
	if (l > 0x10ffff || (l >= 0xd800 && l <= 0xdfff))
		return 0;
	if (!(len = xml_codepointtoutf8((uint32_t)l, &cp)))
		return 0;
	/* make string: cp holds the sequence with its last byte lowest */
	for (b = 0; b < len; b++)
		buf[b] = (cp >> (8 * (len - 1 - b))) & 0xff;
	buf[len] = '\0';

	return (ssize_t)len;
}
|
|
|
|
|
|
|
|
/* Convert a named or numeric entity string to its string value in buf.
 * Dispatches on the character after '&': '#' selects the numeric
 * conversion, anything else the named one.
 * Returns the byte-length of the resulting string, 0 if e is not an entity
 * that could be converted, or -1 if bufsiz is smaller than 5. */
ssize_t
xml_entitytostr(const char *e, char *buf, size_t bufsiz)
{
	if (bufsiz < 5)
		return -1; /* buffer is too small */
	if (e[0] != '&')
		return 0; /* doesn't start with & */

	return (e[1] == '#')
	       ? xml_numericentitytostr(e, buf, bufsiz) /* numeric entity */
	       : xml_namedentitytostr(e, buf, bufsiz);  /* named entity */
}
|
|
|
|
|
2015-08-14 11:47:19 +00:00
|
|
|
void
|
|
|
|
xml_parse(XMLParser *x)
|
2014-11-16 23:04:58 +00:00
|
|
|
{
|
2014-03-31 20:46:58 +00:00
|
|
|
int c, ispi;
|
|
|
|
size_t datalen, tagdatalen, taglen;
|
2013-05-20 17:25:51 +00:00
|
|
|
|
2015-08-14 11:47:19 +00:00
|
|
|
while ((c = xml_getnext(x)) != EOF && c != '<')
|
2015-08-06 15:54:09 +00:00
|
|
|
; /* skip until < */
|
2014-03-31 20:46:58 +00:00
|
|
|
|
2015-07-28 19:56:46 +00:00
|
|
|
while (c != EOF) {
|
|
|
|
if (c == '<') { /* parse tag */
|
2015-08-14 11:47:19 +00:00
|
|
|
if ((c = xml_getnext(x)) == EOF)
|
2014-03-31 20:46:58 +00:00
|
|
|
return;
|
|
|
|
x->tag[0] = '\0';
|
|
|
|
x->taglen = 0;
|
2015-07-28 19:56:46 +00:00
|
|
|
if (c == '!') { /* cdata and comments */
|
2015-08-14 11:47:19 +00:00
|
|
|
for (tagdatalen = 0; (c = xml_getnext(x)) != EOF;) {
|
2015-08-06 15:54:09 +00:00
|
|
|
if (tagdatalen <= sizeof("[CDATA[") - 1) /* if (d < sizeof(x->data)) */
|
2014-03-31 20:46:58 +00:00
|
|
|
x->data[tagdatalen++] = c; /* TODO: prevent overflow */
|
2015-07-28 19:56:46 +00:00
|
|
|
if (c == '>')
|
2014-03-31 20:46:58 +00:00
|
|
|
break;
|
2015-08-06 15:54:09 +00:00
|
|
|
else if (c == '-' && tagdatalen == sizeof("--") - 1 &&
|
2014-03-31 20:46:58 +00:00
|
|
|
(x->data[0] == '-')) { /* comment */
|
2015-08-14 11:47:19 +00:00
|
|
|
xml_parsecomment(x);
|
2014-03-31 20:46:58 +00:00
|
|
|
break;
|
2015-07-28 19:56:46 +00:00
|
|
|
} else if (c == '[') {
|
2015-08-06 15:54:09 +00:00
|
|
|
if (tagdatalen == sizeof("[CDATA[") - 1 &&
|
2014-03-31 20:46:58 +00:00
|
|
|
x->data[1] == 'C' && x->data[2] == 'D' &&
|
|
|
|
x->data[3] == 'A' && x->data[4] == 'T' &&
|
2015-05-16 13:53:16 +00:00
|
|
|
x->data[5] == 'A' && x->data[6] == '[') { /* CDATA */
|
2015-08-14 11:47:19 +00:00
|
|
|
xml_parsecdata(x);
|
2014-03-31 20:46:58 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else { /* normal tag (open, short open, close), processing instruction. */
|
2015-07-28 19:56:46 +00:00
|
|
|
if (isspace(c))
|
2015-08-14 11:47:19 +00:00
|
|
|
while ((c = xml_getnext(x)) != EOF && isspace(c))
|
2015-08-06 15:54:09 +00:00
|
|
|
;
|
2015-07-28 19:56:46 +00:00
|
|
|
if (c == EOF)
|
2014-03-31 20:46:58 +00:00
|
|
|
return;
|
|
|
|
x->tag[0] = c;
|
|
|
|
ispi = (c == '?') ? 1 : 0;
|
|
|
|
x->isshorttag = ispi;
|
|
|
|
taglen = 1;
|
2015-08-14 11:47:19 +00:00
|
|
|
while ((c = xml_getnext(x)) != EOF) {
|
2015-07-28 19:56:46 +00:00
|
|
|
if (c == '/') /* TODO: simplify short tag? */
|
2014-03-31 20:46:58 +00:00
|
|
|
x->isshorttag = 1; /* short tag */
|
2015-07-28 19:56:46 +00:00
|
|
|
else if (c == '>' || isspace(c)) {
|
2014-03-31 20:46:58 +00:00
|
|
|
x->tag[taglen] = '\0';
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->tag[0] == '/') { /* end tag, starts with </ */
|
2014-03-31 20:46:58 +00:00
|
|
|
x->taglen = --taglen; /* len -1 because of / */
|
2015-07-28 19:56:46 +00:00
|
|
|
if (taglen && x->xmltagend)
|
2014-03-31 20:46:58 +00:00
|
|
|
x->xmltagend(x, &(x->tag)[1], x->taglen, 0);
|
|
|
|
} else {
|
|
|
|
x->taglen = taglen;
|
2014-11-11 18:17:58 +00:00
|
|
|
/* start tag */
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmltagstart)
|
2014-11-11 18:17:58 +00:00
|
|
|
x->xmltagstart(x, x->tag, x->taglen);
|
2015-07-28 19:56:46 +00:00
|
|
|
if (isspace(c))
|
2015-08-14 11:47:19 +00:00
|
|
|
xml_parseattrs(x);
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmltagstartparsed)
|
2014-03-31 20:46:58 +00:00
|
|
|
x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag);
|
|
|
|
}
|
2014-11-11 18:17:58 +00:00
|
|
|
/* call tagend for shortform or processing instruction */
|
2015-07-28 19:56:46 +00:00
|
|
|
if ((x->isshorttag || ispi) && x->xmltagend)
|
2014-03-31 20:46:58 +00:00
|
|
|
x->xmltagend(x, x->tag, x->taglen, 1);
|
|
|
|
break;
|
2015-07-28 19:56:46 +00:00
|
|
|
} else if (taglen < sizeof(x->tag) - 1)
|
2014-03-31 20:46:58 +00:00
|
|
|
x->tag[taglen++] = c;
|
|
|
|
}
|
|
|
|
}
|
2014-05-08 14:59:03 +00:00
|
|
|
} else {
|
2014-11-11 18:12:24 +00:00
|
|
|
/* parse tag data */
|
2014-03-31 20:46:58 +00:00
|
|
|
datalen = 0;
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmldatastart)
|
2014-03-31 20:46:58 +00:00
|
|
|
x->xmldatastart(x);
|
2015-08-14 11:47:19 +00:00
|
|
|
while ((c = xml_getnext(x)) != EOF) {
|
2015-07-28 19:56:46 +00:00
|
|
|
if (c == '&') {
|
|
|
|
if (datalen) {
|
2014-03-31 20:46:58 +00:00
|
|
|
x->data[datalen] = '\0';
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmldata)
|
2015-05-16 14:09:01 +00:00
|
|
|
x->xmldata(x, x->data, datalen);
|
2014-03-31 20:46:58 +00:00
|
|
|
}
|
|
|
|
x->data[0] = c;
|
|
|
|
datalen = 1;
|
2015-08-14 11:47:19 +00:00
|
|
|
while ((c = xml_getnext(x)) != EOF) {
|
2015-07-28 19:56:46 +00:00
|
|
|
if (c == '<')
|
2014-03-31 20:46:58 +00:00
|
|
|
break;
|
2015-07-28 19:56:46 +00:00
|
|
|
if (datalen < sizeof(x->data) - 1)
|
2014-03-31 20:46:58 +00:00
|
|
|
x->data[datalen++] = c;
|
2015-07-28 19:56:46 +00:00
|
|
|
if (isspace(c))
|
2014-03-31 20:46:58 +00:00
|
|
|
break;
|
2015-07-28 19:56:46 +00:00
|
|
|
else if (c == ';') {
|
2014-03-31 20:46:58 +00:00
|
|
|
x->data[datalen] = '\0';
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmldataentity)
|
2015-05-16 13:54:39 +00:00
|
|
|
x->xmldataentity(x, x->data, datalen);
|
2014-03-31 20:46:58 +00:00
|
|
|
datalen = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2015-07-28 19:56:46 +00:00
|
|
|
} else if (c != '<') {
|
|
|
|
if (datalen < sizeof(x->data) - 1) {
|
2014-03-31 20:46:58 +00:00
|
|
|
x->data[datalen++] = c;
|
|
|
|
} else {
|
|
|
|
x->data[datalen] = '\0';
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmldata)
|
2014-03-31 20:46:58 +00:00
|
|
|
x->xmldata(x, x->data, datalen);
|
|
|
|
x->data[0] = c;
|
|
|
|
datalen = 1;
|
|
|
|
}
|
|
|
|
}
|
2015-07-28 19:56:46 +00:00
|
|
|
if (c == '<') {
|
2013-05-20 17:25:51 +00:00
|
|
|
x->data[datalen] = '\0';
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmldata && datalen)
|
2014-03-31 20:46:58 +00:00
|
|
|
x->xmldata(x, x->data, datalen);
|
2015-07-28 19:56:46 +00:00
|
|
|
if (x->xmldataend)
|
2014-03-31 20:46:58 +00:00
|
|
|
x->xmldataend(x);
|
2013-05-20 17:25:51 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2015-05-16 13:53:16 +00:00
|
|
|
|
|
|
|
void
|
2015-08-14 11:47:19 +00:00
|
|
|
xml_parse_string(XMLParser *x, const char *s)
|
2015-05-16 13:53:16 +00:00
|
|
|
{
|
2015-08-14 11:47:19 +00:00
|
|
|
struct xml_context_string ctx = { .str = s };
|
|
|
|
|
|
|
|
x->getnext = xml_getnext_string;
|
|
|
|
x->getnext_data = (void *)&ctx;
|
|
|
|
xml_parse(x);
|
2015-05-16 13:53:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2015-08-14 11:47:19 +00:00
|
|
|
xml_parse_fd(XMLParser *x, int fd)
|
2015-05-16 13:53:16 +00:00
|
|
|
{
|
2015-08-14 11:47:19 +00:00
|
|
|
struct xml_context_fd ctx;
|
|
|
|
|
|
|
|
memset(&ctx, 0, sizeof(ctx));
|
|
|
|
ctx.fd = fd;
|
|
|
|
|
|
|
|
x->getnext = xml_getnext_fd;
|
|
|
|
x->getnext_data = (void *)&ctx;
|
|
|
|
xml_parse(x);
|
2015-05-16 13:53:16 +00:00
|
|
|
}
|