<?xml version="1.0" encoding="utf-8"?>
<!-- itsb/itsb.xsd (554 lines, 19 KiB, XML) -->
<!--
ITSB XML Schema
Copyright (C) 2022 ~lucidiot
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
-->
<xs:schema
targetNamespace="http://tilde.town/~lucidiot/itsb/itsb.xsd"
version="1.0"
elementFormDefault="qualified"
xmlns="http://tilde.town/~lucidiot/itsb/itsb.xsd"
xmlns:itsb="http://tilde.town/~lucidiot/itsb/itsb.xsd"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
>
<!-- URI: an xs:anyURI value that additionally carries an optional verify-ssl flag. -->
<xs:complexType name="URI">
  <xs:simpleContent>
    <xs:extension base="xs:anyURI">
      <xs:attribute name="verify-ssl"
                    type="xs:boolean"
                    use="optional"
                    default="true">
        <xs:annotation>
          <xs:documentation>
            Whether an automated script should verify or ignore the validity
            of the SSL certificate for an HTTPS URL.
            Ignoring the certificate is commonly required due to poor website management.
          </xs:documentation>
        </xs:annotation>
      </xs:attribute>
    </xs:extension>
  </xs:simpleContent>
</xs:complexType>
<!-- CurlHeader: string content (the header value) plus a required "name" attribute. -->
<xs:complexType name="CurlHeader">
  <xs:annotation>
    <xs:documentation>
      An HTTP header sent along with the request.
    </xs:documentation>
  </xs:annotation>
  <xs:simpleContent>
    <xs:extension base="xs:string">
      <xs:attribute name="name" use="required" type="xs:string" />
    </xs:extension>
  </xs:simpleContent>
</xs:complexType>
<!-- CurlCommand: one url, then any number of header elements, then an optional requestBody. -->
<xs:complexType name="CurlCommand">
  <xs:annotation>
    <xs:documentation>
      Perform an HTTP request using `curl`.
    </xs:documentation>
  </xs:annotation>
  <xs:sequence>
    <xs:element name="url" type="URI">
      <xs:annotation>
        <xs:documentation>
          URL of the original HTML webpage that will be retrieved to be converted into a feed.
        </xs:documentation>
      </xs:annotation>
    </xs:element>
    <!-- Zero or more request headers. -->
    <xs:element name="header"
                type="CurlHeader"
                minOccurs="0"
                maxOccurs="unbounded" />
    <xs:element name="requestBody" type="xs:string" minOccurs="0">
      <xs:annotation>
        <xs:documentation>
          Optional request body sent along with the request.
          When this element is used, the request becomes a POST request.
        </xs:documentation>
      </xs:annotation>
    </xs:element>
  </xs:sequence>
</xs:complexType>
<!-- PupOutputFormat: closed list of the output formats accepted for a pup command. -->
<xs:simpleType name="PupOutputFormat">
  <xs:annotation>
    <xs:documentation>
      Output format of a `pup` command.
    </xs:documentation>
  </xs:annotation>
  <xs:restriction base="xs:string">
    <xs:enumeration value="json">
      <xs:annotation>
        <xs:documentation>
          Output the selected tags as JSON (default).
        </xs:documentation>
      </xs:annotation>
    </xs:enumeration>
    <xs:enumeration value="text">
      <xs:annotation>
        <xs:documentation>
          Output the selected tags' inner text.
        </xs:documentation>
      </xs:annotation>
    </xs:enumeration>
    <xs:enumeration value="html">
      <xs:annotation>
        <xs:documentation>
          Output the selected tags as HTML.
        </xs:documentation>
      </xs:annotation>
    </xs:enumeration>
  </xs:restriction>
</xs:simpleType>
<!-- PupCommand: string content (the selector) plus an optional "output" format attribute. -->
<xs:complexType name="PupCommand">
  <xs:annotation>
    <xs:documentation>
      Apply a CSS 3 selector using `pup` on the HTML webpage.
    </xs:documentation>
  </xs:annotation>
  <xs:simpleContent>
    <xs:extension base="xs:string">
      <xs:attribute name="output"
                    type="PupOutputFormat"
                    use="optional"
                    default="json">
        <xs:annotation>
          <xs:documentation>
            Output format to use. Defaults to JSON.
          </xs:documentation>
        </xs:annotation>
      </xs:attribute>
    </xs:extension>
  </xs:simpleContent>
</xs:complexType>
<!-- Xml2JsonCommand: declares no child elements and no attributes, so the element must be empty. -->
<xs:complexType name="Xml2JsonCommand">
  <xs:annotation>
    <xs:documentation>
      Run XML to JSON conversion using xmltodict.
    </xs:documentation>
  </xs:annotation>
</xs:complexType>
<!-- XmlNamespace: an xs:anyURI (the namespace name) with an optional "prefix" attribute. -->
<xs:complexType name="XmlNamespace">
  <xs:annotation>
    <xs:documentation>
      An XML namespace abbreviation to be recognized by xmltodict.
    </xs:documentation>
  </xs:annotation>
  <xs:simpleContent>
    <xs:extension base="xs:anyURI">
      <!-- The default is the empty string, which is why the type is xs:string. -->
      <xs:attribute name="prefix"
                    type="xs:string"
                    use="optional"
                    default="">
        <xs:annotation>
          <xs:documentation>
            The namespace prefix to use in the XML output.
            When not defined, this will become the default prefix (xmlns).
          </xs:documentation>
        </xs:annotation>
      </xs:attribute>
    </xs:extension>
  </xs:simpleContent>
</xs:complexType>
<!-- Json2XmlCommand: any number of namespace children plus an optional short-empty flag. -->
<xs:complexType name="Json2XmlCommand">
  <xs:annotation>
    <xs:documentation>
      Run JSON to XML conversion using xmltodict.
    </xs:documentation>
  </xs:annotation>
  <xs:sequence>
    <xs:element name="namespace"
                type="XmlNamespace"
                minOccurs="0"
                maxOccurs="unbounded" />
  </xs:sequence>
  <xs:attribute name="short-empty"
                type="xs:boolean"
                use="optional"
                default="false">
    <xs:annotation>
      <!-- CDATA keeps the literal tag examples readable without entity escaping. -->
      <xs:documentation>
        <![CDATA[Shorten empty elements to <tag/> instead of <tag></tag>.]]>
      </xs:documentation>
    </xs:annotation>
  </xs:attribute>
</xs:complexType>
<!-- JqArgument: string content (the argument value) plus a required "name" attribute. -->
<xs:complexType name="JqArgument">
  <xs:annotation>
    <xs:documentation>
      Arguments that will be passed to the jq script using jq's --arg parameter.
    </xs:documentation>
  </xs:annotation>
  <xs:simpleContent>
    <xs:extension base="xs:string">
      <xs:attribute name="name" use="required" type="xs:string" />
    </xs:extension>
  </xs:simpleContent>
</xs:complexType>
<!-- JqCommand: any number of arg children, a required script path and an optional raw-output flag. -->
<xs:complexType name="JqCommand">
  <xs:annotation>
    <xs:documentation>
      Transform a JSON document using `jq`.
    </xs:documentation>
  </xs:annotation>
  <xs:sequence>
    <xs:element name="arg"
                type="JqArgument"
                minOccurs="0"
                maxOccurs="unbounded" />
  </xs:sequence>
  <xs:attribute name="path" type="xs:string" use="required">
    <xs:annotation>
      <xs:documentation>
        Path to the jq script relative to the project's jq/ directory.
      </xs:documentation>
    </xs:annotation>
  </xs:attribute>
  <!-- use="optional" is the XSD default; it is spelled out to match the other optional attributes. -->
  <xs:attribute name="raw-output"
                type="xs:boolean"
                use="optional"
                default="false">
    <xs:annotation>
      <xs:documentation>
        Enable the -r flag, causing jq to output raw strings instead of JSON strings.
      </xs:documentation>
    </xs:annotation>
  </xs:attribute>
</xs:complexType>
<!-- ShellCommand: plain string content; the extension adds no attributes. -->
<xs:complexType name="ShellCommand">
  <xs:annotation>
    <xs:documentation>
      A shell command.
      No escaping and no controls will be made on this command, especially on spaces and newlines;
      use this tag with care.
    </xs:documentation>
  </xs:annotation>
  <xs:simpleContent>
    <xs:extension base="xs:string" />
  </xs:simpleContent>
</xs:complexType>
<!-- XidelOutputFormat: closed list of the output formats accepted for an xquery command. -->
<xs:simpleType name="XidelOutputFormat">
  <xs:annotation>
    <xs:documentation>Output format of a `xidel` command.</xs:documentation>
  </xs:annotation>
  <xs:restriction base="xs:string">
    <xs:enumeration value="xml">
      <xs:annotation>
        <xs:documentation>XML document.</xs:documentation>
      </xs:annotation>
    </xs:enumeration>
    <xs:enumeration value="html">
      <xs:annotation>
        <xs:documentation>XHTML document.</xs:documentation>
      </xs:annotation>
    </xs:enumeration>
    <xs:enumeration value="adhoc">
      <xs:annotation>
        <xs:documentation>
          Human-readable representation.
          This will output the `text()` content of XML nodes, and JSON structures are output as they are, with indentation.
        </xs:documentation>
      </xs:annotation>
    </xs:enumeration>
    <xs:enumeration value="xml-wrapped">
      <xs:annotation>
        <xs:documentation>Human-readable representation, embedded within an XML structure.</xs:documentation>
      </xs:annotation>
    </xs:enumeration>
    <xs:enumeration value="json-wrapped">
      <xs:annotation>
        <xs:documentation>Human-readable representation, embedded within a JSON structure.</xs:documentation>
      </xs:annotation>
    </xs:enumeration>
  </xs:restriction>
</xs:simpleType>
<!-- XQueryAction: attribute-only type (no child elements); only "path" is required. -->
<xs:complexType name="XQueryAction">
  <xs:annotation>
    <xs:documentation>
      Run an XQuery script.
    </xs:documentation>
  </xs:annotation>
  <xs:attribute name="path" type="xs:string" use="required">
    <xs:annotation>
      <xs:documentation>
        Path to the XQuery script relative to the project's xquery/ directory.
      </xs:documentation>
    </xs:annotation>
  </xs:attribute>
  <xs:attribute name="timeout"
                type="xs:nonNegativeInteger"
                use="optional"
                default="60">
    <xs:annotation>
      <xs:documentation>
        Maximum execution time for the script, in seconds. Set to 0 to disable.
      </xs:documentation>
    </xs:annotation>
  </xs:attribute>
  <xs:attribute name="user-agent"
                type="xs:string"
                use="optional"
                default="itsb/1.0 (+https://tilde.town/~lucidiot/itsb/)">
    <xs:annotation>
      <xs:documentation>
        User-Agent header to send along with any HTTP requests.
      </xs:documentation>
    </xs:annotation>
  </xs:attribute>
  <xs:attribute name="output-format"
                type="XidelOutputFormat"
                use="optional"
                default="xml">
    <xs:annotation>
      <xs:documentation>
        Format to use to output the script's results.
      </xs:documentation>
    </xs:annotation>
  </xs:attribute>
</xs:complexType>
<!--
  Command: exactly one of the command elements listed below.
  The identity-constraint XPaths use the itsb: prefix because XPath names
  do not pick up the default namespace; the prefix is bound to the target
  namespace on the schema root.
-->
<xs:group name="Command">
  <xs:choice>
    <xs:element name="curl" type="CurlCommand" />
    <xs:element name="pup" type="PupCommand" />
    <xs:element name="xml2json" type="Xml2JsonCommand" />
    <xs:element name="json2xml" type="Json2XmlCommand">
      <!-- Scoped to a single json2xml element: compares the prefix attribute of its namespace children. -->
      <xs:unique name="xmlNamespaces">
        <xs:annotation>
          <xs:documentation>
            XML namespace prefixes should be unique in each json2xml element.
          </xs:documentation>
        </xs:annotation>
        <xs:selector xpath="itsb:namespace" />
        <xs:field xpath="@prefix" />
      </xs:unique>
    </xs:element>
    <xs:element name="jq" type="JqCommand">
      <!-- Scoped to a single jq element: compares the name attribute of its arg children. -->
      <xs:unique name="jqArgs">
        <xs:annotation>
          <xs:documentation>
            Argument names sent to jq should be unique.
          </xs:documentation>
        </xs:annotation>
        <xs:selector xpath="itsb:arg" />
        <xs:field xpath="@name" />
      </xs:unique>
    </xs:element>
    <xs:element name="shell" type="ShellCommand" />
    <xs:element name="xquery" type="XQueryAction" />
  </xs:choice>
</xs:group>
<!-- FeedFormat: closed list of feed format identifiers, used by the "format" attribute of a feed. -->
<xs:simpleType name="FeedFormat">
  <xs:annotation>
    <xs:documentation>Format of a syndication feed.</xs:documentation>
  </xs:annotation>
  <xs:restriction base="xs:string">
    <!-- Values are kept in alphabetical order. -->
    <xs:enumeration value="atom" />
    <xs:enumeration value="cdf" />
    <xs:enumeration value="echo" />
    <xs:enumeration value="json" />
    <xs:enumeration value="rdf" />
    <xs:enumeration value="rss" />
    <xs:enumeration value="rss3" />
  </xs:restriction>
</xs:simpleType>
<!-- FeedOutput: a named alias of xs:string; the restriction adds no facets. -->
<xs:simpleType name="FeedOutput">
  <xs:annotation>
    <xs:documentation>
      Path where the generated feed will be stored, relative to the /feeds/ directory.
    </xs:documentation>
  </xs:annotation>
  <xs:restriction base="xs:string" />
</xs:simpleType>
<!--
  Feed: content is either one or more commands followed by an output element,
  or a single link element. "lang" and "id" are required attributes.
-->
<xs:complexType name="Feed">
  <xs:annotation>
    <xs:documentation>
      <![CDATA[
      A single syndication feed for this source.
      Can be either generated by ITSB or provided externally.
      When provided externally, only the <link> tag can be used.
      ]]>
    </xs:documentation>
  </xs:annotation>
  <xs:choice>
    <!-- First alternative: at least one command, then the output path. -->
    <xs:sequence>
      <xs:group ref="Command" maxOccurs="unbounded" />
      <xs:element name="output" type="FeedOutput" />
    </xs:sequence>
    <!-- Second alternative: a link to a feed hosted elsewhere. -->
    <xs:element name="link" type="URI">
      <xs:annotation>
        <xs:documentation>
          URL to an external syndication feed.
        </xs:documentation>
      </xs:annotation>
    </xs:element>
  </xs:choice>
  <xs:attribute name="lang" type="xs:string" use="required">
    <xs:annotation>
      <xs:documentation>
        Language of the reports in this feed. Not a language code; only for display purposes.
      </xs:documentation>
    </xs:annotation>
  </xs:attribute>
  <xs:attribute name="type" type="xs:string" use="optional">
    <xs:annotation>
      <xs:documentation>
        Report types, similar to the type tag on sources, for this particular feed.
        Allows adding multiple feeds filtered by types.
        When this type is omitted, the source's type can be assumed.
      </xs:documentation>
    </xs:annotation>
  </xs:attribute>
  <!-- use="optional" is the XSD default; it is spelled out to match the other optional attributes. -->
  <xs:attribute name="format"
                type="FeedFormat"
                use="optional"
                default="rss" />
  <xs:attribute name="id" type="xs:ID" use="required" />
</xs:complexType>
<!-- License: string content plus an optional "url" attribute. -->
<xs:complexType name="License">
  <xs:annotation>
    <xs:documentation>
      A particular license that the source applies on its reports.
    </xs:documentation>
  </xs:annotation>
  <xs:simpleContent>
    <xs:extension base="xs:string">
      <xs:attribute name="url" use="optional" type="xs:anyURI">
        <xs:annotation>
          <xs:documentation>
            URL pointing to an HTML page for a description of the license.
          </xs:documentation>
        </xs:annotation>
      </xs:attribute>
    </xs:extension>
  </xs:simpleContent>
</xs:complexType>
<!--
  Source: children must appear in the declared order.
  name, url and at least one feed are mandatory; the other children are optional.
-->
<xs:complexType name="Source">
  <xs:annotation>
    <xs:documentation>
      A single source of investigation reports that can provide one or more syndication feeds.
    </xs:documentation>
  </xs:annotation>
  <xs:sequence>
    <xs:element name="name" type="xs:string">
      <xs:annotation>
        <xs:documentation>
          Source name for display purposes.
        </xs:documentation>
      </xs:annotation>
    </xs:element>
    <xs:element name="description" type="xs:string" minOccurs="0">
      <xs:annotation>
        <xs:documentation>
          Optional description of the source. Encoded HTML tags are allowed.
        </xs:documentation>
      </xs:annotation>
    </xs:element>
    <xs:element name="region" type="xs:string" minOccurs="0">
      <xs:annotation>
        <xs:documentation>
          Optional country or region in which the source acts.
          If the region is not a country, it should be suffixed with a comma, then a country: "Austin, Texas"
        </xs:documentation>
      </xs:annotation>
    </xs:element>
    <xs:element name="type" type="xs:string" minOccurs="0">
      <xs:annotation>
        <xs:documentation>
          Report types, generally the means of transportation, that the source produces.
          This should be a comma-separated list of types and is only meant for display purposes.
        </xs:documentation>
      </xs:annotation>
    </xs:element>
    <xs:element name="frequency" type="xs:string" minOccurs="0">
      <xs:annotation>
        <xs:documentation>
          Arbitrary human estimation of the frequency of report publication for this source.
          Generally expressed in reports per year, sometimes with different values for different languages.
          Only for display purposes.
        </xs:documentation>
      </xs:annotation>
    </xs:element>
    <xs:element name="url" type="URI">
      <xs:annotation>
        <xs:documentation>
          Official website URL that humans can use to learn more about this source.
        </xs:documentation>
      </xs:annotation>
    </xs:element>
    <xs:element name="license" type="License" minOccurs="0" />
    <xs:element name="feed"
                type="Feed"
                minOccurs="1"
                maxOccurs="unbounded" />
  </xs:sequence>
  <xs:attribute name="id" type="xs:ID" use="required" />
</xs:complexType>
<!-- Content: a named alias of xs:string; the restriction adds no facets. -->
<xs:simpleType name="Content">
  <xs:annotation>
    <xs:documentation>
      Raw, encoded HTML content that should be displayed as-is in the section.
      Can be ignored by machines.
    </xs:documentation>
  </xs:annotation>
  <xs:restriction base="xs:string" />
</xs:simpleType>
<!--
  Section: any number of content elements followed by any number of source elements.
  The identity-constraint XPath uses the itsb: prefix because XPath names do not
  pick up the default namespace.
-->
<xs:complexType name="Section">
  <xs:annotation>
    <xs:documentation>
      A separate section displayed in an HTML directory.
    </xs:documentation>
  </xs:annotation>
  <xs:sequence>
    <xs:element name="content"
                type="Content"
                minOccurs="0"
                maxOccurs="unbounded" />
    <xs:element name="source"
                type="Source"
                minOccurs="0"
                maxOccurs="unbounded">
      <!-- Scoped to a single source: compares the (lang, type, format) triple of its feed children. -->
      <xs:unique name="uniqueFeedAttributes">
        <xs:annotation>
          <xs:documentation>
            The language, type and format combination of each feed must be unique within each source.
            Due to constraints defined by the XML Schema 1.0 specification, feeds without a type or using the default format cannot be checked by this constraint.
          </xs:documentation>
        </xs:annotation>
        <xs:selector xpath="itsb:feed" />
        <xs:field xpath="@lang" />
        <xs:field xpath="@type" />
        <xs:field xpath="@format" />
      </xs:unique>
    </xs:element>
  </xs:sequence>
  <!-- use="optional" is the XSD default; it is spelled out to match the other optional attributes. -->
  <xs:attribute name="name" type="xs:string" use="optional">
    <xs:annotation>
      <xs:documentation>
        Title of the section.
      </xs:documentation>
    </xs:annotation>
  </xs:attribute>
  <xs:attribute name="id" type="xs:ID" use="required" />
</xs:complexType>
<!-- Document: type of the root element; one or more sections plus a mandatory version marker. -->
<xs:complexType name="Document">
  <xs:annotation>
    <xs:documentation>
      ITSB documents describe both the structure for the directory page of ITSB
      and the list of syndication feeds either inventoried or generated by ITSB.
      This is meant to be used as a central place to hold feed configuration and
      reduce duplication.
    </xs:documentation>
  </xs:annotation>
  <xs:sequence>
    <xs:element name="section"
                type="Section"
                minOccurs="1"
                maxOccurs="unbounded" />
  </xs:sequence>
  <!--
    The type is declared explicitly: an attribute without a type silently
    falls back to xs:anySimpleType. xs:string keeps the fixed value an exact
    string match, so the set of accepted documents does not change.
  -->
  <xs:attribute name="version"
                type="xs:string"
                use="required"
                fixed="1.0">
    <xs:annotation>
      <xs:documentation>
        Version marker of the document. It is mandatory and the only accepted value is "1.0".
      </xs:documentation>
    </xs:annotation>
  </xs:attribute>
</xs:complexType>
<!--
  itsb: the only global element, so it is the document root.
  Both constraints below are scoped to the whole document; their XPaths use
  the itsb: prefix because XPath names do not pick up the default namespace.
-->
<xs:element name="itsb" type="Document">
  <!-- Compares the text content of every output element at any depth. -->
  <xs:unique name="uniqueOutputs">
    <xs:annotation>
      <xs:documentation>
        Output file paths should be unique across the whole document.
      </xs:documentation>
    </xs:annotation>
    <xs:selector xpath=".//itsb:output" />
    <xs:field xpath="." />
  </xs:unique>
  <!-- Compares the url child of every source element at any depth. -->
  <xs:unique name="uniqueSourceURLs">
    <xs:annotation>
      <xs:documentation>
        Source URLs must be unique across the whole document.
      </xs:documentation>
    </xs:annotation>
    <xs:selector xpath=".//itsb:source" />
    <xs:field xpath="itsb:url" />
  </xs:unique>
</xs:element>
</xs:schema>