Add XSD for local RSS validation

This commit is contained in:
~lucidiot 2023-09-04 11:21:31 +02:00
parent 2ed6866fe7
commit 3a3b065ff4
2 changed files with 484 additions and 1 deletions

View File

@ -13,8 +13,9 @@
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
xmlns:webfeeds="http://webfeeds.org/rss/1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="xsd/rss.xsd"
>
<access:restriction relationship="allow" />
<channel>
<title>RSRSSS</title>
<link>https://envs.net/~lucidiot/rsrsss/feed.xml</link>
@ -1353,4 +1354,5 @@
<icbm:longitude>-106.4753787</icbm:longitude>
</item>
</channel>
<access:restriction relationship="allow" />
</rss>

481
xsd/rss.xsd Normal file
View File

@ -0,0 +1,481 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
XML Schema for RSS v2.0
Copyright (C) 2003-2019 Jorgen Thelin
Copyright (C) 2023 ~lucidiot
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, version 3.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Changes from the upstream at <https://github.com/jthelin/rss2schema>:
-->
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
elementFormDefault="unqualified"
version="2.0.2.19">
<xs:annotation>
<xs:documentation>XML Schema for RSS v2.0 feed files.</xs:documentation>
<xs:documentation>Based on version 2.0.2.18 of the schema provided at https://github.com/jthelin/rss2schema</xs:documentation>
<xs:documentation>Based on the RSS 2.0 specification document at https://www.rssboard.org/rss-specification</xs:documentation>
</xs:annotation>
<xs:element name="rss">
<xs:complexType>
<xs:sequence>
<xs:element name="channel" type="RssChannel"/>
<xs:any namespace="##other" processContents="lax" minOccurs="0" maxOccurs="unbounded"/>
</xs:sequence>
<xs:attribute name="version" type="xs:decimal" use="required" fixed="2.0"/>
<xs:anyAttribute namespace="##any"/>
</xs:complexType>
</xs:element>
<xs:complexType name="RssItem">
<xs:annotation>
<xs:documentation>An item may represent a "story" -- much like a story in a newspaper or magazine; if so its description is a synopsis of the story, and the link points to the full story. An item may also be complete in itself, if so, the description contains the text (entity-encoded HTML is allowed), and the link and title may be omitted.</xs:documentation>
</xs:annotation>
<xs:sequence>
<xs:choice maxOccurs="unbounded">
<xs:element name="title" type="xs:string" minOccurs="0">
<xs:annotation>
<xs:documentation>The title of the item.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="description" type="xs:string" minOccurs="0">
<xs:annotation>
<xs:documentation>The item synopsis.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="link" type="xs:anyURI" minOccurs="0">
<xs:annotation>
<xs:documentation>The URL of the item.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="author" type="EmailAddress" minOccurs="0">
<xs:annotation>
<xs:documentation>Email address of the author of the item.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="category" type="Category" minOccurs="0">
<xs:annotation>
<xs:documentation>Includes the item in one or more categories. </xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="comments" type="xs:anyURI" minOccurs="0">
<xs:annotation>
<xs:documentation>URL of a page for comments relating to the item.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="enclosure" type="Enclosure" minOccurs="0">
<xs:annotation>
<xs:documentation>Describes a media object that is attached to the item.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="guid" type="Guid" minOccurs="0">
<xs:annotation>
<xs:documentation>guid or permalink URL for this entry</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="pubDate" type="Rfc822FormatDate" minOccurs="0">
<xs:annotation>
<xs:documentation>Indicates when the item was published.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="source" type="Source" minOccurs="0">
<xs:annotation>
<xs:documentation>The RSS channel that the item came from.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:any namespace="##other" processContents="lax" minOccurs="0" maxOccurs="unbounded">
<xs:annotation>
<xs:documentation>Extensibility element.</xs:documentation>
</xs:annotation>
</xs:any>
</xs:choice>
</xs:sequence>
<xs:anyAttribute namespace="##any"/>
</xs:complexType>
<xs:complexType name="RssChannel">
<xs:sequence>
<xs:choice maxOccurs="unbounded">
<xs:element name="title" type="xs:string">
<xs:annotation>
<xs:documentation>The name of the channel. It's how people refer to your service. If you have an HTML website that contains the same information as your RSS file, the title of your channel should be the same as the title of your website.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="link" type="xs:anyURI">
<xs:annotation>
<xs:documentation>The URL to the HTML website corresponding to the channel.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="description" type="xs:string">
<xs:annotation>
<xs:documentation>Phrase or sentence describing the channel.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="language" type="xs:language" minOccurs="0">
<xs:annotation>
<xs:documentation>The language the channel is written in. This allows aggregators to group all Italian language sites, for example, on a single page. A list of allowable values for this element, as provided by Netscape, is here. You may also use values defined by the W3C.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="copyright" type="xs:string" minOccurs="0">
<xs:annotation>
<xs:documentation>Copyright notice for content in the channel.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="managingEditor" type="EmailAddress" minOccurs="0">
<xs:annotation>
<xs:documentation>Email address for person responsible for editorial content.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="webMaster" type="EmailAddress" minOccurs="0">
<xs:annotation>
<xs:documentation>Email address for person responsible for technical issues relating to channel.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="pubDate" type="Rfc822FormatDate" minOccurs="0">
<xs:annotation>
<xs:documentation>The publication date for the content in the channel. All date-times in RSS conform to the Date and Time Specification of RFC 822, with the exception that the year may be expressed with two characters or four characters (four preferred).</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="lastBuildDate" type="Rfc822FormatDate" minOccurs="0">
<xs:annotation>
<xs:documentation>The last time the content of the channel changed.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="category" type="Category" minOccurs="0">
<xs:annotation>
<xs:documentation>Specify one or more categories that the channel belongs to.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="generator" type="xs:string" minOccurs="0">
<xs:annotation>
<xs:documentation>A string indicating the program used to generate the channel.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="docs" type="xs:anyURI" minOccurs="0">
<xs:annotation>
<xs:documentation>A URL that points to the documentation for the format used in the RSS file. It's probably a pointer to this page. It's for people who might stumble across an RSS file on a Web server 25 years from now and wonder what it is.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="cloud" type="Cloud" minOccurs="0">
<xs:annotation>
<xs:documentation>Allows processes to register with a cloud to be notified of updates to the channel, implementing a lightweight publish-subscribe protocol for RSS feeds.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="ttl" type="xs:nonNegativeInteger" minOccurs="0">
<xs:annotation>
<xs:documentation>ttl stands for time to live. It's a number of minutes that indicates how long a channel can be cached before refreshing from the source.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="image" type="Image" minOccurs="0">
<xs:annotation>
<xs:documentation>Specifies a GIF, JPEG or PNG image that can be displayed with the channel.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="rating" type="xs:string" minOccurs="0">
<xs:annotation>
<xs:documentation>The PICS rating for the channel.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="textInput" type="TextInput" minOccurs="0">
<xs:annotation>
<xs:documentation>Specifies a text input box that can be displayed with the channel.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="skipHours" type="SkipHoursList" minOccurs="0">
<xs:annotation>
<xs:documentation>A hint for aggregators telling them which hours they can skip.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="skipDays" type="SkipDaysList" minOccurs="0">
<xs:annotation>
<xs:documentation>A hint for aggregators telling them which days they can skip.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:any namespace="##other" processContents="lax" minOccurs="0" maxOccurs="unbounded">
<xs:annotation>
<xs:documentation>Extensibility element.</xs:documentation>
</xs:annotation>
</xs:any>
</xs:choice>
<xs:element name="item" type="RssItem" minOccurs="1" maxOccurs="unbounded">
<!--
HACK: According to the RSS 2.0 spec, it should strictly be possible to have zero item elements,
but this makes the schema non-deterministic with regard to extensibility elements
so for the moment we undid bug-fix 10231 and set minOccurs=1 to work around this problem.
-->
</xs:element>
<xs:any namespace="##other" processContents="lax" minOccurs="0" maxOccurs="unbounded">
<xs:annotation>
<xs:documentation>Extensibility element.</xs:documentation>
</xs:annotation>
</xs:any>
</xs:sequence>
<xs:anyAttribute namespace="##any"/>
</xs:complexType>
<xs:simpleType name="SkipHour">
<xs:annotation>
<xs:documentation>A time in GMT when aggregators should not request the channel data. The hour beginning at midnight is hour zero.</xs:documentation>
</xs:annotation>
<xs:restriction base="xs:nonNegativeInteger">
<xs:minInclusive value="0"/>
<xs:maxInclusive value="23"/>
</xs:restriction>
</xs:simpleType>
<xs:complexType name="SkipHoursList">
<xs:sequence>
<xs:element name="hour" type="SkipHour" minOccurs="0" maxOccurs="24"/>
</xs:sequence>
</xs:complexType>
<xs:simpleType name="SkipDay">
<xs:annotation>
<xs:documentation>A day when aggregators should not request the channel data.</xs:documentation>
</xs:annotation>
<xs:restriction base="xs:string">
<xs:enumeration value="Monday"/>
<xs:enumeration value="Tuesday"/>
<xs:enumeration value="Wednesday"/>
<xs:enumeration value="Thursday"/>
<xs:enumeration value="Friday"/>
<xs:enumeration value="Saturday"/>
<xs:enumeration value="Sunday"/>
</xs:restriction>
</xs:simpleType>
<xs:complexType name="SkipDaysList">
<xs:sequence>
<xs:element name="day" type="SkipDay" minOccurs="0" maxOccurs="7">
<xs:annotation>
<xs:documentation>A time in GMT, when aggregators should not request the channel data. The hour beginning at midnight is hour zero.</xs:documentation>
</xs:annotation>
</xs:element>
</xs:sequence>
</xs:complexType>
<xs:complexType name="Category">
<xs:simpleContent>
<xs:extension base="xs:string">
<xs:attribute name="domain" type="xs:string" use="optional"/>
</xs:extension>
</xs:simpleContent>
</xs:complexType>
<xs:complexType name="Image">
<xs:all>
<xs:element name="url" type="xs:anyURI">
<xs:annotation>
<xs:documentation>The URL of the image file.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="title" type="xs:string">
<xs:annotation>
<xs:documentation>Describes the image, it's used in the ALT attribute of the HTML &lt;img&gt; tag when the channel is rendered in HTML.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="link" type="xs:anyURI">
<xs:annotation>
<xs:documentation>The URL of the site, when the channel is rendered, the image is a link to the site. (Note, in practice the image &lt;title&gt; and &lt;link&gt; should have the same value as the channel's &lt;title&gt; and &lt;link&gt;. </xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="width" type="ImageWidth" default="88" minOccurs="0">
<xs:annotation>
<xs:documentation>The width of the image in pixels.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="height" type="ImageHeight" default="31" minOccurs="0">
<xs:annotation>
<xs:documentation>The height of the image in pixels.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="description" type="xs:string" minOccurs="0">
<xs:annotation>
<xs:documentation>Text that is included in the TITLE attribute of the link formed around the image in the HTML rendering.</xs:documentation>
</xs:annotation>
</xs:element>
</xs:all>
</xs:complexType>
<xs:simpleType name="ImageHeight">
<xs:annotation>
<xs:documentation>The height of the image in pixels.</xs:documentation>
</xs:annotation>
<xs:restriction base="xs:positiveInteger">
<xs:maxInclusive value="400"/>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="ImageWidth">
<xs:annotation>
<xs:documentation>The width of the image in pixels.</xs:documentation>
</xs:annotation>
<xs:restriction base="xs:positiveInteger">
<xs:maxInclusive value="144"/>
</xs:restriction>
</xs:simpleType>
<xs:complexType name="Cloud">
<xs:annotation>
<xs:documentation>Specifies a web service that supports the rssCloud interface which can be implemented in HTTP-POST, XML-RPC or SOAP 1.1. Its purpose is to allow processes to register with a cloud to be notified of updates to the channel, implementing a lightweight publish-subscribe protocol for RSS feeds.</xs:documentation>
</xs:annotation>
<xs:attribute name="domain" type="xs:string" use="required"/>
<xs:attribute name="port" type="xs:positiveInteger" use="required"/>
<xs:attribute name="path" type="xs:string" use="required"/>
<xs:attribute name="registerProcedure" type="xs:string" use="required"/>
<xs:attribute name="protocol" type="CloudProtocol" use="required"/>
</xs:complexType>
<xs:simpleType name="CloudProtocol">
<xs:restriction base="xs:string">
<xs:enumeration value="xml-rpc"/>
<xs:enumeration value="http-post"/>
<xs:enumeration value="soap"/>
</xs:restriction>
</xs:simpleType>
<xs:complexType name="TextInput">
<xs:annotation>
<xs:documentation>The purpose of this element is something of a mystery! You can use it to specify a search engine box. Or to allow a reader to provide feedback. Most aggregators ignore it.</xs:documentation>
</xs:annotation>
<xs:all>
<xs:element name="title" type="xs:string">
<xs:annotation>
<xs:documentation>The label of the Submit button in the text input area.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="description" type="xs:string">
<xs:annotation>
<xs:documentation>Explains the text input area.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="name" type="xs:string">
<xs:annotation>
<xs:documentation>The name of the text object in the text input area.</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="link" type="xs:anyURI">
<xs:annotation>
<xs:documentation>The URL of the CGI script that processes text input requests.</xs:documentation>
</xs:annotation>
</xs:element>
</xs:all>
</xs:complexType>
<xs:simpleType name="EmailAddress">
<xs:annotation>
<xs:documentation>Using the regexp definiton of E-Mail Address by Lucadean from the .NET RegExp Pattern Repository at http://www.3leaf.com/default/NetRegExpRepository.aspx </xs:documentation>
</xs:annotation>
<xs:restriction base="xs:string">
<xs:pattern value="([a-zA-Z0-9_\-])([a-zA-Z0-9_\-\.]*)@(\[((25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\.){3}|((([a-zA-Z0-9\-]+)\.)+))([a-zA-Z]{2,}|(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\])( \([^())]+\))?"/>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="Rfc822FormatDate">
<xs:annotation>
<xs:documentation>A date-time displayed in RFC-822 format.</xs:documentation>
<xs:documentation>Using the regexp definiton of rfc-822 date by Sam Ruby at http://www.intertwingly.net/blog/1360.html </xs:documentation>
</xs:annotation>
<xs:restriction base="xs:string">
<xs:pattern value="(((Mon)|(Tue)|(Wed)|(Thu)|(Fri)|(Sat)|(Sun)), *)?\d\d? +((Jan)|(Feb)|(Mar)|(Apr)|(May)|(Jun)|(Jul)|(Aug)|(Sep)|(Oct)|(Nov)|(Dec)) +\d\d(\d\d)? +\d\d:\d\d(:\d\d)? +(([+\-]?\d\d\d\d)|(UT)|(GMT)|(EST)|(EDT)|(CST)|(CDT)|(MST)|(MDT)|(PST)|(PDT)|\w)"/>
</xs:restriction>
</xs:simpleType>
<xs:complexType name="Source">
<xs:simpleContent>
<xs:extension base="xs:string">
<xs:attribute name="url" type="xs:anyURI"/>
</xs:extension>
</xs:simpleContent>
</xs:complexType>
<xs:complexType name="Enclosure">
<xs:simpleContent>
<xs:extension base="xs:string">
<xs:attribute name="url" type="xs:anyURI" use="required">
<xs:annotation>
<xs:documentation>URL where the enclosure is located</xs:documentation>
</xs:annotation>
</xs:attribute>
<xs:attribute name="length" type="xs:nonNegativeInteger" use="required">
<xs:annotation>
<xs:documentation>Size in bytes</xs:documentation>
</xs:annotation>
</xs:attribute>
<xs:attribute name="type" type="xs:string" use="required">
<xs:annotation>
<xs:documentation>MIME media-type of the enclosure</xs:documentation>
</xs:annotation>
</xs:attribute>
</xs:extension>
</xs:simpleContent>
</xs:complexType>
<xs:complexType name="Guid">
<xs:simpleContent>
<xs:extension base="xs:string">
<xs:attribute name="isPermaLink" type="xs:boolean" use="optional" default="true"/>
</xs:extension>
</xs:simpleContent>
</xs:complexType>
<!--
TODO:
- Need to add regexp pattern for MIME media-type value of tEnclosure/type
- Need to add regexp pattern for checking contents of guid is a URL when isPermaLink=true"
- Need to add some form of constraint to check on an item that one, or other, or both of title and description are present.
However, I'm not sure it is possible to represent these constraints in XML Schema language alone.
- Need some way to enforce cardinality constraints preventing repeated elements in channels or items
- Unfortunately the bug-fix for issue 10231 made this schema non-deterministic with respect to extensibility elements.
We can't tell whether an extension element in tRssChannel is within the choice or after the item elements.
Need to reconsider the solution to bug-fix 10231.
-->
<!--
Change Log:
Date Revision Description
31-Mar-2003 1 Initial version released for comment
31-Mar-2003 2 Changes based on feedback from Gudge:
- Remove targetNamespace="" and use elementFormDefault="unqualified" instead
- Use namespace="##other" on <any>'s to create a more deterministic data model.
- Added missing xs:documentation inside xs:annotation at the schema level.
- Use xs:language for ISO Language Codes in <language> element.
- Change guid to a single declaration. This loses some of the checking of the
URL when the contents of the guid is a permaLink, so we will need to add
that back in with a regexp pattern.
14-Apr-2003 3 Changes to solve some element ordering problems.
- Use xs:all in place of xs:sequence to support flexible ordering of elements.
Although the ordering constraints for elements is not clear from the
original specification, the custom and practice seems to be that
element ordering is free-form.
- Use elementFormDefault="qualified" for explicit intent.
15-Apr-2003 4 Changes to solve some element ordering problems.
- Use xs:choice in place of xs:all as previous usage of <all> was invalid.
This creates the problem that insufficient constraints can be applied
by the schema - for example, it can't prevent two title elements for an item.
- Use elementFormDefault="unqualified" for to get the correct behaviours
when importing and combining schemas.
15-Apr-2003 5 Putting the extensibility element inside the repeating choice solves
all problems with element ordering.
15-Apr-2003 6 - skipHours and skipDays should contain a nested list of values,
not just a single value.
- Added version attribute to schema definition.
- Corrected type of the cloud element
25-Apr-2003 7 - Add regexp for RFC-822 date suggested by Sam Ruby
- I had to leave the base type of the tRfc822FormatDate type
as xs:string due to the problems with using
a pattern with xs:dateTime described at
http://www.thearchitect.co.uk/weblog/archives/2003/04/000142.html
19-Jun-2003 8 - Fixed a bug that the Oxygen XML Editor spotted in the regexp for RFC-822 dates
23-Jun-2003 9 - Added legal boilerplate license text for LGPL.
- Minor formatting changes.
24-Jun-2003 10 - Missing types for item/title and item/description - Spotted by Andreas Schwotzer.
01-Jan-2008 11 - Copy made available under the Microsoft Public License (MS-PL).
25-May-2008 12 - Bug fix 10231 from Ken Gruven - channel can contain zero or more items.
06-Sep-2008 13 - Fixed tab-space whitespace issues. Now always use spaces.
- Undid the fix for bug-fix 10231 since it made the schema non-deterministic
with respect to extensibility elements in tRssChannel - need to reconsider the fix.
08-Sep-2008 14 - Removed 't' prefixes from type names to improve class names
that get code-generated from the schema.
22-Sep-2008 15 - Move type def for rss element in-line for improved compatibility with Java 1.6 tools.
01-Nov-2008 16 - Added the missing rating element from the spec to RssChannel.
04-Sep-2019 17 - Fixed some minor typos in comments and messages.
04-Sep-2019 18 - Use MIT license.
04-Sep-2023 19 - Copied schema from <https://github.com/jthelin/rss2schema>
- Fixed email addresses validation to support `email (real name)`,
as in <https://www.rssboard.org/rss-profile#data-types-email>
-->
</xs:schema>