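/*
 * HTML tokenizer interface: declares the tokenizing_state_t state
 * enumeration and the set_tokenizing() / next_token() entry points.
 */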
/* Include guard: keeps the declarations in this header from being seen twice. */
#ifndef LIBWEB_HTML_TOKENIZER_H
#define LIBWEB_HTML_TOKENIZER_H

/* Presumably supplies token_t, which this header uses but does not declare. */
#include <LibWeb/HTML/token.h>

/*
 * States of the HTML tokenizer state machine.
 *
 * The enumerators carry no explicit values, so they are numbered from 0 in
 * declaration order (DATA_STATE == 0 ... NUMERIC_CHARACTER_REFERENCE_END_STATE
 * == 79). Do not reorder or insert entries if any code depends on the numeric
 * values (lookup tables, serialized state, ...).
 *
 * The names appear to mirror the tokenizer states of the WHATWG HTML parsing
 * specification, with "less-than sign" shortened to LESS_SIGN.
 */
typedef enum {
    /* Content-model entry states. */
    DATA_STATE,
    RCDATA_STATE,
    RAWTEXT_STATE,
    SCRIPT_DATA_STATE,
    PLAINTEXT_STATE,

    /* Tags. */
    TAG_OPEN_STATE,
    END_TAG_OPEN_STATE,
    TAG_NAME_STATE,

    /* RCDATA end-tag detection. */
    RCDATA_LESS_SIGN_STATE,
    RCDATA_END_TAG_OPEN_STATE,
    RCDATA_END_TAG_NAME_STATE,

    /* RAWTEXT end-tag detection. */
    RAWTEXT_LESS_SIGN_STATE,
    RAWTEXT_END_TAG_OPEN_STATE,
    RAWTEXT_END_TAG_NAME_STATE,

    /* Script data end-tag detection. */
    SCRIPT_DATA_LESS_SIGN_STATE,
    SCRIPT_DATA_END_TAG_OPEN_STATE,
    SCRIPT_DATA_END_TAG_NAME_STATE,

    /* Script data, escaped. */
    SCRIPT_DATA_ESCAPE_START_STATE,
    SCRIPT_DATA_ESCAPE_START_DASH_STATE,
    SCRIPT_DATA_ESCAPED_STATE,
    SCRIPT_DATA_ESCAPED_DASH_STATE,
    SCRIPT_DATA_ESCAPED_DASH_DASH_STATE,
    SCRIPT_DATA_ESCAPED_LESS_SIGN_STATE,
    SCRIPT_DATA_ESCAPED_END_TAG_OPEN_STATE,
    SCRIPT_DATA_ESCAPED_END_TAG_NAME_STATE,

    /* Script data, double escaped. */
    SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE,
    SCRIPT_DATA_DOUBLE_ESCAPED_STATE,
    SCRIPT_DATA_DOUBLE_ESCAPED_DASH_STATE,
    SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH_STATE,
    SCRIPT_DATA_DOUBLE_ESCAPED_LESS_SIGN_STATE,
    SCRIPT_DATA_DOUBLE_ESCAPE_END_STATE,

    /* Attributes. */
    BEFORE_ATTRIBUTE_NAME_STATE,
    ATTRIBUTE_NAME_STATE,
    AFTER_ATTRIBUTE_NAME_STATE,
    BEFORE_ATTRIBUTE_VALUE_STATE,
    ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE,
    ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE,
    ATTRIBUTE_VALUE_UNQUOTED_STATE,
    AFTER_ATTRIBUTE_VALUE_QUOTED_STATE,

    /* Self-closing tags, bogus comments, and "<!" dispatch. */
    SELF_CLOSING_START_TAG_STATE,
    BOGUS_COMMENT_STATE,
    MARKUP_DECLARATION_OPEN_STATE,

    /* Comments. */
    COMMENT_START_STATE,
    COMMENT_START_DASH_STATE,
    COMMENT_STATE,
    COMMENT_LESS_SIGN_STATE,
    COMMENT_LESS_SIGN_BANG_STATE,
    COMMENT_LESS_SIGN_BANG_DASH_STATE,
    COMMENT_LESS_SIGN_BANG_DASH_DASH_STATE,
    COMMENT_END_DASH_STATE,
    COMMENT_END_STATE,
    COMMENT_END_BANG_STATE,

    /* DOCTYPE. */
    DOCTYPE_STATE,
    BEFORE_DOCTYPE_NAME_STATE,
    DOCTYPE_NAME_STATE,
    AFTER_DOCTYPE_NAME_STATE,
    AFTER_DOCTYPE_PUBLIC_KEYWORD_STATE,
    BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE,
    DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE,
    DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE,
    AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE,
    BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS_STATE,
    AFTER_DOCTYPE_SYSTEM_KEYWORD_STATE,
    BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE,
    /*
     * The two quoted system-identifier states were originally spelled with a
     * stray BEFORE_ prefix, unlike their DOCTYPE_PUBLIC_IDENTIFIER_* siblings.
     * The consistent names are introduced here; the original spellings remain
     * as equal-valued aliases so existing callers keep compiling. An alias
     * does not advance the numbering: the enumerator after it continues from
     * the alias value + 1, so every later value is unchanged.
     */
    DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE,
    BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE =
        DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE,
    DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE,
    BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE =
        DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE,
    AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE,
    BOGUS_DOCTYPE_STATE,

    /* CDATA sections. */
    CDATA_SECTION_STATE,
    CDATA_SECTION_BRACKET_STATE,
    CDATA_SECTION_END_STATE,

    /* Character references. */
    CHARACTER_REFERENCE_STATE,
    NAMED_CHARACTER_REFERENCE_STATE,
    AMBIGUOUS_AMPERSAND_STATE,
    NUMERIC_CHARACTER_REFERENCE_STATE,
    HEX_CHARACTER_REFERENCE_START_STATE,
    DEC_CHARACTER_REFERENCE_START_STATE,
    HEX_CHARACTER_REFERENCE_STATE,
    DEC_CHARACTER_REFERENCE_STATE,
    NUMERIC_CHARACTER_REFERENCE_END_STATE
} tokenizing_state_t;

void set_tokenizing(tokenizing_state_t input_state);
|
|
token_t next_token(char *string);
|
|
|
|
#endif /* LIBWEB_HTML_TOKENIZER_H */