HTTP request and HTML response parsing

This commit is contained in:
~lucidiot 2022-06-19 04:12:37 +02:00
parent caf5e5e070
commit f6ef16168c
9 changed files with 194 additions and 69 deletions

View File

@ -34,8 +34,9 @@ namespace ToeCracker {
this.Enabled = false;
Dictionary<string, string[]> results = new Dictionary<string,string[]>();
results = Thesaurus.Search(query);
try {
results = Thesaurus.Search(query);
// results = Thesaurus.Search(query);
} catch (Exception e) {
// TODO: Maybe specialize the exception handler?
// TODO: Internationalize the error dialog box

View File

@ -121,6 +121,23 @@
<data name="tableLayoutPanel.ColumnCount" type="System.Int32, mscorlib">
<value>2</value>
</data>
<assembly alias="System.Drawing" name="System.Drawing, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a" />
<data name="defaultTabPage.Location" type="System.Drawing.Point, System.Drawing">
<value>4, 22</value>
</data>
<assembly alias="System.Windows.Forms" name="System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" />
<data name="defaultTabPage.Padding" type="System.Windows.Forms.Padding, System.Windows.Forms">
<value>3, 3, 3, 3</value>
</data>
<data name="defaultTabPage.Size" type="System.Drawing.Size, System.Drawing">
<value>278, 306</value>
</data>
<data name="defaultTabPage.TabIndex" type="System.Int32, mscorlib">
<value>1</value>
</data>
<data name="defaultTabPage.Text" xml:space="preserve">
<value>No results</value>
</data>
<data name="&gt;&gt;defaultTabPage.Name" xml:space="preserve">
<value>defaultTabPage</value>
</data>
@ -133,11 +150,9 @@
<data name="&gt;&gt;defaultTabPage.ZOrder" xml:space="preserve">
<value>0</value>
</data>
<assembly alias="System.Windows.Forms" name="System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" />
<data name="tabControl.Dock" type="System.Windows.Forms.DockStyle, System.Windows.Forms">
<value>Fill</value>
</data>
<assembly alias="System.Drawing" name="System.Drawing, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a" />
<data name="tabControl.Location" type="System.Drawing.Point, System.Drawing">
<value>3, 31</value>
</data>
@ -159,9 +174,6 @@
<data name="&gt;&gt;tabControl.ZOrder" xml:space="preserve">
<value>0</value>
</data>
<metadata name="toolTip.TrayLocation" type="System.Drawing.Point, System.Drawing, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a">
<value>182, 17</value>
</metadata>
<data name="searchTextBox.Dock" type="System.Windows.Forms.DockStyle, System.Windows.Forms">
<value>Fill</value>
</data>
@ -174,6 +186,9 @@
<data name="searchTextBox.TabIndex" type="System.Int32, mscorlib">
<value>1</value>
</data>
<metadata name="toolTip.TrayLocation" type="System.Drawing.Point, System.Drawing, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a">
<value>182, 17</value>
</metadata>
<data name="searchTextBox.ToolTip" xml:space="preserve">
<value>Type in a word to look up in the thesaurus.</value>
</data>
@ -207,8 +222,8 @@
<data name="searchButton.Image" type="System.Drawing.Bitmap, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
<value>
iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8
YQUAAAAgY0hSTQAAeiYAAICEAAD6AAAAgOgAAHUwAADqYAAAOpgAABdwnLpRPAAAAAlwSFlzAAALEAAA
CxABrSO9dQAAAnxJREFUOE+Vk9tLYlEUxk2SwjApuxzFEjsVaJdjRGU3LEOxEiOTICOpJCwK7GGosZm0
YQUAAAAgY0hSTQAAeiYAAICEAAD6AAAAgOgAAHUwAADqYAAAOpgAABdwnLpRPAAAAAlwSFlzAAALDgAA
Cw4BQL7hQQAAAnxJREFUOE+Vk9tLYlEUxk2SwjApuxzFEjsVaJdjRGU3LEOxEiOTICOpJCwK7GGosZm0
maGXsIeYmi5UlsfSrmrZBZqBeep13oaZp/6Ubzr75eSbs2HBfljfb629vrUFAJjXeHmN/z2chhFw4uSf
JBaeFuC59WD0ahSOMwcGogPoi/Sh/6QfZtYMw5EB7YftaNhpAB2ksfa0xhV84QBYfFrEwa8DBJ+D8H33
Yf5xHjOpGUzdTGEyMYmx6zEMXwzDFrWh+7gbzCYD+aKcdEwAs3ezWH9ex8rPFXgfvPDceDB+PY7U3xRJ
@ -276,45 +291,9 @@
<data name="tableLayoutPanel.LayoutSettings" type="System.Windows.Forms.TableLayoutSettings, System.Windows.Forms">
<value>&lt;?xml version="1.0" encoding="utf-16"?&gt;&lt;TableLayoutSettings&gt;&lt;Controls&gt;&lt;Control Name="tabControl" Row="1" RowSpan="1" Column="0" ColumnSpan="2" /&gt;&lt;Control Name="searchTextBox" Row="0" RowSpan="1" Column="0" ColumnSpan="1" /&gt;&lt;Control Name="searchButton" Row="0" RowSpan="1" Column="1" ColumnSpan="1" /&gt;&lt;/Controls&gt;&lt;Columns Styles="Percent,100,AutoSize,0" /&gt;&lt;Rows Styles="AutoSize,0,Percent,100" /&gt;&lt;/TableLayoutSettings&gt;</value>
</data>
<data name="defaultTabPage.Location" type="System.Drawing.Point, System.Drawing">
<value>4, 22</value>
</data>
<data name="defaultTabPage.Padding" type="System.Windows.Forms.Padding, System.Windows.Forms">
<value>3, 3, 3, 3</value>
</data>
<data name="defaultTabPage.Size" type="System.Drawing.Size, System.Drawing">
<value>278, 306</value>
</data>
<data name="defaultTabPage.TabIndex" type="System.Int32, mscorlib">
<value>1</value>
</data>
<data name="defaultTabPage.Text" xml:space="preserve">
<value>No results</value>
</data>
<data name="&gt;&gt;defaultTabPage.Name" xml:space="preserve">
<value>defaultTabPage</value>
</data>
<data name="&gt;&gt;defaultTabPage.Type" xml:space="preserve">
<value>System.Windows.Forms.TabPage, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</data>
<data name="&gt;&gt;defaultTabPage.Parent" xml:space="preserve">
<value>tabControl</value>
</data>
<data name="&gt;&gt;defaultTabPage.ZOrder" xml:space="preserve">
<value>0</value>
</data>
<metadata name="wordContextMenuStrip.TrayLocation" type="System.Drawing.Point, System.Drawing, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a">
<value>17, 17</value>
</metadata>
<data name="wordContextMenuStrip.Size" type="System.Drawing.Size, System.Drawing">
<value>124, 48</value>
</data>
<data name="&gt;&gt;wordContextMenuStrip.Name" xml:space="preserve">
<value>wordContextMenuStrip</value>
</data>
<data name="&gt;&gt;wordContextMenuStrip.Type" xml:space="preserve">
<value>System.Windows.Forms.ContextMenuStrip, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</data>
<data name="copyToolStripMenuItem.Size" type="System.Drawing.Size, System.Drawing">
<value>123, 22</value>
</data>
@ -327,9 +306,15 @@
<data name="lookUpToolStripMenuItem.Text" xml:space="preserve">
<value>&amp;Look Up</value>
</data>
<metadata name="toolTip.TrayLocation" type="System.Drawing.Point, System.Drawing, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a">
<value>182, 17</value>
</metadata>
<data name="wordContextMenuStrip.Size" type="System.Drawing.Size, System.Drawing">
<value>124, 48</value>
</data>
<data name="&gt;&gt;wordContextMenuStrip.Name" xml:space="preserve">
<value>wordContextMenuStrip</value>
</data>
<data name="&gt;&gt;wordContextMenuStrip.Type" xml:space="preserve">
<value>System.Windows.Forms.ContextMenuStrip, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</data>
<metadata name="$this.Localizable" type="System.Boolean, mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089">
<value>True</value>
</metadata>

View File

@ -9,8 +9,9 @@
//------------------------------------------------------------------------------
namespace ToeCracker.Properties {
using System;
/// <summary>
/// A strongly-typed resource class, for looking up localized strings, etc.
/// </summary>
@ -22,29 +23,29 @@ namespace ToeCracker.Properties {
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
internal class Resources {
private static global::System.Resources.ResourceManager resourceMan;
private static global::System.Globalization.CultureInfo resourceCulture;
[global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")]
internal Resources() {
}
/// <summary>
/// Returns the cached ResourceManager instance used by this class.
/// </summary>
[global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
internal static global::System.Resources.ResourceManager ResourceManager {
get {
if ((resourceMan == null)) {
if (object.ReferenceEquals(resourceMan, null)) {
global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("ToeCracker.Properties.Resources", typeof(Resources).Assembly);
resourceMan = temp;
}
return resourceMan;
}
}
/// <summary>
/// Overrides the current thread's CurrentUICulture property for all
/// resource lookups using this strongly typed resource class.

View File

@ -46,7 +46,7 @@
mimetype: application/x-microsoft.net.object.binary.base64
value : The object must be serialized with
: System.Serialization.Formatters.Binary.BinaryFormatter
: System.Runtime.Serialization.Formatters.Binary.BinaryFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.soap.base64
@ -60,6 +60,7 @@
: and then encoded with base64 encoding.
-->
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" />
<xsd:element name="root" msdata:IsDataSet="true">
<xsd:complexType>
<xsd:choice maxOccurs="unbounded">
@ -68,9 +69,10 @@
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" />
<xsd:attribute name="name" use="required" type="xsd:string" />
<xsd:attribute name="type" type="xsd:string" />
<xsd:attribute name="mimetype" type="xsd:string" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="assembly">
@ -85,9 +87,10 @@
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" msdata:Ordinal="1" />
<xsd:attribute name="name" type="xsd:string" use="required" msdata:Ordinal="1" />
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="resheader">

View File

@ -9,18 +9,30 @@
//------------------------------------------------------------------------------
namespace ToeCracker.Properties {
// NOTE(review): the GeneratedCodeAttribute below names SettingsSingleFileGenerator, so this class
// appears to be tool-generated; manual edits are presumably lost on regeneration - confirm before editing.
/// <summary>
/// Application settings for ToeCracker, accessed through the shared <see cref="Default"/> instance.
/// </summary>
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
[global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "8.0.0.0")]
internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase {
// Single shared instance, wrapped by ApplicationSettingsBase.Synchronized.
private static Settings defaultInstance = ((Settings)(global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings())));
/// <summary>
/// Gets the shared settings instance.
/// </summary>
public static Settings Default {
get {
return defaultInstance;
}
}
/// <summary>
/// Gets or sets the user-scoped "BaseURL" setting.
/// The declared default value is "https://words.bighugelabs.com/{0}".
/// </summary>
[global::System.Configuration.UserScopedSettingAttribute()]
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
[global::System.Configuration.DefaultSettingValueAttribute("https://words.bighugelabs.com/{0}")]
public string BaseURL {
get {
return ((string)(this["BaseURL"]));
}
set {
this["BaseURL"] = value;
}
}
}
}

View File

@ -1,7 +1,9 @@
<?xml version='1.0' encoding='utf-8'?>
<SettingsFile xmlns="http://schemas.microsoft.com/VisualStudio/2004/01/settings" CurrentProfile="(Default)">
<Profiles>
<Profile Name="(Default)" />
</Profiles>
<Settings />
</SettingsFile>
<SettingsFile xmlns="http://schemas.microsoft.com/VisualStudio/2004/01/settings" CurrentProfile="(Default)" GeneratedClassNamespace="ToeCracker.Properties" GeneratedClassName="Settings">
<Profiles />
<Settings>
<Setting Name="BaseURL" Type="System.String" Scope="User">
<Value Profile="(Default)">https://words.bighugelabs.com/{0}</Value>
</Setting>
</Settings>
</SettingsFile>

View File

@ -1,18 +1,121 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Windows.Forms;
using System.Xml;
using System.Text;
namespace ToeCracker {
    /// <summary>
    /// Methods to look up a word in the thesaurus and parse the results.
    /// </summary>
    public static class Thesaurus {
        /// <summary>Opening tag of the block that holds the search results in the HTML response.</summary>
        private const string RESULT_HEADER = "<div class=\"results\">";

        /// <summary>Closing tag that ends the search results block.</summary>
        private const string RESULT_FOOTER = "</div>";

        /// <summary>
        /// Build the absolute URI to request for a given query by substituting it
        /// into the <c>BaseURL</c> setting, which is used as a format string.
        /// </summary>
        /// <param name="query">Word to look for. Must not be null.</param>
        /// <returns>The absolute URI of the thesaurus page for this word.</returns>
        private static Uri BuildURI(string query) {
            // Escape the query so that characters such as '/', '?' or '#' typed by the user
            // stay part of the looked-up word instead of changing the structure of the URI.
            string escapedQuery = Uri.EscapeDataString(query);
            return new Uri(String.Format(Properties.Settings.Default.BaseURL, escapedQuery), UriKind.Absolute);
        }

        /// <summary>
        /// Look up a word in the thesaurus and return the results.
        /// </summary>
        /// <param name="query">Word to look for.</param>
        /// <returns>A dictionary holding an array of words for each word group.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="query"/> is null.</exception>
        /// <exception cref="Exception">
        /// The HTTP status code is outside of the 200-299 range, the result block could not be
        /// located in the response, or a word list was found before any group name.
        /// </exception>
        public static Dictionary<string, string[]> Search(string query) {
            if (query == null)
                throw new ArgumentNullException("query");

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(BuildURI(query));
            request.UserAgent = String.Format("{0}/{1}", Application.ProductName, Application.ProductVersion);

            XmlDocument doc = new XmlDocument();
            // Dispose the response even when parsing fails, so the underlying connection is released.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse()) {
                // TODO: Better exception types!
                if ((int)response.StatusCode < 200 || (int)response.StatusCode > 299)
                    throw new Exception(String.Format("Unexpected HTTP status code {0}", response.StatusCode));

                using (Stream stream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(stream)) {
                    doc.LoadXml(ReadResultBlock(reader));
                }
            }

            return ParseResults(doc);
        }

        /// <summary>
        /// Extract the search results block from the HTML response.
        /// </summary>
        /// <remarks>
        /// The HTML response is not XHTML, so it cannot be parsed as XML as a whole.
        /// However, the search results (including a "Not found" error) are in a div tag
        /// whose contents can be parsed using an XML parser. This method skips lines until one
        /// contains the opening tag, then collects every following line up to and including
        /// the first one that contains the closing tag. Anything else found on the line of the
        /// opening tag is discarded, and a fresh opening tag is emitted in its place.
        /// </remarks>
        /// <param name="reader">Reader positioned at the beginning of the HTML response.</param>
        /// <returns>The text of the result block, starting with its opening tag.</returns>
        /// <exception cref="Exception">The beginning or the end of the result block was not found.</exception>
        private static string ReadResultBlock(TextReader reader) {
            string line;
            while ((line = reader.ReadLine()) != null) {
                if (line.Contains(RESULT_HEADER))
                    break;
            }
            if (line == null)
                throw new Exception("Result block not found.");

            StringBuilder block = new StringBuilder(RESULT_HEADER);
            while ((line = reader.ReadLine()) != null) {
                block.AppendLine(line);
                if (line.Contains(RESULT_FOOTER))
                    break;
            }
            if (line == null)
                throw new Exception("End of result block not found.");

            return block.ToString();
        }

        /// <summary>
        /// Turn the parsed result block into a dictionary of word groups.
        /// </summary>
        /// <remarks>
        /// Three types of tags are expected directly inside of the root element:
        /// - h2, which contains the word that was just searched and is ignored;
        /// - h3, the beginning of a new word group;
        /// - ul, listing words inside the current group.
        /// Any other tag is ignored to stay flexible. Groups without any word are not returned.
        /// </remarks>
        /// <param name="doc">Document whose root element is the result block.</param>
        /// <returns>A dictionary holding an array of words for each word group.</returns>
        /// <exception cref="Exception">A word list was found before any group name.</exception>
        private static Dictionary<string, string[]> ParseResults(XmlDocument doc) {
            Dictionary<string, string[]> results = new Dictionary<string, string[]>();
            string groupName = null;
            List<string> words = new List<string>();

            foreach (XmlNode node in doc.DocumentElement.ChildNodes) {
                if (node.NodeType != XmlNodeType.Element)
                    continue;

                if (IsElementNamed(node, "h3")) {
                    // Beginning of a new word group: end the current one first.
                    StoreGroup(results, groupName, words);
                    groupName = node.InnerText;
                } else if (IsElementNamed(node, "ul")) {
                    if (groupName == null)
                        throw new Exception("Unexpected word list with no group name");
                    foreach (XmlNode listItem in node.ChildNodes) {
                        // Tag names are compared case-insensitively, like for h3 and ul above.
                        if (listItem.NodeType == XmlNodeType.Element && IsElementNamed(listItem, "li"))
                            words.Add(listItem.InnerText);
                    }
                }
            }

            // Once we have finished iterating, there might be one remaining group to add to the dictionary.
            StoreGroup(results, groupName, words);
            return results;
        }

        /// <summary>
        /// Check whether a node has the given tag name, ignoring case.
        /// </summary>
        private static bool IsElementNamed(XmlNode node, string name) {
            return String.Equals(node.Name, name, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Store the pending words under their group name, then empty the pending list.
        /// Does nothing when there is no current group or no pending word.
        /// </summary>
        /// <param name="results">Dictionary to store the group into.</param>
        /// <param name="groupName">Name of the group being ended, or null if none was started yet.</param>
        /// <param name="words">Words collected for this group; cleared once stored.</param>
        private static void StoreGroup(Dictionary<string, string[]> results, string groupName, List<string> words) {
            if (groupName == null || words.Count == 0)
                return;

            string[] existingWords;
            if (results.TryGetValue(groupName, out existingWords)) {
                // Group names should be unique, but duplicates are tolerated by concatenating the word lists.
                List<string> mergedWords = new List<string>(existingWords);
                mergedWords.AddRange(words);
                results[groupName] = mergedWords.ToArray();
            } else {
                results[groupName] = words.ToArray();
            }
            words.Clear();
        }
    }
}

View File

@ -44,6 +44,7 @@
<Compile Include="MainForm.Designer.cs">
<DependentUpon>MainForm.cs</DependentUpon>
</Compile>
<Compile Include="ConcatenatedStream.cs" />
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<EmbeddedResource Include="MainForm.resx">
@ -58,7 +59,9 @@
<Compile Include="Properties\Resources.Designer.cs">
<AutoGen>True</AutoGen>
<DependentUpon>Resources.resx</DependentUpon>
<DesignTime>True</DesignTime>
</Compile>
<None Include="app.config" />
<None Include="Properties\Settings.settings">
<Generator>SettingsSingleFileGenerator</Generator>
<LastGenOutput>Settings.Designer.cs</LastGenOutput>

15
ToeCracker/app.config Normal file
View File

@ -0,0 +1,15 @@
<?xml version="1.0" encoding="utf-8" ?>
<!-- Application configuration for ToeCracker: declares the user settings section and its default values. -->
<configuration>
<configSections>
<!-- Registers the userSettings group and the section that holds ToeCracker.Properties.Settings. -->
<sectionGroup name="userSettings" type="System.Configuration.UserSettingsGroup, System, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" >
<section name="ToeCracker.Properties.Settings" type="System.Configuration.ClientSettingsSection, System, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" allowExeDefinition="MachineToLocalUser" requirePermission="false" />
</sectionGroup>
</configSections>
<userSettings>
<ToeCracker.Properties.Settings>
<!-- BaseURL is used as a format string: {0} is replaced with the word being looked up. -->
<setting name="BaseURL" serializeAs="String">
<value>https://words.bighugelabs.com/{0}</value>
</setting>
</ToeCracker.Properties.Settings>
</userSettings>
</configuration>