153 lines
7.4 KiB
C#
153 lines
7.4 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.IO;
|
|
using System.Net;
|
|
using System.Windows.Forms;
|
|
using System.Xml;
|
|
using System.Text;
|
|
|
|
namespace ToeCracker {
    /// <summary>
    /// Methods to look up a word in the thesaurus and parse the results.
    /// </summary>
    public static class Thesaurus {
        // Opening tag of the block that holds the search results in the HTML response.
        private const string RESULT_HEADER = "<div class=\"results\">";
        // The first occurrence of this tag after RESULT_HEADER is treated as the end of the result block.
        private const string RESULT_FOOTER = "</div>";

        /// <summary>
        /// A dictionary mapping English word group keys with translated values.
        /// Some keys might be missing, in which case they should be displayed as they are;
        /// you might want to open an issue to request those translations to be added.
        /// </summary>
        /// <remarks>
        /// A new dictionary is built from the resources on every access, so callers that
        /// need several lookups should read this property once and reuse the result.
        /// </remarks>
        public static Dictionary<string, string> Translations {
            get {
                Dictionary<string, string> tr = new Dictionary<string, string>();
                tr["adjective"] = Properties.Resources.Adjective;
                // NOTE(review): "adverb" is mapped to the Verb resource, which looks like a
                // copy-paste slip. Switch to a dedicated adverb resource if one exists.
                tr["adverb"] = Properties.Resources.Verb;
                tr["antonyms"] = Properties.Resources.Antonyms;
                tr["noun"] = Properties.Resources.Noun;
                tr["related terms"] = Properties.Resources.RelatedTerms;
                tr["rhymes with"] = Properties.Resources.RhymesWith;
                tr["similar terms"] = Properties.Resources.SimilarTerms;
                tr["sounds kind of like"] = Properties.Resources.SoundsLike;
                tr["verb"] = Properties.Resources.Verb;
                return tr;
            }
        }

        /// <summary>
        /// Build the absolute URI to request for a query by inserting the query into the
        /// <c>BaseURL</c> setting, which is used as a format string.
        /// </summary>
        /// <param name="query">Word to look for; <c>null</c> is treated as an empty string.</param>
        /// <returns>The absolute URI to request.</returns>
        private static Uri BuildURI(string query) {
            // Percent-encode the query so that reserved characters ('#', '?', '&', '/', ...)
            // stay part of the searched word instead of altering the structure of the URL.
            string escapedQuery = Uri.EscapeDataString(query ?? String.Empty);
            return new Uri(String.Format(Properties.Settings.Default.BaseURL, escapedQuery), UriKind.Absolute);
        }

        /// <summary>
        /// Look up a word in the thesaurus and return the results.
        /// </summary>
        /// <param name="query">Word to look for.</param>
        /// <returns>A dictionary holding an array of words for each word group.</returns>
        /// <exception cref="WebException">
        /// The response status code is outside of the 200-299 range.
        /// The underlying web request may also raise this exception.
        /// </exception>
        /// <exception cref="XmlException">
        /// The result block could not be found in the response, could not be parsed as XML,
        /// or contains a subgroup or a word list before any word group.
        /// </exception>
        public static Dictionary<string, string[]> Search(string query) {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(BuildURI(query));
            request.UserAgent = String.Format("{0}/{1}", Application.ProductName, Application.ProductVersion);

            XmlDocument doc = new XmlDocument();

            // Dispose of the response on every path, including when the status check throws.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse()) {
                int statusCode = (int)response.StatusCode;
                if (statusCode < 200 || statusCode > 299) {
                    throw new WebException(String.Format("Unexpected HTTP status code {0}", response.StatusCode));
                }

                using (Stream stream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(stream)) {
                    doc.LoadXml(ReadResultBlock(reader));
                }
            }

            return ParseResults(doc);
        }

        /// <summary>
        /// Extract the result block from the HTML response as a string that can be parsed as XML.
        /// </summary>
        /// <remarks>
        /// The HTML response is not XHTML, so it cannot be parsed as XML as a whole.
        /// However, the search results (including a "Not found" error) are in a div tag
        /// whose contents can be parsed using an XML parser.
        /// This looks for the opening tag of that div, then reads up to and including
        /// the first closing div tag.
        /// </remarks>
        /// <param name="reader">Reader over the HTML response.</param>
        /// <returns>The result block, from its opening tag to its closing tag.</returns>
        /// <exception cref="XmlException">The beginning or the end of the result block was not found.</exception>
        private static string ReadResultBlock(TextReader reader) {
            StringBuilder block = new StringBuilder(RESULT_HEADER);
            int headerIndex = -1;
            string line;

            while ((line = reader.ReadLine()) != null) {
                headerIndex = line.IndexOf(RESULT_HEADER, StringComparison.Ordinal);
                if (headerIndex >= 0) {
                    break;
                }
            }
            if (line == null) {
                throw new XmlException("Result block not found.");
            }

            // Results may start on the same line as the opening tag; do not lose them.
            string remainder = line.Substring(headerIndex + RESULT_HEADER.Length);
            if (remainder.Length > 0 && AppendUntilFooter(block, remainder)) {
                return block.ToString();
            }

            while ((line = reader.ReadLine()) != null) {
                if (AppendUntilFooter(block, line)) {
                    return block.ToString();
                }
            }
            throw new XmlException("End of result block not found.");
        }

        /// <summary>
        /// Append a line to the result block, stopping right after the closing tag if the line holds one.
        /// </summary>
        /// <param name="block">Result block being built.</param>
        /// <param name="line">Line read from the response.</param>
        /// <returns><c>true</c> if the closing tag was found and the block is complete.</returns>
        private static bool AppendUntilFooter(StringBuilder block, string line) {
            int footerIndex = line.IndexOf(RESULT_FOOTER, StringComparison.Ordinal);
            if (footerIndex < 0) {
                block.AppendLine(line);
                return false;
            }
            // Anything after the closing tag belongs to the rest of the page and would
            // make the block invalid XML, so it is left out.
            block.AppendLine(line.Substring(0, footerIndex + RESULT_FOOTER.Length));
            return true;
        }

        /// <summary>
        /// Turn the parsed result block into a dictionary of word groups.
        /// </summary>
        /// <remarks>
        /// Four types of tags are expected inside of the result block:
        /// h2, which contains the word that was searched and is ignored;
        /// h3, the beginning of a new word group;
        /// h4, a subgroup within the word group;
        /// ul, listing words inside of the current group.
        /// Any other tag is ignored to stay flexible.
        /// </remarks>
        /// <param name="doc">Document whose root element is the result block.</param>
        /// <returns>A dictionary holding an array of words for each word group.</returns>
        /// <exception cref="XmlException">A subgroup or a word list was found before any word group.</exception>
        private static Dictionary<string, string[]> ParseResults(XmlDocument doc) {
            Dictionary<string, string[]> results = new Dictionary<string, string[]>();
            // The property builds a new dictionary on each access, so read it only once.
            Dictionary<string, string> translations = Translations;
            string groupName = null;
            List<string> words = new List<string>();

            foreach (XmlNode node in doc.DocumentElement.ChildNodes) {
                if (node.NodeType != XmlNodeType.Element) {
                    continue;
                }

                string tagName = node.Name.ToLowerInvariant();
                if (tagName == "h3") {
                    // Beginning of a new word group: end the current one first.
                    StoreGroup(results, groupName, words);
                    groupName = node.InnerText;
                } else if (tagName == "h4") {
                    // We will represent subgroups by adding an empty word (an empty line in the display),
                    // and format the group name in uppercase with dashes: "-- ANTONYMS --"
                    if (groupName == null) {
                        throw new XmlException("Unexpected subgroup name with no group name.");
                    }
                    words.Add("");
                    string title = node.InnerText.ToLowerInvariant();
                    string translated;
                    if (translations.TryGetValue(title, out translated) && translated != null) {
                        title = translated;
                    }
                    words.Add(String.Format("-- {0} --", title.ToUpperInvariant()));
                } else if (tagName == "ul") {
                    if (groupName == null) {
                        throw new XmlException("Unexpected word list with no group name.");
                    }
                    foreach (XmlNode listItem in node.ChildNodes) {
                        // Tag names are compared case-insensitively, like the other tags above.
                        if (listItem.NodeType == XmlNodeType.Element && listItem.Name.ToLowerInvariant() == "li") {
                            words.Add(listItem.InnerText);
                        }
                    }
                }
            }

            // Once we have finished iterating, there might be one remaining group to add to the dictionary.
            StoreGroup(results, groupName, words);

            return results;
        }

        /// <summary>
        /// Store the words collected for a group into the results, then empty the word list.
        /// Does nothing when there is no current group or no collected word.
        /// </summary>
        /// <param name="results">Dictionary of word groups being built.</param>
        /// <param name="groupName">Name of the current group, or <c>null</c> if there is none yet.</param>
        /// <param name="words">Words collected for the current group; cleared once stored.</param>
        private static void StoreGroup(Dictionary<string, string[]> results, string groupName, List<string> words) {
            if (groupName == null || words.Count == 0) {
                return;
            }

            string[] existingWords;
            if (results.TryGetValue(groupName, out existingWords)) {
                // Group names should be unique, but we will tolerate duplicate group names by concatenating the word lists
                List<string> mergedWords = new List<string>(existingWords);
                mergedWords.AddRange(words);
                results[groupName] = mergedWords.ToArray();
            } else {
                results[groupName] = words.ToArray();
            }
            words.Clear();
        }
    }
}
|