using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Windows.Forms;
using System.Xml;
using System.Text;
namespace ToeCracker {
/// <summary>
/// Methods to look up a word in the thesaurus and parse the results.
/// </summary>
public static class Thesaurus {
    // Text marking the line on which the result block starts in the HTML response.
    // It is also used as the opening of the XML fragment handed to the XML parser.
    // NOTE(review): the value below looks truncated (presumably the opening tag of the results div) - confirm.
    private const string RESULT_HEADER = "
";
    // Text marking the last line of the result block in the HTML response.
    // NOTE(review): the value below looks truncated (presumably the matching closing tag) - confirm.
    private const string RESULT_FOOTER = "
";

    /// <summary>
    /// A dictionary mapping English word group keys with translated values.
    /// Some keys might be missing, in which case they should be displayed as they are;
    /// you might want to open an issue to request those translations to be added.
    /// </summary>
    /// <remarks>
    /// A new dictionary is built from the resource strings on every access.
    /// </remarks>
    public static Dictionary<string, string> Translations {
        get {
            Dictionary<string, string> tr = new Dictionary<string, string>();
            tr["adjective"] = Properties.Resources.Adjective;
            // NOTE(review): relies on an Adverb resource string being defined next to Adjective/Noun/Verb - confirm.
            tr["adverb"] = Properties.Resources.Adverb;
            tr["noun"] = Properties.Resources.Noun;
            tr["sounds kind of like"] = Properties.Resources.SoundsLike;
            tr["rhymes with"] = Properties.Resources.RhymesWith;
            tr["verb"] = Properties.Resources.Verb;
            return tr;
        }
    }

    /// <summary>
    /// Build the absolute search URI by inserting the query into the configured base URL format string.
    /// </summary>
    /// <param name="query">Word to look for; a null value is treated as an empty string.</param>
    /// <returns>The absolute URI to request.</returns>
    private static Uri BuildURI(string query) {
        // Escape the query so that characters such as '&', '#', '?' or '/' cannot alter the structure of the URL.
        string escaped = Uri.EscapeDataString(query ?? String.Empty);
        return new Uri(String.Format(Properties.Settings.Default.BaseURL, escaped), UriKind.Absolute);
    }

    /// <summary>
    /// Look up a word in the thesaurus and return the results.
    /// </summary>
    /// <param name="query">Word to look for.</param>
    /// <returns>A dictionary holding an array of words for each word group.</returns>
    /// <exception cref="WebException">The request failed or returned a status code outside of the 200-299 range.</exception>
    /// <exception cref="XmlException">The result block could not be located in the response or could not be parsed.</exception>
    public static Dictionary<string, string[]> Search(string query) {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(BuildURI(query));
        request.UserAgent = String.Format("{0}/{1}", Application.ProductName, Application.ProductVersion);

        string resultBlock;
        // Dispose the response so that the underlying connection is released, including when an exception is thrown.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse()) {
            int status = (int)response.StatusCode;
            if (status < 200 || status > 299)
                throw new WebException(String.Format("Unexpected HTTP status code {0}", response.StatusCode));
            using (Stream stream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(stream)) {
                resultBlock = ReadResultBlock(reader);
            }
        }

        XmlDocument doc = new XmlDocument();
        doc.LoadXml(resultBlock);
        return ParseResults(doc);
    }

    /// <summary>
    /// Extract the text of the result block from the HTML response.
    /// </summary>
    /// <param name="reader">Reader positioned at the start of the response body.</param>
    /// <returns>The header marker followed by every line up to and including the one holding the footer marker.</returns>
    /// <exception cref="XmlException">The start or the end of the result block was not found.</exception>
    private static string ReadResultBlock(TextReader reader) {
        /*
         * The HTML response is not XHTML, so it cannot be parsed as XML as a whole.
         * However, the search results (including a "Not found" error) are in a div tag
         * whose contents can be parsed using an XML parser.
         * Skip lines until the one containing the header marker, then collect every
         * following line up to and including the one containing the footer marker.
         */
        string line;
        while ((line = reader.ReadLine()) != null) {
            if (line.Contains(RESULT_HEADER))
                break;
        }
        if (line == null)
            throw new XmlException("Result block not found.");

        // The line holding the header marker is dropped; the marker itself starts the fragment instead.
        StringBuilder sb = new StringBuilder(RESULT_HEADER);
        while ((line = reader.ReadLine()) != null) {
            sb.AppendLine(line);
            if (line.Contains(RESULT_FOOTER))
                break;
        }
        if (line == null)
            throw new XmlException("End of result block not found.");
        return sb.ToString();
    }

    /// <summary>
    /// Turn the parsed result block into word lists indexed by group name.
    /// </summary>
    /// <param name="doc">Document whose root element is the result block.</param>
    /// <returns>A dictionary holding an array of words for each word group that has at least one word.</returns>
    /// <exception cref="XmlException">A word list was found before any group name.</exception>
    private static Dictionary<string, string[]> ParseResults(XmlDocument doc) {
        /*
         * Iterate on the element nodes directly inside of the result block. Tag names are compared
         * without regard to case, and the following tags are expected:
         * - a heading holding the word that was just searched, which is ignored;
         * - h3, the beginning of a new word group;
         * - ul, listing words (li items) of the current group.
         * Any other tag is ignored to stay flexible.
         */
        Dictionary<string, string[]> results = new Dictionary<string, string[]>();
        string groupName = null;
        List<string> words = new List<string>();
        foreach (XmlNode node in doc.DocumentElement.ChildNodes) {
            if (node.NodeType != XmlNodeType.Element)
                continue;
            if (String.Equals(node.Name, "h3", StringComparison.OrdinalIgnoreCase)) {
                // Beginning of a new word group: store the previous one first.
                StoreGroup(results, groupName, words);
                groupName = node.InnerText;
            } else if (String.Equals(node.Name, "ul", StringComparison.OrdinalIgnoreCase)) {
                if (groupName == null)
                    throw new XmlException("Unexpected word list with no group name.");
                foreach (XmlNode listItem in node.ChildNodes) {
                    if (listItem.NodeType == XmlNodeType.Element
                            && String.Equals(listItem.Name, "li", StringComparison.OrdinalIgnoreCase))
                        words.Add(listItem.InnerText);
                }
            }
        }
        // Once we have finished iterating, there might be one remaining group to add to the dictionary.
        StoreGroup(results, groupName, words);
        return results;
    }

    /// <summary>
    /// Store the pending words under the given group name, then empty the pending list.
    /// Nothing happens when there is no group name or no pending word.
    /// </summary>
    /// <param name="results">Dictionary receiving the group.</param>
    /// <param name="groupName">Name of the group being closed, or null when no group was started yet.</param>
    /// <param name="words">Words collected for the group; cleared once they have been stored.</param>
    private static void StoreGroup(Dictionary<string, string[]> results, string groupName, List<string> words) {
        if (groupName == null || words.Count == 0)
            return;
        string[] existing;
        if (results.TryGetValue(groupName, out existing)) {
            // Group names should be unique, but duplicate group names are tolerated by concatenating the word lists.
            List<string> merged = new List<string>(existing);
            merged.AddRange(words);
            results[groupName] = merged.ToArray();
        } else {
            results[groupName] = words.ToArray();
        }
        words.Clear();
    }
}
}