using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Text;
using System.Windows.Forms;
using System.Xml;

namespace ToeCracker
{
    /// <summary>
    /// Methods to look up a word in the thesaurus and parse the results.
    /// </summary>
    public static class Thesaurus
    {
        // NOTE(review): in the copy of this file that was reviewed, the markup inside the two
        // literals below was lost and only a line break remains in each. The comments in this
        // class describe them as the opening and closing tags of the div that wraps the search
        // results. Restore the exact tags from the original file before merging; the values
        // below only preserve what was visible.
        private const string RESULT_HEADER = "\n";
        private const string RESULT_FOOTER = "\n";

        /// <summary>
        /// A dictionary mapping English word group keys with translated values.
        /// Some keys might be missing, in which case they should be displayed as they are;
        /// you might want to open an issue to request those translations to be added.
        /// </summary>
        /// <remarks>
        /// A new dictionary is built on every access, so callers may freely modify the result.
        /// </remarks>
        public static Dictionary<string, string> Translations
        {
            get
            {
                return new Dictionary<string, string>
                {
                    { "adjective", Properties.Resources.Adjective },
                    // Previously mapped to Properties.Resources.Verb, which labelled adverbs as verbs.
                    // NOTE(review): assumes an "Adverb" resource entry exists next to the others - confirm.
                    { "adverb", Properties.Resources.Adverb },
                    { "noun", Properties.Resources.Noun },
                    { "sounds kind of like", Properties.Resources.SoundsLike },
                    { "rhymes with", Properties.Resources.RhymesWith },
                    { "verb", Properties.Resources.Verb },
                };
            }
        }

        /// <summary>
        /// Build the absolute request URI by substituting the query into the configured base URL format string.
        /// </summary>
        /// <param name="query">Word to insert into the base URL.</param>
        /// <returns>The absolute URI to request.</returns>
        private static Uri BuildURI(string query)
        {
            return new Uri(String.Format(Properties.Settings.Default.BaseURL, query), UriKind.Absolute);
        }

        /// <summary>
        /// Look up a word in the thesaurus and return the results.
        /// </summary>
        /// <param name="query">Word to look for.</param>
        /// <returns>A dictionary holding an array of words for each word group.</returns>
        /// <exception cref="WebException">The request failed or returned a status code outside of 200-299.</exception>
        /// <exception cref="XmlException">
        /// The result block could not be located in the response, could not be parsed as XML,
        /// or contained a word list that was not preceded by a group name.
        /// </exception>
        public static Dictionary<string, string[]> Search(string query)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(BuildURI(query));
            request.UserAgent = String.Format("{0}/{1}", Application.ProductName, Application.ProductVersion);

            XmlDocument doc = new XmlDocument();

            // Dispose the response so the underlying connection is released even when we throw below.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                int status = (int)response.StatusCode;
                if (status < 200 || status > 299)
                {
                    throw new WebException(String.Format("Unexpected HTTP status code {0}", response.StatusCode));
                }

                using (Stream stream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(stream))
                {
                    doc.LoadXml(ReadResultBlock(reader));
                }
            }

            return ParseResults(doc);
        }

        /// <summary>
        /// Extract the text of the result block from the HTML response.
        /// </summary>
        /// <remarks>
        /// The HTML response is not XHTML, so we cannot parse it as XML at first.
        /// However, the search results (including a "Not found" error) are in a div tag
        /// whose contents can be parsed using an XML parser.
        /// We look for the line holding the beginning of this div tag, then collect every
        /// following line up to and including the one holding its closing tag.
        /// The line holding the header itself is not copied; the header constant is used instead.
        /// </remarks>
        /// <param name="reader">Reader positioned at the start of the response body.</param>
        /// <returns>The header followed by every line read up to and including the footer line.</returns>
        /// <exception cref="XmlException">The header or the footer was not found before the end of the stream.</exception>
        private static string ReadResultBlock(TextReader reader)
        {
            string line;

            // Skip everything up to the line containing the start of the result block.
            while ((line = reader.ReadLine()) != null)
            {
                if (line.Contains(RESULT_HEADER))
                {
                    break;
                }
            }
            if (line == null)
            {
                throw new XmlException("Result block not found.");
            }

            StringBuilder block = new StringBuilder(RESULT_HEADER);
            while ((line = reader.ReadLine()) != null)
            {
                block.AppendLine(line);
                if (line.Contains(RESULT_FOOTER))
                {
                    break;
                }
            }
            if (line == null)
            {
                throw new XmlException("End of result block not found.");
            }

            return block.ToString();
        }

        /// <summary>
        /// Walk the children of the parsed result block and collect the words of each group.
        /// </summary>
        /// <remarks>
        /// Only two kinds of child elements are interpreted:
        /// - h3, the beginning of a new word group;
        /// - ul, listing the words (li elements) of the current group.
        /// Any other tag, such as the heading repeating the searched word, is ignored to stay flexible.
        /// </remarks>
        /// <param name="doc">Document whose root element is the result block.</param>
        /// <returns>A dictionary holding an array of words for each word group.</returns>
        /// <exception cref="XmlException">A word list was found before any group name.</exception>
        private static Dictionary<string, string[]> ParseResults(XmlDocument doc)
        {
            Dictionary<string, string[]> results = new Dictionary<string, string[]>();
            string groupName = null;
            List<string> words = new List<string>();

            foreach (XmlNode node in doc.DocumentElement.ChildNodes)
            {
                if (node.NodeType != XmlNodeType.Element)
                {
                    continue;
                }

                if (String.Equals(node.Name, "h3", StringComparison.OrdinalIgnoreCase))
                {
                    // Beginning of a new word group: flush the one we were collecting.
                    StoreGroup(results, groupName, words);
                    groupName = node.InnerText;
                }
                else if (String.Equals(node.Name, "ul", StringComparison.OrdinalIgnoreCase))
                {
                    if (groupName == null)
                    {
                        throw new XmlException("Unexpected word list with no group name.");
                    }

                    foreach (XmlNode listItem in node.ChildNodes)
                    {
                        // Tag names are compared case-insensitively, like h3 and ul above.
                        if (listItem.NodeType == XmlNodeType.Element
                            && String.Equals(listItem.Name, "li", StringComparison.OrdinalIgnoreCase))
                        {
                            words.Add(listItem.InnerText);
                        }
                    }
                }
            }

            // Once we have finished iterating, there might be one remaining group to add to the dictionary.
            StoreGroup(results, groupName, words);
            return results;
        }

        /// <summary>
        /// Store the pending words under the given group name, then empty the pending list.
        /// Does nothing when there is no current group or no pending word.
        /// </summary>
        /// <param name="results">Dictionary receiving the group.</param>
        /// <param name="groupName">Name of the group being closed, or null if none was started.</param>
        /// <param name="words">Words collected for the group; cleared once stored.</param>
        private static void StoreGroup(Dictionary<string, string[]> results, string groupName, List<string> words)
        {
            if (groupName == null || words.Count == 0)
            {
                return;
            }

            string[] existing;
            if (results.TryGetValue(groupName, out existing))
            {
                // Group names should be unique, but we tolerate duplicates by concatenating the word lists.
                List<string> merged = new List<string>(existing);
                merged.AddRange(words);
                results[groupName] = merged.ToArray();
            }
            else
            {
                results[groupName] = words.ToArray();
            }

            words.Clear();
        }
    }
}