While browsing Reddit, I found a parsed dataset from Tumblr. Link here.
I used code from an older post to obtain a list of unique words and their frequencies.
Next, I used the default TermCloud sample from the Google Charts Additional Charts Gallery to generate this image in a web browser.
Tumblr blog description: words with a frequency greater than 5,000, ordered from most to least frequent.
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;

namespace ordertumblrwords
{
    /// <summary>
    /// Reads a "word:frequency" listing and emits Google Charts
    /// <c>data.setValue(row, column, value);</c> statements for every word whose
    /// frequency exceeds <see cref="MinimumFrequency"/>, most frequent first.
    /// The statements are printed to the console and appended to <see cref="OutputPath"/>.
    /// </summary>
    class Program
    {
        /// <summary>Input file: one "word:frequency" entry per line.</summary>
        private const string InputPath = "C:\\Book\\New folder\\OUTPUT.txt";

        /// <summary>File the generated statements are appended to.</summary>
        private const string OutputPath = "C:\\Book\\MANTASMAIN.txt";

        /// <summary>Only words with a frequency strictly greater than this are emitted.</summary>
        private const int MinimumFrequency = 5000;

        /// <summary>
        /// Program entry point. Loads the frequencies, then writes two statements per
        /// qualifying word (column 0 = the word, column 1 = its frequency).
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            Dictionary<string, int> frequencies = ReadFrequencies(InputPath);

            StringBuilder script = new StringBuilder();
            int row = 0;

            // Filter before sorting so only the qualifying words are ordered.
            foreach (KeyValuePair<string, int> item in frequencies
                .Where(pair => pair.Value > MinimumFrequency)
                .OrderByDescending(pair => pair.Value))
            {
                string wordStatement = "data.setValue(" + row + ", 0, '" + EscapeForJavaScript(item.Key) + "');";
                string countStatement = "data.setValue(" + row + ", 1, " + item.Value + ");";

                Console.WriteLine(wordStatement);
                Console.WriteLine(countStatement);
                script.Append(wordStatement).Append("\r\n");
                script.Append(countStatement).Append("\r\n");

                // Advance only after BOTH outputs used the same index, so the console
                // and the file agree and rows start at 0.
                row++;
            }

            using (StreamWriter writer = File.AppendText(OutputPath))
            {
                writer.WriteLine(script.ToString());
            }
        }

        /// <summary>
        /// Parses "word:frequency" lines from <paramref name="path"/> into a dictionary.
        /// </summary>
        /// <param name="path">Path of the text file to read.</param>
        /// <returns>Map from word to frequency; duplicate words have their counts summed.</returns>
        private static Dictionary<string, int> ReadFrequencies(string path)
        {
            Dictionary<string, int> frequencies = new Dictionary<string, int>();

            using (StreamReader reader = new StreamReader(path))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    if (line.Trim().Length == 0)
                    {
                        continue; // tolerate blank lines (e.g. a trailing newline)
                    }

                    // Split on the LAST colon so words that contain ':' stay intact.
                    int separator = line.LastIndexOf(':');
                    int frequency;
                    if (separator < 0 ||
                        !int.TryParse(line.Substring(separator + 1).Trim(), NumberStyles.Integer,
                                      CultureInfo.InvariantCulture, out frequency))
                    {
                        Console.Error.WriteLine("Skipping malformed line: " + line);
                        continue;
                    }

                    string word = line.Substring(0, separator);

                    // Merge rather than throw if a word appears more than once.
                    int existing;
                    frequencies.TryGetValue(word, out existing);
                    frequencies[word] = existing + frequency;
                }
            }

            return frequencies;
        }

        /// <summary>
        /// Escapes backslashes and single quotes so <paramref name="text"/> can be placed
        /// inside a single-quoted JavaScript string literal.
        /// </summary>
        /// <param name="text">Raw word taken from the input file.</param>
        /// <returns>The escaped text.</returns>
        private static string EscapeForJavaScript(string text)
        {
            // Backslashes first, otherwise the ones added for quotes would be doubled.
            return text.Replace("\\", "\\\\").Replace("'", "\\'");
        }
    }
}