using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Globalization;
using System.Data;
using System.Data.Entity.Design.PluralizationServices;
namespace Master_Thesis
{
/// <summary>
/// Console tool that prints the most frequent words found in a text file.
/// </summary>
/// <remarks>
/// The text is lower-cased, split on whitespace, stripped of punctuation and
/// digits, filtered by a minimum length and a small stop-word list, and then
/// counted. The highest counts are printed as "rank TAB word TAB count".
/// </remarks>
class Program
{
    /// <summary>File analysed when no path is supplied on the command line.</summary>
    private const string DefaultFileName = "TextFile1.txt";

    /// <summary>Words shorter than this are not counted.</summary>
    private const int MinWordLength = 5;

    /// <summary>Maximum number of terms printed.</summary>
    private const int MaxTermsShown = 20;

    /// <summary>Characters that separate one word from the next.</summary>
    private static readonly char[] WordSeparators = { ' ', '\n', '\t', '\r' };

    /// <summary>Punctuation and digits that are deleted from inside a token.</summary>
    private static readonly HashSet<char> NoiseChars = new HashSet<char>(";,.-_^()[]0123456789");

    /// <summary>Words that are never counted, regardless of length.</summary>
    private static readonly HashSet<string> StopWords = new HashSet<string>(StringComparer.Ordinal)
    {
        "and", "the", "she", "for", "this", "you", "but"
    };

    /// <summary>
    /// Reads the input file, ranks its words by frequency and prints the top terms.
    /// </summary>
    /// <param name="args">
    /// Optional: the first argument is the path of the file to analyse. When it is
    /// absent, <c>TextFile1.txt</c> in the working directory is used.
    /// </param>
    static void Main(string[] args)
    {
        string filename = (args != null && args.Length > 0) ? args[0] : DefaultFileName;
        string inputString = File.ReadAllText(filename);

        List<KeyValuePair<string, int>> topTerms = RankTerms(ExtractWords(inputString), MaxTermsShown);

        Console.WriteLine("---- Most Frequent Terms in the File: " + filename + " ----");
        Console.WriteLine();

        int rank = 1;
        foreach (KeyValuePair<string, int> term in topTerms)
        {
            Console.WriteLine(rank + "\t" + term.Key + "\t" + term.Value);
            rank++;
        }

        // Wait for a key press so the console window stays open. ReadKey throws
        // InvalidOperationException when stdin is redirected (piped input), so
        // the pause is skipped in that case.
        if (!Console.IsInputRedirected)
        {
            Console.ReadKey();
        }
    }

    /// <summary>
    /// Splits <paramref name="text"/> into normalised words that qualify for counting.
    /// </summary>
    /// <param name="text">Raw file contents.</param>
    /// <returns>
    /// Lower-cased words with punctuation and digits removed, at least
    /// <see cref="MinWordLength"/> characters long and not in <see cref="StopWords"/>,
    /// in the order they appear in the text.
    /// </returns>
    private static IEnumerable<string> ExtractWords(string text)
    {
        // Invariant casing keeps the result independent of the machine's culture.
        string lowered = text.ToLowerInvariant();

        // Line breaks and tabs are word boundaries. Deleting them instead would
        // glue the last word of a line to the first word of the next one.
        string[] tokens = lowered.Split(WordSeparators, StringSplitOptions.RemoveEmptyEntries);

        foreach (string token in tokens)
        {
            string word = new string(token.Where(c => !NoiseChars.Contains(c)).ToArray());

            // With the current constants every stop word is shorter than
            // MinWordLength; the explicit check keeps the filter correct if
            // either the list or the limit changes.
            if (word.Length >= MinWordLength && !StopWords.Contains(word))
            {
                yield return word;
            }
        }
    }

    /// <summary>
    /// Counts the occurrences of each word and returns the most frequent ones.
    /// </summary>
    /// <param name="words">Words to count.</param>
    /// <param name="maxTerms">Maximum number of entries to return.</param>
    /// <returns>
    /// Up to <paramref name="maxTerms"/> word/count pairs ordered by descending
    /// count. Words with equal counts keep the order of their first appearance.
    /// </returns>
    private static List<KeyValuePair<string, int>> RankTerms(IEnumerable<string> words, int maxTerms)
    {
        var counts = new Dictionary<string, int>();

        // Dictionary enumeration order is unspecified, so the order of first
        // appearance is tracked separately to make tie-breaking deterministic.
        var firstSeen = new List<string>();

        foreach (string word in words)
        {
            int occurrences;
            if (!counts.TryGetValue(word, out occurrences))
            {
                firstSeen.Add(word);
            }
            counts[word] = occurrences + 1;
        }

        // OrderByDescending is a stable sort, so ties stay in first-seen order.
        // The result is kept as a list because copying it into a Dictionary
        // would lose the guaranteed ordering.
        return firstSeen
            .Select(word => new KeyValuePair<string, int>(word, counts[word]))
            .OrderByDescending(entry => entry.Value)
            .Take(maxTerms)
            .ToList();
    }
}
}