Files

51 lines
1.5 KiB
C#

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
namespace Core
{
/// <summary>
/// Word-to-frequency lookup table that can also order words by their frequency.
/// </summary>
/// <remarks>
/// Words that are not in the table are treated as having frequency 0, both by
/// <see cref="GetWordFreq"/> and by <see cref="Compare"/>.
/// </remarks>
public class WordFrequency : IComparer<string>
{
    // Built once and shared: the pattern never changes between calls.
    // NOTE(review): the pattern is unanchored, so it accepts any token that
    // contains at least one lowercase a-z letter, not only all-lowercase tokens.
    // Confirm whether "^[a-z]+$" was intended before tightening it.
    private static readonly Regex WordPattern = new Regex("[a-z]+");

    private readonly Dictionary<string, int> _dictionary;

    /// <summary>
    /// Builds the table from the default frequency file, <c>all.num.o5.txt</c>.
    /// </summary>
    public WordFrequency()
        : this(@"all.num.o5.txt")
    {
    }

    /// <summary>
    /// Lazily converts frequency-list lines into (word, frequency) pairs.
    /// </summary>
    /// <param name="lines">
    /// Lines made of four space-separated fields; the first field is the count
    /// and the second is the word. The remaining two fields are ignored.
    /// </param>
    /// <returns>
    /// One pair per accepted line. Lines that do not have exactly four fields, or
    /// whose word field contains no lowercase a-z letter, are skipped.
    /// </returns>
    /// <exception cref="FormatException">
    /// Thrown during enumeration when an accepted line has a non-integer count.
    /// </exception>
    private static IEnumerable<Tuple<string, int>> ParseWordFreq(IEnumerable<string> lines)
    {
        return lines
            .Select(line => line.Split(' '))
            .Where(fields => fields.Length == 4 && WordPattern.IsMatch(fields[1]))
            .Select(fields =>
            {
                // The file is machine-readable data, so parse the count
                // independently of the current thread culture.
                int freq = int.Parse(fields[0], System.Globalization.CultureInfo.InvariantCulture);

                // Canonicalize() is an extension method declared outside this class;
                // the table is keyed by whatever it returns.
                return Tuple.Create(fields[1].Canonicalize(), freq);
            });
    }

    /// <summary>
    /// Builds the table from a frequency-list file.
    /// </summary>
    /// <param name="filename">
    /// File to load. Its lines are obtained through the <c>GetFileLines()</c>
    /// extension method, which is declared outside this class.
    /// </param>
    public WordFrequency(string filename)
        : this(ParseWordFreq(filename.GetFileLines()))
    {
    }

    /// <summary>
    /// Builds the table from (word, frequency) pairs.
    /// </summary>
    /// <param name="words">
    /// Pairs to load. When a word occurs more than once, the first occurrence
    /// wins and later ones are ignored.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="words"/> is <c>null</c>.
    /// </exception>
    public WordFrequency(IEnumerable<Tuple<string, int>> words)
    {
        if (words == null)
        {
            throw new ArgumentNullException("words");
        }

        _dictionary = new Dictionary<string, int>();
        foreach (var entry in words)
        {
            // Keep the first frequency seen for a word; skip later duplicates.
            if (!_dictionary.ContainsKey(entry.Item1))
            {
                _dictionary.Add(entry.Item1, entry.Item2);
            }
        }
    }

    /// <summary>
    /// Returns the stored frequency of a word.
    /// </summary>
    /// <param name="word">Word to look up, exactly as keyed in the table.</param>
    /// <returns>The stored frequency, or 0 when the word is not in the table.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="word"/> is <c>null</c>.
    /// </exception>
    public int GetWordFreq(string word)
    {
        int freq;
        return _dictionary.TryGetValue(word, out freq) ? freq : 0;
    }

    /// <summary>
    /// Orders two words by ascending frequency.
    /// </summary>
    /// <param name="x">First word.</param>
    /// <param name="y">Second word.</param>
    /// <returns>
    /// A negative value when <paramref name="x"/> is less frequent than
    /// <paramref name="y"/>, zero when the frequencies are equal, and a positive
    /// value otherwise.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// Either argument is <c>null</c>.
    /// </exception>
    public int Compare(string x, string y)
    {
        // Go through GetWordFreq so unknown words compare as frequency 0 instead
        // of throwing KeyNotFoundException in the middle of a sort.
        return GetWordFreq(x).CompareTo(GetWordFreq(y));
    }
}
}