51 lines
1.5 KiB
C#
51 lines
1.5 KiB
C#
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text.RegularExpressions;

namespace Core
{
    /// <summary>
    /// In-memory table mapping words to integer frequency counts. Also acts as an
    /// <see cref="IComparer{T}"/> over strings that orders words by ascending frequency.
    /// </summary>
    public class WordFrequency : IComparer<string>
    {
        // Accepts any field containing at least one run of lowercase ASCII letters.
        // The pattern is not anchored, so fields such as "abc1" also pass.
        // NOTE(review): confirm whether a whole-field match ("^[a-z]+$") was intended.
        private static readonly Regex WordPattern = new Regex("[a-z]+");

        private readonly Dictionary<string, int> _dictionary;

        /// <summary>
        /// Builds the table from the default file name <c>all.num.o5.txt</c>.
        /// </summary>
        public WordFrequency()
            : this(@"all.num.o5.txt")
        {
        }

        /// <summary>
        /// Builds the table from the lines that <c>GetFileLines()</c> yields for
        /// <paramref name="filename"/>.
        /// </summary>
        /// <param name="filename">Name of the frequency list to load.</param>
        public WordFrequency(string filename)
            : this(ParseWordFreq(filename.GetFileLines()))
        {
        }

        /// <summary>
        /// Builds the table from (word, frequency) pairs. When the same word occurs more
        /// than once, the first occurrence wins and later ones are ignored.
        /// </summary>
        /// <param name="words">Pairs of word (Item1) and frequency (Item2).</param>
        /// <exception cref="ArgumentNullException"><paramref name="words"/> is null.</exception>
        public WordFrequency(IEnumerable<Tuple<string, int>> words)
        {
            if (words == null)
            {
                throw new ArgumentNullException("words");
            }

            _dictionary = new Dictionary<string, int>();
            foreach (var entry in words)
            {
                // Keep the first frequency seen for a word; duplicates are skipped.
                if (!_dictionary.ContainsKey(entry.Item1))
                {
                    _dictionary.Add(entry.Item1, entry.Item2);
                }
            }
        }

        /// <summary>
        /// Lazily converts raw lines into (word, frequency) pairs.
        /// </summary>
        /// <param name="lines">Lines to parse.</param>
        /// <returns>One pair per accepted line; see <see cref="ParseLines"/> for the rules.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="lines"/> is null.</exception>
        private static IEnumerable<Tuple<string, int>> ParseWordFreq(IEnumerable<string> lines)
        {
            // Validate eagerly: an iterator body would defer this check until enumeration.
            if (lines == null)
            {
                throw new ArgumentNullException("lines");
            }

            return ParseLines(lines);
        }

        /// <summary>
        /// Iterator behind <see cref="ParseWordFreq"/>. A line is accepted when it splits on
        /// single spaces into exactly four fields, the second field matches the word pattern,
        /// and the first field is an integer. Every other line is skipped.
        /// </summary>
        private static IEnumerable<Tuple<string, int>> ParseLines(IEnumerable<string> lines)
        {
            foreach (var line in lines)
            {
                var fields = line.Split(' ');
                if (fields.Length != 4 || !WordPattern.IsMatch(fields[1]))
                {
                    continue;
                }

                // Parse with the invariant culture so the result does not depend on the
                // machine's regional settings; a malformed count skips the line rather
                // than aborting the whole load.
                int freq;
                if (!int.TryParse(fields[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out freq))
                {
                    continue;
                }

                yield return new Tuple<string, int>(fields[1].Canonicalize(), freq);
            }
        }

        /// <summary>
        /// Looks up the stored frequency of <paramref name="word"/>.
        /// </summary>
        /// <param name="word">Word to look up; used as the dictionary key as-is.</param>
        /// <returns>The stored frequency, or 0 when the word is not in the table.</returns>
        public int GetWordFreq(string word)
        {
            int freq;
            return _dictionary.TryGetValue(word, out freq) ? freq : 0;
        }

        /// <summary>
        /// Compares two words by their frequency. Words missing from the table count as
        /// frequency 0, the same value <see cref="GetWordFreq"/> reports for them.
        /// </summary>
        /// <param name="x">First word.</param>
        /// <param name="y">Second word.</param>
        /// <returns>
        /// A negative value when <paramref name="x"/> is less frequent than
        /// <paramref name="y"/>, zero when the frequencies are equal, a positive value otherwise.
        /// </returns>
        public int Compare(string x, string y)
        {
            return GetWordFreq(x).CompareTo(GetWordFreq(y));
        }
    }
}