Find duplicates in the Fluxnet data; correct the non-UTF-8 characters

This commit is contained in:
2016-01-29 12:23:47 -05:00
parent 68a77366e9
commit 7e4d475260
4 changed files with 28 additions and 6 deletions
@@ -23,7 +23,6 @@ namespace LeafWeb.Core.Tests.Parsers
}
}
[Test]
public void Parse_Find_Misencoded_Characters()
{
@@ -34,5 +33,25 @@ namespace LeafWeb.Core.Tests.Parsers
Console.WriteLine($"{fns.FluxnetId} / {fns.SiteName}");
}
}
/// <summary>
/// Diagnostic test: parses the Fluxnet site list CSV and writes to the console
/// every FluxnetId that occurs more than once, ignoring case.
/// </summary>
/// <remarks>
/// The test makes no assertions; it only reports duplicates so they can be
/// inspected in the test output.
/// </remarks>
[Test]
public void Parse_Find_Duplicate_Ids()
{
    var fileInfo = FileUtility.GetContentFile(ContentDirectory, "fluxnet_site_list_all_October2015_with_joins.csv");
    var parser = new FluxnetSiteCsvParser(fileInfo);
    var fluxnetSites = parser.Parse();

    // Normalize with the invariant culture: the ids are identifiers, not
    // linguistic text, so the grouping must not depend on the machine's
    // current culture (e.g. Turkish dotted/dotless I casing rules).
    var duplicateIds = fluxnetSites
        .GroupBy(site => site.FluxnetId.ToUpperInvariant())
        .Where(group => group.Count() > 1)
        .Select(group => group.Key);

    foreach (var duplicateId in duplicateIds)
    {
        Console.WriteLine(duplicateId);
    }
}
}
}