From 7e4d475260795b1e91daf95ce150d5266c3493b5 Mon Sep 17 00:00:00 2001 From: James Kolpack Date: Fri, 29 Jan 2016 12:23:47 -0500 Subject: [PATCH] Find duplicates in the Fluxnet Data, correct the non utf-8 characters --- .../Parsers/FluxnetSiteCsvParserTests.cs | 21 ++++++++++++++++++- ...t_all_October2015_with_joins_corrected.csv | 2 -- Core/DAL/LeafWebInitializer.cs | 2 +- Core/Utility/FileUtility.cs | 9 ++++++-- 4 files changed, 28 insertions(+), 6 deletions(-) diff --git a/Core.Tests/Parsers/FluxnetSiteCsvParserTests.cs b/Core.Tests/Parsers/FluxnetSiteCsvParserTests.cs index 3b4bf7a..25e30dd 100644 --- a/Core.Tests/Parsers/FluxnetSiteCsvParserTests.cs +++ b/Core.Tests/Parsers/FluxnetSiteCsvParserTests.cs @@ -23,7 +23,6 @@ namespace LeafWeb.Core.Tests.Parsers } } - [Test] public void Parse_Find_Misencoded_Characters() { @@ -34,5 +33,25 @@ namespace LeafWeb.Core.Tests.Parsers Console.WriteLine($"{fns.FluxnetId} / {fns.SiteName}"); } } + + [Test] + public void Parse_Find_Duplicate_Ids() + { + var fileInfo = FileUtility.GetContentFile(ContentDirectory, "fluxnet_site_list_all_October2015_with_joins.csv"); + var parser = new FluxnetSiteCsvParser(fileInfo); + var fluxnetSites = parser.Parse(); + + var dupes = + from f in fluxnetSites + group f by f.FluxnetId.ToUpper() + into g + where g.Count() > 1 + select g.Key; + + foreach (var dupe in dupes) + { + Console.WriteLine($"{dupe}"); + } + } } } \ No newline at end of file diff --git a/Core/DAL/InitialData/fluxnet_site_list_all_October2015_with_joins_corrected.csv b/Core/DAL/InitialData/fluxnet_site_list_all_October2015_with_joins_corrected.csv index 647fa1a..5499988 100644 --- a/Core/DAL/InitialData/fluxnet_site_list_all_October2015_with_joins_corrected.csv +++ b/Core/DAL/InitialData/fluxnet_site_list_all_October2015_with_joins_corrected.csv @@ -732,7 +732,6 @@ siteid,fluxnetid,keyid,sitename,country,land_unit,status,latitude,longitude,year "4,188.00",IN-Hdw,(null),Haldwani Forest Plantation,India,(null),Active,29.149319,79.421103,"2,012.00",AsiaFlux,(null),(null),(null),(null),2,"2,012.00",287,0, ,7,DYSTRIC CAMBISOL,12,Cwb - Temperate/Dry Winter/Warm Summer,5,Mixed Forest,6,Deciduous Broad Leaf Forest,5,Mixed Forests "4,189.00",IN-IAR,(null),IARI Flux Site,India,(null),Active,28.643839,77.152712,"2,013.00",AsiaFlux,(null),(null),(null),(null),2,"2,013.00",216,0, ,53,ORTHIC LUVISOL,6,BSh - Arid/Steppe/Hot,13,Urban and Built-up,10,Urban,13,Urban and Built-Up "4,190.00",JP-Aou,(null),Akou green belt,Japan,Asia,Active,34.786316,134.370861,"1,993.00",AsiaFlux,(null),(null),(null),(null),2,"1,993.00",153,0, ,4,ORTHIC ACRISOL,0, ,5,Mixed Forest,6,Deciduous Broad Leaf Forest,5,Mixed Forests -"4,191.00",JP-APP,(null),Appi forest meteorology research site,Japan,Asia,Active,40.022356,140.939255,"2,000.00",AsiaFlux,(null),(null),(null),(null),2,"2,000.00",783,0, ,40,LITHOSOL,25,Dfa - Cold/Without dry season/Hot Summer,5,Mixed Forest,6,Deciduous Broad Leaf Forest,5,Mixed Forests "4,192.00",JP-Bre,(null),National Livestock Breeding Center Niikappu Station,Japan,Asia,Active,42.4,142.466667,"2,007.00",AsiaFlux,(null),(null),(null),(null),2,"2,007.00",33,0, ,40,LITHOSOL,26,Dfb - Cold/Without dry season/Warm Summer,12,Croplands,3,Broadleaf Crops,12,Croplands "4,193.00",JP-Das,(null),Dasan Station site,Japan,Asia,Active,78.916667,11.933333,"2,003.00",AsiaFlux,(null),(null),(null),(null),2,"2,003.00",69,0, ,0,WATER/OCEAN/LAKE,29,ET - Polar/Tundra,10,Grasslands,1,Grasses/Cereal Crops,10,Grasslands "4,194.00",JP-FHK,(null),Fuji Hokuroku Flux Observation Site,Japan,Asia,Active,35.443528,138.764722,"2,006.00",AsiaFlux,(null),(null),(null),(null),2,"2,006.00","1,084",0, ,40,LITHOSOL,25,Dfa - Cold/Without dry season/Hot Summer,5,Mixed Forest,6,Deciduous Broad Leaf Forest,5,Mixed Forests @@ -751,7 +750,6 @@ siteid,fluxnetid,keyid,sitename,country,land_unit,status,latitude,longitude,year "4,207.00",JP-Srk,(null),Shirakami Beech Forest Site,Japan,Asia,Active,40.565556,140.127778,"2,008.00",AsiaFlux,(null),(null),(null),(null),2,"2,008.00",340,0, ,40,LITHOSOL,25,Dfa - Cold/Without dry season/Hot Summer,5,Mixed Forest,6,Deciduous Broad Leaf Forest,5,Mixed Forests "4,208.00",JP-Szn,(null),Hokkaido University Shizunai Livestock Farm,Japan,Asia,Active,42.407267,142.475696,"2,004.00",AsiaFlux,(null),(null),(null),(null),2,"2,004.00",38,0, ,40,LITHOSOL,26,Dfb - Cold/Without dry season/Warm Summer,12,Croplands,3,Broadleaf Crops,12,Croplands "4,209.00",JP-Ta2,(null),Takayama evergreen coniferous forest site,Japan,Asia,Active,36.139722,137.370833,"2,005.00",AsiaFlux,(null),(null),(null),(null),2,"2,005.00",900,0, ,40,LITHOSOL,25,Dfa - Cold/Without dry season/Hot Summer,5,Mixed Forest,6,Deciduous Broad Leaf Forest,5,Mixed Forests -"4,210.00",JP-TEF,(null),CC-LaG Teshio Experimental Forest,Japan,Asia,Active,45.0558,142.1073,"2,001.00",AsiaFlux,(null),(null),(null),(null),2,"2,001.00",81,0, ,0,WATER/OCEAN/LAKE,26,Dfb - Cold/Without dry season/Warm Summer,5,Mixed Forest,6,Deciduous Broad Leaf Forest,5,Mixed Forests "4,211.00",KR-Ceo,(null),Cheongmicheon Farmlanbd Site,South Korea,Asia,Active,37.15979,127.652729,"2,008.00",AsiaFlux,(null),(null),(null),(null),2,"2,008.00",67,0, ,4,ORTHIC ACRISOL,21,Dwa - Cold/Dry Winter/Hot Summer,12,Croplands,1,Grasses/Cereal Crops,12,Croplands "4,212.00",KR-Gmj,(null),Gimje Rice paddy,South Korea,Asia,Active,35.745328,126.852522,"2,011.00",AsiaFlux,(null),(null),(null),(null),2,"2,011.00",1,0, ,4,ORTHIC ACRISOL,25,Dfa - Cold/Without dry season/Hot Summer,12,Croplands,3,Broadleaf Crops,12,Croplands "4,213.00",KR-Hae,(null),KoFlux Haenam site,South Korea,Asia,Active,34.5536,126.5643,"2,002.00",AsiaFlux,(null),(null),(null),(null),2,"2,002.00",4,0, ,4,ORTHIC ACRISOL,0, ,14,Cropland-Natural Vegetation Mosaic,4,Savannah,8,Woody Savannahs diff --git a/Core/DAL/LeafWebInitializer.cs b/Core/DAL/LeafWebInitializer.cs index 03b154d..6a4fd53 100644 --- a/Core/DAL/LeafWebInitializer.cs +++ b/Core/DAL/LeafWebInitializer.cs @@ -12,7 +12,7 @@ namespace LeafWeb.Core.DAL protected override void Seed(LeafWebContext context) { // get fluxnet sites from file - var fileInfo = FileUtility.GetContentFile(ContentDirectory, "fluxnet_site_list_all_October2015_with_joins_corrected.csv"); + var fileInfo = FileUtility.GetContentFile(ContentDirectory, "fluxnet_site_list_all_October2015_with_joins_corrected.csv", true); var fluxnetSiteCsvParser = new FluxnetSiteCsvParser(fileInfo); var fluxnetSites = fluxnetSiteCsvParser.Parse().ToList(); diff --git a/Core/Utility/FileUtility.cs b/Core/Utility/FileUtility.cs index 559a5f4..62964a8 100644 --- a/Core/Utility/FileUtility.cs +++ b/Core/Utility/FileUtility.cs @@ -5,9 +5,14 @@ namespace LeafWeb.Core.Utility { public static class FileUtility { - public static FileInfo GetContentFile(string contentDirectory, string fileName) + public static FileInfo GetContentFile(string contentDirectory, string fileName, bool usePrivateBinPath = false) { - var path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, contentDirectory); + string basePath; + if (usePrivateBinPath) + basePath = AppDomain.CurrentDomain.SetupInformation.PrivateBinPath; + else + basePath = AppDomain.CurrentDomain.BaseDirectory; + var path = Path.Combine(basePath, contentDirectory); return new FileInfo(path + fileName); } }