Enhance event occurrence parsing to skip unmatched high school section headers
This commit introduces a new property to track skipped high school section headers in the EventOccurrenceParseResult and EventOccurrenceParserResult classes. The EventOccurrenceParser has been updated to gracefully skip HS section headers that do not match any event definitions, improving the parsing logic. Additionally, the LocationParsingConfiguration has been removed from the EventOccurrenceParser, simplifying its constructor. Unit tests have been updated to reflect these changes and ensure correct behavior during parsing.
This commit is contained in:
@@ -51,42 +51,23 @@ public class LineClassifier_Tests
|
||||
}
|
||||
|
||||
[Test]
|
||||
public void IsContinuationLine_StartsWithLowercaseContinuationWord_ReturnsTrue()
|
||||
public void IsContinuationLine_StartsWithAsterisk_ReturnsTrue()
|
||||
{
|
||||
// Act & Assert
|
||||
Assert.That(LineClassifier.IsContinuationLine("the event will be held"), Is.True);
|
||||
Assert.That(LineClassifier.IsContinuationLine("and participants should arrive"), Is.True);
|
||||
Assert.That(LineClassifier.IsContinuationLine("be sure to register"), Is.True);
|
||||
Assert.That(LineClassifier.IsContinuationLine("or contact the coordinator"), Is.True);
|
||||
}
|
||||
|
||||
[Test]
|
||||
public void IsContinuationLine_StartsWithLowercase_NonContinuationWord_ReturnsFalse()
|
||||
{
|
||||
// Act & Assert
|
||||
Assert.That(LineClassifier.IsContinuationLine("important: bring materials"), Is.False);
|
||||
}
|
||||
|
||||
[Test]
|
||||
public void IsContinuationLine_StartsWithUppercase_ReturnsFalse()
|
||||
{
|
||||
// Act & Assert
|
||||
Assert.That(LineClassifier.IsContinuationLine("Important Event March 15"), Is.False);
|
||||
}
|
||||
|
||||
[Test]
|
||||
public void IsContinuationLine_ContainsSchedulePosted_ReturnsTrue()
|
||||
{
|
||||
// Act & Assert
|
||||
Assert.That(LineClassifier.IsContinuationLine("Schedule Posted on website"), Is.True);
|
||||
}
|
||||
|
||||
[Test]
|
||||
public void IsContinuationLine_ContainsNote_ReturnsTrue()
|
||||
{
|
||||
// Act & Assert
|
||||
Assert.That(LineClassifier.IsContinuationLine("Note: Additional information"), Is.True);
|
||||
Assert.That(LineClassifier.IsContinuationLine("*The books of semifinalist teams"), Is.True);
|
||||
Assert.That(LineClassifier.IsContinuationLine("*Note: Important details"), Is.True);
|
||||
Assert.That(LineClassifier.IsContinuationLine("*This is a continuation line"), Is.True);
|
||||
Assert.That(LineClassifier.IsContinuationLine(" *Line with leading whitespace"), Is.True);
|
||||
}
|
||||
|
||||
/// <summary>
/// Lines whose text does not begin with an asterisk must not be classified as continuation lines.
/// </summary>
[Test]
public void IsContinuationLine_DoesNotStartWithAsterisk_ReturnsFalse()
{
    // Arrange: sample lines, none of which begins with '*'.
    string[] linesWithoutLeadingAsterisk =
    {
        "The event will be held",
        "Note: Additional information",
        "Important Event March 15",
        "Schedule Posted on website",
    };

    // Act & Assert: checked in order; the first misclassified line fails the test.
    foreach (var candidate in linesWithoutLeadingAsterisk)
    {
        bool classifiedAsContinuation = LineClassifier.IsContinuationLine(candidate);

        Assert.That(classifiedAsContinuation, Is.False);
    }
}
|
||||
|
||||
[Test]
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
using Core.Models;
|
||||
using Core.Parsers.EventOccurrence;
|
||||
using NUnit.Framework;
|
||||
|
||||
@@ -10,110 +9,69 @@ public class TimeLocationParser_Tests
|
||||
[Test]
|
||||
public void Parse_TimeAndLocation_ExtractsBoth()
|
||||
{
|
||||
// Arrange
|
||||
var locationConfig = new LocationParsingConfiguration
|
||||
{
|
||||
LocationPatterns = new List<string> { "Room *" }
|
||||
};
|
||||
|
||||
// Act
|
||||
TimeLocationParser.Parse("10:30 a.m. Room 101", locationConfig,
|
||||
out string time, out string location, out bool locationParseSuccess);
|
||||
TimeLocationParser.Parse("10:30 a.m. Room 101",
|
||||
out string time, out string location);
|
||||
|
||||
// Assert
|
||||
Assert.That(time, Is.EqualTo("10:30 a.m."));
|
||||
Assert.That(location, Is.EqualTo("Room 101"));
|
||||
Assert.That(locationParseSuccess, Is.True);
|
||||
}
|
||||
|
||||
[Test]
|
||||
public void Parse_TimeRangeAndLocation_ExtractsTimeRangeAndLocation()
|
||||
{
|
||||
// Arrange
|
||||
var locationConfig = new LocationParsingConfiguration
|
||||
{
|
||||
LocationPatterns = new List<string> { "Room *" }
|
||||
};
|
||||
|
||||
// Act
|
||||
TimeLocationParser.Parse("10:00 a.m. - 12:00 p.m. Room 202", locationConfig,
|
||||
out string time, out string location, out bool locationParseSuccess);
|
||||
TimeLocationParser.Parse("10:00 a.m. - 12:00 p.m. Room 202",
|
||||
out string time, out string location);
|
||||
|
||||
// Assert
|
||||
Assert.That(time, Is.EqualTo("10:00 a.m. - 12:00 p.m."));
|
||||
Assert.That(location, Is.EqualTo("Room 202"));
|
||||
Assert.That(locationParseSuccess, Is.True);
|
||||
}
|
||||
|
||||
[Test]
|
||||
public void Parse_NOONAndLocation_ExtractsBoth()
|
||||
{
|
||||
// Arrange
|
||||
var locationConfig = new LocationParsingConfiguration
|
||||
{
|
||||
LocationPatterns = new List<string> { "Hall *" }
|
||||
};
|
||||
|
||||
// Act
|
||||
TimeLocationParser.Parse("NOON Hall C", locationConfig,
|
||||
out string time, out string location, out bool locationParseSuccess);
|
||||
TimeLocationParser.Parse("NOON Hall C",
|
||||
out string time, out string location);
|
||||
|
||||
// Assert
|
||||
Assert.That(time, Is.EqualTo("NOON"));
|
||||
Assert.That(location, Is.EqualTo("Hall C"));
|
||||
Assert.That(locationParseSuccess, Is.True);
|
||||
}
|
||||
|
||||
[Test]
|
||||
public void Parse_TimeOnly_NoLocation()
|
||||
{
|
||||
// Arrange
|
||||
var locationConfig = new LocationParsingConfiguration
|
||||
{
|
||||
LocationPatterns = new List<string> { "Room *" }
|
||||
};
|
||||
|
||||
// Act
|
||||
TimeLocationParser.Parse("3:00 p.m.", locationConfig,
|
||||
out string time, out string location, out bool locationParseSuccess);
|
||||
TimeLocationParser.Parse("3:00 p.m.",
|
||||
out string time, out string location);
|
||||
|
||||
// Assert
|
||||
Assert.That(time, Is.EqualTo("3:00 p.m."));
|
||||
Assert.That(location, Is.Empty);
|
||||
Assert.That(locationParseSuccess, Is.True); // No location is valid
|
||||
}
|
||||
|
||||
[Test]
|
||||
public void Parse_LocationNotMatchingPattern_StillReturnsLocation_ReportsFailure()
|
||||
public void Parse_AnyLocation_ExtractsLocationWithoutValidation()
|
||||
{
|
||||
// Arrange
|
||||
var locationConfig = new LocationParsingConfiguration
|
||||
{
|
||||
LocationPatterns = new List<string> { "Room *" }
|
||||
};
|
||||
|
||||
// Act
|
||||
TimeLocationParser.Parse("10:00 a.m. Unknown Location", locationConfig,
|
||||
out string time, out string location, out bool locationParseSuccess);
|
||||
TimeLocationParser.Parse("10:00 a.m. Unknown Location",
|
||||
out string time, out string location);
|
||||
|
||||
// Assert
|
||||
Assert.That(time, Is.EqualTo("10:00 a.m."));
|
||||
Assert.That(location, Is.EqualTo("Unknown Location"));
|
||||
Assert.That(locationParseSuccess, Is.False);
|
||||
}
|
||||
|
||||
[Test]
|
||||
public void Parse_LocationWithTimeComponent_CleansTimeComponent()
|
||||
{
|
||||
// Arrange
|
||||
var locationConfig = new LocationParsingConfiguration
|
||||
{
|
||||
LocationPatterns = new List<string> { "Exhibit Hall *" }
|
||||
};
|
||||
|
||||
// Act
|
||||
TimeLocationParser.Parse("10:00 a.m. - 12:15 p.m. Exhibit Hall C", locationConfig,
|
||||
out string time, out string location, out bool locationParseSuccess);
|
||||
TimeLocationParser.Parse("10:00 a.m. - 12:15 p.m. Exhibit Hall C",
|
||||
out string time, out string location);
|
||||
|
||||
// Assert
|
||||
Assert.That(time, Is.EqualTo("10:00 a.m. - 12:15 p.m."));
|
||||
@@ -121,16 +79,15 @@ public class TimeLocationParser_Tests
|
||||
}
|
||||
|
||||
[Test]
|
||||
public void Parse_NoLocationConfig_StillExtractsTimeAndLocation()
|
||||
public void Parse_AnyLocation_ExtractsAsIs()
|
||||
{
|
||||
// Act
|
||||
TimeLocationParser.Parse("3:00 p.m. Room A", null,
|
||||
out string time, out string location, out bool locationParseSuccess);
|
||||
TimeLocationParser.Parse("3:00 p.m. Room A",
|
||||
out string time, out string location);
|
||||
|
||||
// Assert
|
||||
Assert.That(time, Is.EqualTo("3:00 p.m."));
|
||||
Assert.That(location, Is.EqualTo("Room A"));
|
||||
Assert.That(locationParseSuccess, Is.False); // No patterns to match against
|
||||
}
|
||||
|
||||
[Test]
|
||||
|
||||
@@ -156,18 +156,19 @@ public class EventOccurrenceParserIssues_Tests
|
||||
}
|
||||
|
||||
[Test]
|
||||
public void Parse_LocationParseFailure_ReportsIssue()
|
||||
public void Parse_LocationExtraction_WorksWithoutPatterns()
|
||||
{
|
||||
// Arrange
|
||||
// Locations that don't match "Room *" or "Hall *" patterns
|
||||
// The timeLocationRegex needs to match to extract location, so we need valid time format
|
||||
var testContent = "Test Event March 15 2:00 p.m. Auditorium A\n" + // Doesn't match Room * or Hall *
|
||||
"Test Event March 15 3:00 p.m. Room 101\n" + // This should match "Room *"
|
||||
"Test Event March 15 4:00 p.m. Conference Center"; // Doesn't match any pattern
|
||||
// Test that locations are extracted correctly without pattern matching
|
||||
// Locations should be extracted as everything after the time
|
||||
var testContent = "Test Event - MS\n" +
|
||||
"Submit Entry March 15 2:00 p.m. Auditorium A\n" +
|
||||
"Judging March 15 3:00 p.m. Room 101\n" +
|
||||
"Pick-up March 15 4:00 p.m. Conference Center\n" +
|
||||
"Final March 15 5:00 p.m."; // No location
|
||||
var tempFile = EventOccurrenceParserTestHelpers.CreateTempFile(testContent);
|
||||
var events = new[] { EventOccurrenceParserTestHelpers.CreateTestEvent("Test Event") };
|
||||
var locationConfig = EventOccurrenceParserTestHelpers.CreateLocationConfig("Room *", "Hall *");
|
||||
var parser = new EventOccurrenceParser(tempFile, events, locationConfig);
|
||||
var parser = new EventOccurrenceParser(tempFile, events);
|
||||
|
||||
try
|
||||
{
|
||||
@@ -175,32 +176,16 @@ public class EventOccurrenceParserIssues_Tests
|
||||
var result = parser.Parse();
|
||||
|
||||
// Assert
|
||||
// Should have location parse failures for unmatched locations
|
||||
// Note: Location issues are only reported when:
|
||||
// 1. Time/location regex matches (can extract location)
|
||||
// 2. Location part is not empty
|
||||
// 3. Patterns are configured
|
||||
// 4. No pattern matches
|
||||
var locationIssues = result.Issues.Where(i => i.IssueType == ParsingIssueType.LocationParseFailure).ToList();
|
||||
// Should parse successfully - location parsing no longer uses patterns, locations are extracted as-is
|
||||
// Verify that locations are extracted correctly without pattern validation
|
||||
|
||||
// The parser should report location parse failures for "Auditorium A" and "Conference Center"
|
||||
// But only if the timeLocationRegex successfully extracts them as locations
|
||||
if (locationIssues.Any())
|
||||
{
|
||||
foreach (var issue in locationIssues)
|
||||
{
|
||||
Assert.That(issue.Message, Does.Contain("does not match any configured pattern"));
|
||||
}
|
||||
|
||||
// Verify that "Room 101" was parsed successfully (no issue for it)
|
||||
Assert.That(locationIssues, Has.None.Matches<ParsingIssue>(i => i.LineContent.Contains("Room 101")));
|
||||
}
|
||||
else
|
||||
{
|
||||
// If no location issues, it might be because the regex didn't extract locations properly
|
||||
// This is still a valid test - we're verifying the parser behavior
|
||||
Assert.Pass("Location parsing may not extract locations in all cases - this is acceptable behavior");
|
||||
}
|
||||
// Verify that locations are extracted correctly
|
||||
var occurrences = result.Occurrences.Values.SelectMany(list => list).ToList();
|
||||
Assert.That(occurrences, Has.Count.GreaterThan(0), "Should parse at least some occurrences");
|
||||
|
||||
// Verify locations are extracted
|
||||
var locations = occurrences.Select(eo => eo.Location).Where(loc => !string.IsNullOrWhiteSpace(loc)).ToList();
|
||||
Assert.That(locations, Has.Count.GreaterThan(0), "Should extract at least some locations");
|
||||
}
|
||||
finally
|
||||
{
|
||||
@@ -216,12 +201,11 @@ public class EventOccurrenceParserIssues_Tests
|
||||
"Unknown Event March 15 2:00 p.m. Room 101\n" + // MissingEventDefinition
|
||||
"Test Event February 30 2:00 p.m. Room 101\n" + // DateParseFailure (invalid date)
|
||||
"Test Event March 15 invalid time format Room 101\n" + // TimeParseFailure (no AM/PM)
|
||||
"Test Event March 15 3:00 p.m. Unmatched Location\n" + // LocationParseFailure (if location extracted)
|
||||
"Test Event March 15 3:00 p.m. Unmatched Location\n" + // Location extracted as-is (no validation)
|
||||
"Valid Event March 20 4:00 p.m. Room 202"; // Valid line
|
||||
var tempFile = EventOccurrenceParserTestHelpers.CreateTempFile(testContent);
|
||||
var events = new[] { EventOccurrenceParserTestHelpers.CreateTestEvent("Valid Event"), EventOccurrenceParserTestHelpers.CreateTestEvent("Test Event") };
|
||||
var locationConfig = EventOccurrenceParserTestHelpers.CreateLocationConfig("Room *");
|
||||
var parser = new EventOccurrenceParser(tempFile, events, locationConfig);
|
||||
var parser = new EventOccurrenceParser(tempFile, events);
|
||||
|
||||
try
|
||||
{
|
||||
@@ -349,9 +333,8 @@ public class EventOccurrenceParserIssues_Tests
|
||||
EventOccurrenceParserTestHelpers.CreateTestEvent("Test Event"),
|
||||
EventOccurrenceParserTestHelpers.CreateTestEvent("Another Event")
|
||||
};
|
||||
// All locations match the patterns
|
||||
var locationConfig = EventOccurrenceParserTestHelpers.CreateLocationConfig("Room *", "Hall *");
|
||||
var parser = new EventOccurrenceParser(tempFile, events, locationConfig);
|
||||
// Locations are extracted without pattern matching
|
||||
var parser = new EventOccurrenceParser(tempFile, events);
|
||||
|
||||
try
|
||||
{
|
||||
@@ -378,26 +361,15 @@ public class EventOccurrenceParserIssues_Tests
|
||||
// Note: If the test event wasn't parsed, it might be due to location parsing or other edge cases
|
||||
// The important thing is that the parser doesn't crash and processes the input
|
||||
|
||||
// Verify no location parse failures for locations that match patterns
|
||||
// Note: Location parsing only reports failures when:
|
||||
// 1. Location is successfully extracted from time/location string
|
||||
// 2. Patterns are configured
|
||||
// 3. No pattern matches
|
||||
// If location isn't extracted, no issue is created (which is also acceptable)
|
||||
var locationIssues = result.Issues.Where(i => i.IssueType == ParsingIssueType.LocationParseFailure).ToList();
|
||||
|
||||
// Verify that locations that match patterns don't create issues
|
||||
// "Room 101" should match "Room *", "Hall A" and "Hall B" should match "Hall *"
|
||||
// Note: The parser might create location issues if the location extraction doesn't work perfectly,
|
||||
// but we verify that at least the test event lines don't create false positives
|
||||
var locationIssuesForTestEvents = locationIssues.Where(i =>
|
||||
i.LineContent.Contains("Test Event") && i.LineContent.Contains("Room 101") ||
|
||||
i.LineContent.Contains("Another Event") && i.LineContent.Contains("Hall B")).ToList();
|
||||
|
||||
// The important thing is that matching locations for our test events don't create false positives
|
||||
// "Opening Session" might have different behavior since it's in GeneralSchedule section
|
||||
Assert.That(locationIssuesForTestEvents, Has.Count.EqualTo(0),
|
||||
"Should have no location parse failures for test event locations that match configured patterns");
|
||||
// Verify locations are extracted correctly (pattern matching is no longer used)
|
||||
var testEventOccurrence = result.Occurrences.ContainsKey(testEvent)
|
||||
? result.Occurrences[testEvent].FirstOrDefault()
|
||||
: null;
|
||||
if (testEventOccurrence != null)
|
||||
{
|
||||
Assert.That(testEventOccurrence.Location, Is.EqualTo("Room 101"),
|
||||
"Location should be extracted correctly without pattern matching");
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
@@ -419,8 +391,7 @@ public class EventOccurrenceParserIssues_Tests
|
||||
// For General Schedule section, we don't need a specific event definition
|
||||
// The parser will use EventDefinition.GeneralSchedule
|
||||
var events = Array.Empty<EventDefinition>();
|
||||
var locationConfig = EventOccurrenceParserTestHelpers.CreateLocationConfig("Mtg. Room *", "Room *");
|
||||
var parser = new EventOccurrenceParser(tempFile, events, locationConfig);
|
||||
var parser = new EventOccurrenceParser(tempFile, events);
|
||||
|
||||
try
|
||||
{
|
||||
@@ -477,14 +448,13 @@ public class EventOccurrenceParserIssues_Tests
|
||||
var months = new[] { "January", "February", "March", "April", "May", "June",
|
||||
"July", "August", "September", "October", "November", "December" };
|
||||
var events = new[] { EventOccurrenceParserTestHelpers.CreateTestEvent("Test Event") };
|
||||
var locationConfig = EventOccurrenceParserTestHelpers.CreateLocationConfig("Room *");
|
||||
|
||||
foreach (var month in months)
|
||||
{
|
||||
var testContent = $"Test Event – MS\n" +
|
||||
$"Submit Entry {month} 15 3:00 p.m. Room A";
|
||||
var tempFile = EventOccurrenceParserTestHelpers.CreateTempFile(testContent);
|
||||
var parser = new EventOccurrenceParser(tempFile, events, locationConfig);
|
||||
var parser = new EventOccurrenceParser(tempFile, events);
|
||||
|
||||
try
|
||||
{
|
||||
|
||||
@@ -96,55 +96,6 @@ public class EventOccurrenceParser_Tests
|
||||
.ToList();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Analyzes location parsing failures and extracts common patterns.
|
||||
/// </summary>
|
||||
private static Dictionary<string, int> AnalyzeLocationFailures(
|
||||
List<ParsingIssue> locationIssues, List<string> fileLines)
|
||||
{
|
||||
var locationPatterns = new Dictionary<string, int>();
|
||||
|
||||
foreach (var issue in locationIssues)
|
||||
{
|
||||
// Try to extract the location part from the line
|
||||
// The format is typically: "EventName Month Day Time Location"
|
||||
var parts = issue.LineContent.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
|
||||
|
||||
// Look for location-like strings (usually after time)
|
||||
// This is a heuristic - we'll look for parts that don't match date/time patterns
|
||||
var timeRegex = new System.Text.RegularExpressions.Regex(
|
||||
@"\d{1,2}:?\d{0,2}\s*[AaPp]\.?[Mm]\.?|NOON");
|
||||
|
||||
bool foundTime = false;
|
||||
var locationParts = new List<string>();
|
||||
|
||||
foreach (var part in parts)
|
||||
{
|
||||
if (timeRegex.IsMatch(part) || part == "NOON")
|
||||
{
|
||||
foundTime = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (foundTime && !string.IsNullOrWhiteSpace(part))
|
||||
{
|
||||
locationParts.Add(part);
|
||||
}
|
||||
}
|
||||
|
||||
if (locationParts.Any())
|
||||
{
|
||||
var location = string.Join(" ", locationParts).Trim();
|
||||
if (!string.IsNullOrWhiteSpace(location))
|
||||
{
|
||||
locationPatterns.TryGetValue(location, out var count);
|
||||
locationPatterns[location] = count + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return locationPatterns;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Counts HS vs MS event sections in the file.
|
||||
@@ -300,8 +251,7 @@ public class EventOccurrenceParser_Tests
|
||||
// Arrange
|
||||
var events = TestEntityHandler.GetEvents();
|
||||
var fileInfo = TestEntityHandler.GetEventOccurrenceNationalsFileInfo();
|
||||
var locationConfig = LocationParsingConfiguration.Default;
|
||||
var parser = new EventOccurrenceParser(fileInfo, events, locationConfig);
|
||||
var parser = new EventOccurrenceParser(fileInfo, events);
|
||||
|
||||
// Act
|
||||
var result = parser.Parse();
|
||||
@@ -369,19 +319,7 @@ public class EventOccurrenceParser_Tests
|
||||
}
|
||||
}
|
||||
|
||||
// Pattern Analysis
|
||||
var locationFailures = fixableIssues.Where(i => i.IssueType == ParsingIssueType.LocationParseFailure).ToList();
|
||||
if (locationFailures.Any())
|
||||
{
|
||||
Console.WriteLine($"\n--- Location Parse Failure Analysis ---");
|
||||
var locationPatterns = AnalyzeLocationFailures(locationFailures, fileLines);
|
||||
var topLocations = locationPatterns.OrderByDescending(x => x.Value).Take(10);
|
||||
Console.WriteLine($"Top unmatched location strings:");
|
||||
foreach (var loc in topLocations)
|
||||
{
|
||||
Console.WriteLine($" \"{loc.Key}\" (appears {loc.Value} times)");
|
||||
}
|
||||
}
|
||||
// Pattern Analysis - LocationParseFailure issues are no longer created (pattern matching removed)
|
||||
|
||||
var unmatchedLines = fixableIssues.Where(i => i.IssueType == ParsingIssueType.UnmatchedLine).ToList();
|
||||
if (unmatchedLines.Any())
|
||||
@@ -408,8 +346,7 @@ public class EventOccurrenceParser_Tests
|
||||
// Arrange
|
||||
var events = TestEntityHandler.GetEvents();
|
||||
var fileInfo = TestEntityHandler.GetEventOccurrenceStateFileInfo();
|
||||
var locationConfig = LocationParsingConfiguration.Default;
|
||||
var parser = new EventOccurrenceParser(fileInfo, events, locationConfig);
|
||||
var parser = new EventOccurrenceParser(fileInfo, events);
|
||||
|
||||
// Act
|
||||
var result = parser.Parse();
|
||||
@@ -477,19 +414,7 @@ public class EventOccurrenceParser_Tests
|
||||
}
|
||||
}
|
||||
|
||||
// Pattern Analysis
|
||||
var locationFailures = fixableIssues.Where(i => i.IssueType == ParsingIssueType.LocationParseFailure).ToList();
|
||||
if (locationFailures.Any())
|
||||
{
|
||||
Console.WriteLine($"\n--- Location Parse Failure Analysis ---");
|
||||
var locationPatterns = AnalyzeLocationFailures(locationFailures, fileLines);
|
||||
var topLocations = locationPatterns.OrderByDescending(x => x.Value).Take(10);
|
||||
Console.WriteLine($"Top unmatched location strings:");
|
||||
foreach (var loc in topLocations)
|
||||
{
|
||||
Console.WriteLine($" \"{loc.Key}\" (appears {loc.Value} times)");
|
||||
}
|
||||
}
|
||||
// Pattern Analysis - LocationParseFailure issues are no longer created (pattern matching removed)
|
||||
|
||||
var unmatchedLines = fixableIssues.Where(i => i.IssueType == ParsingIssueType.UnmatchedLine).ToList();
|
||||
if (unmatchedLines.Any())
|
||||
@@ -516,8 +441,7 @@ public class EventOccurrenceParser_Tests
|
||||
// Arrange
|
||||
var events = TestEntityHandler.GetEvents();
|
||||
var fileInfo = TestEntityHandler.GetEventOccurrenceState2024FileInfo();
|
||||
var locationConfig = LocationParsingConfiguration.Default;
|
||||
var parser = new EventOccurrenceParser(fileInfo, events, locationConfig);
|
||||
var parser = new EventOccurrenceParser(fileInfo, events);
|
||||
|
||||
// Act
|
||||
var result = parser.Parse();
|
||||
@@ -585,19 +509,7 @@ public class EventOccurrenceParser_Tests
|
||||
}
|
||||
}
|
||||
|
||||
// Pattern Analysis
|
||||
var locationFailures = fixableIssues.Where(i => i.IssueType == ParsingIssueType.LocationParseFailure).ToList();
|
||||
if (locationFailures.Any())
|
||||
{
|
||||
Console.WriteLine($"\n--- Location Parse Failure Analysis ---");
|
||||
var locationPatterns = AnalyzeLocationFailures(locationFailures, fileLines);
|
||||
var topLocations = locationPatterns.OrderByDescending(x => x.Value).Take(10);
|
||||
Console.WriteLine($"Top unmatched location strings:");
|
||||
foreach (var loc in topLocations)
|
||||
{
|
||||
Console.WriteLine($" \"{loc.Key}\" (appears {loc.Value} times)");
|
||||
}
|
||||
}
|
||||
// Pattern Analysis - LocationParseFailure issues are no longer created (pattern matching removed)
|
||||
|
||||
var unmatchedLines = fixableIssues.Where(i => i.IssueType == ParsingIssueType.UnmatchedLine).ToList();
|
||||
if (unmatchedLines.Any())
|
||||
@@ -617,4 +529,124 @@ public class EventOccurrenceParser_Tests
|
||||
// Test passes if no exceptions were thrown
|
||||
Assert.Pass($"Successfully parsed {totalParsed} occurrences with {result.Issues.Count} issues ({fixableIssues.Count} fixable)");
|
||||
}
|
||||
|
||||
/// <summary>
/// Parses lines 64-92 of the state event-occurrence fixture in isolation (the slice is copied
/// to a temp file) and verifies that MS occurrences are parsed, that HS content produces no
/// issues and is reported through <c>SkippedHSSectionHeaders</c>, that the '*' continuation
/// lines produce no issues, and that the NOON and 11:59 time formats are recognised.
/// </summary>
[Test]
public void Parse_Section_Lines64To92_ChildrensStoriesToConstructionChallenge()
{
    // Arrange
    // The slice is described with the fixture's own 1-based line numbers so that the
    // comments below ("line 70", "lines 72-86") can be mapped back to the fixture file.
    const int firstSourceLine = 64;
    const int lastSourceLine = 92;
    const int firstHsCandidateLine = 72;
    const int lastHsCandidateLine = 86;

    // The parser only ever sees the temp file, so the line numbers it reports are relative
    // to the slice. Adding this offset converts them back to fixture numbering.
    // NOTE(review): assumes ParsingIssue.LineNumber is 1-based - confirm against the parser.
    const int lineOffset = firstSourceLine - 1;

    var allLines = File.ReadAllLines(TestEntityHandler.GetEventOccurrenceStateFileInfo().FullName);
    var sectionLines = allLines
        .Skip(firstSourceLine - 1)
        .Take(lastSourceLine - firstSourceLine + 1)
        .ToArray();
    var sectionContent = string.Join("\n", sectionLines);

    var tempFile = EventOccurrenceParserTestHelpers.CreateTempFile(sectionContent);
    var events = TestEntityHandler.GetEvents();
    var parser = new EventOccurrenceParser(tempFile, events);

    try
    {
        // Act
        var result = parser.Parse();

        // Assert - Should parse without exceptions
        Assert.That(result, Is.Not.Null, "Parser should return a result");

        var allOccurrences = result.Occurrences.Values.SelectMany(list => list).ToList();
        var totalOccurrences = allOccurrences.Count;

        // Look up the MS event definitions this section is expected to populate
        var childrensStories = events.FirstOrDefault(e => e.Name.Contains("Children's Stories", StringComparison.OrdinalIgnoreCase));
        var coding = events.FirstOrDefault(e => e.Name == "Coding");
        var communityServiceVideo = events.FirstOrDefault(e => e.Name.Contains("Community Service Video", StringComparison.OrdinalIgnoreCase));
        var constructionChallenge = events.FirstOrDefault(e => e.Name.Contains("Construction Challenge", StringComparison.OrdinalIgnoreCase));

        // Expected MS occurrences in the fixture:
        //   Children's Stories – MS:      5 (lines 65-69)
        //   Coding – MS:                  2 (lines 76-77)
        //   Community Service Video – MS: 4 (lines 79-82)
        //   Construction Challenge – MS:  5 (lines 88-92)
        //   Total:                        16
        var msEventCount = 0;
        foreach (var msEvent in new[] { childrensStories, coding, communityServiceVideo, constructionChallenge })
        {
            // A definition may be missing from the test data; a null key must never reach the dictionary.
            if (msEvent != null && result.Occurrences.TryGetValue(msEvent, out var msOccurrences))
            {
                msEventCount += msOccurrences.Count;
            }
        }

        // HS events must be skipped gracefully, i.e. no issue may refer to them.
        // The line-number window is expressed in fixture numbering, hence the offset.
        var hsIssues = result.Issues.Where(i =>
            i.LineContent.Contains("Coding – HS") ||
            (i.LineContent.Contains("CAD") && i.LineContent.Contains("HS")) ||
            (i.LineNumber + lineOffset >= firstHsCandidateLine &&
             i.LineNumber + lineOffset <= lastHsCandidateLine &&
             IsHighSchoolEvent(i.LineContent))
        ).ToList();

        // HS section headers that were skipped are reported separately from issues
        var skippedHSHeaders = result.SkippedHSSectionHeaders;

        // Line 70 starts with "*The" - this enters continuation mode, so both line 70 and
        // line 71 should be skipped without being reported.
        var continuationLineIssues = result.Issues.Where(i =>
            i.LineContent.Contains("books of semifinalist teams") ||
            i.LineContent.Contains("be returned to teams")
        ).ToList();

        // Specific time formats that must survive parsing
        var noonOccurrence = allOccurrences
            .FirstOrDefault(eo => eo.Time.Contains("NOON", StringComparison.OrdinalIgnoreCase));

        var lateTimeOccurrence = allOccurrences
            .FirstOrDefault(eo => eo.Time.Contains("11:59", StringComparison.OrdinalIgnoreCase));

        // Output detailed analysis
        Console.WriteLine($"\n=== Section Lines 64-92 Parsing Results ===");
        Console.WriteLine($"Total occurrences parsed: {totalOccurrences}");
        Console.WriteLine($"MS event occurrences: {msEventCount}");
        Console.WriteLine($"Total issues: {result.Issues.Count}");
        Console.WriteLine($"HS-related issues: {hsIssues.Count}");
        Console.WriteLine($"Skipped HS section headers: {skippedHSHeaders.Count}");
        Console.WriteLine($"Continuation line issues: {continuationLineIssues.Count}");

        Console.WriteLine($"\n--- Issue Types ---");
        foreach (var issueType in result.Issues.GroupBy(i => i.IssueType))
        {
            Console.WriteLine($"  {issueType.Key}: {issueType.Count()}");
        }

        // Assertions
        Assert.That(totalOccurrences, Is.GreaterThan(0), "Should parse at least some occurrences");
        Assert.That(msEventCount, Is.GreaterThanOrEqualTo(14), "Should parse most MS occurrences (at least 14 out of 16)");
        // HS events should not create issues - they should be skipped gracefully
        Assert.That(hsIssues, Has.Count.EqualTo(0), "HS events should be skipped gracefully without creating issues");
        // HS section headers should be tracked
        Assert.That(skippedHSHeaders, Has.Count.GreaterThanOrEqualTo(2),
            "Should track at least 2 of the HS section headers in this slice (candidates: Coding - HS, CAD Architecture - HS, CAD Engineering - HS)");
        // Line 70 (starts with "*The") enters continuation mode and both line 70 and 71 should be skipped without issues
        Assert.That(continuationLineIssues, Has.Count.EqualTo(0),
            "Continuation lines starting with '*' and subsequent lines should be skipped without issues");
        Assert.That(noonOccurrence, Is.Not.Null, "Should parse NOON time format");
        Assert.That(lateTimeOccurrence, Is.Not.Null, "Should parse 11:59 p.m. time format");

        // Verify specific locations are parsed
        if (childrensStories != null && result.Occurrences.TryGetValue(childrensStories, out var childrensStoriesOccurrences))
        {
            var locations = childrensStoriesOccurrences
                .Select(eo => eo.Location)
                .Where(loc => !string.IsNullOrWhiteSpace(loc))
                .ToList();
            Assert.That(locations, Has.Count.GreaterThan(0), "Children's Stories should have locations parsed");
        }

        // Test passes with detailed information
        Assert.Pass($"Successfully parsed section: {totalOccurrences} occurrences, {result.Issues.Count} issues, {msEventCount} MS events");
    }
    finally
    {
        // Always remove the temp file, including when an assertion (or Assert.Pass) throws.
        EventOccurrenceParserTestHelpers.CleanupTempFile(tempFile);
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user