Enhance event occurrence parsing to skip unmatched high school section headers

This commit introduces a new property to track skipped high school section headers in the EventOccurrenceParseResult and EventOccurrenceParserResult classes. The EventOccurrenceParser has been updated to gracefully skip HS section headers that do not match any event definitions, improving the parsing logic. Additionally, the LocationParsingConfiguration has been removed from the EventOccurrenceParser, simplifying its constructor. Unit tests have been updated to reflect these changes and ensure correct behavior during parsing.
This commit is contained in:
2026-01-09 00:14:19 -05:00
parent f916cfad6b
commit 19e5ef0675
10 changed files with 279 additions and 372 deletions
+7 -6
View File
@@ -30,6 +30,12 @@ public class EventOccurrenceParseResult
/// </summary>
public List<ParsingIssue> Issues { get; set; } = new();
/// <summary>
/// List of high school (HS) section headers that were encountered but skipped
/// because they don't match any event definition in the system.
/// </summary>
public List<string> SkippedHSSectionHeaders { get; set; } = new();
/// <summary>
/// Total number of event occurrences successfully parsed.
/// </summary>
@@ -110,11 +116,6 @@ public enum ParsingIssueType
/// <summary>
/// Invalid format or other parsing issue.
/// </summary>
InvalidFormat,
/// <summary>
/// Location parsing failed (no matching pattern found).
/// </summary>
LocationParseFailure
InvalidFormat
}
+5 -19
View File
@@ -24,33 +24,19 @@ public static class LineClassifier
/// <summary>
/// Determines if a line is a continuation/wrapped line that should be skipped.
/// These are typically lines that:
/// - Start with lowercase or special characters (not event names)
/// - Start with "*" (marks the start of a continuation block)
/// - Are parenthetical notes like "(Semifinalists only)"
/// - Are informational text like "Schedule Posted on..."
/// </summary>
public static bool IsContinuationLine(string line)
{
var trimmed = line.Trim();
// Skip parenthetical notes
if (trimmed.StartsWith("(", StringComparison.Ordinal) && trimmed.EndsWith(")", StringComparison.Ordinal))
// Check if line starts with "*" (marks continuation block start)
if (trimmed.StartsWith("*", StringComparison.Ordinal))
return true;
// Skip lines that are clearly continuation text (start with lowercase, common continuation words)
if (trimmed.Length > 0 && char.IsLower(trimmed[0]))
{
// Check if it starts with common continuation words
var continuationPrefixes = new[] { "be ", "the ", "and ", "or ", "to ", "a ", "an ", "will ", "may ", "can " };
foreach (var prefix in continuationPrefixes)
{
if (trimmed.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
return true;
}
}
// Skip informational lines that don't contain dates/times
if (trimmed.Contains("Schedule Posted", StringComparison.OrdinalIgnoreCase) ||
trimmed.Contains("Note:", StringComparison.OrdinalIgnoreCase))
// Skip parenthetical notes
if (trimmed.StartsWith("(", StringComparison.Ordinal) && trimmed.EndsWith(")", StringComparison.Ordinal))
return true;
return false;
@@ -1,11 +1,10 @@
using System.Text.RegularExpressions;
using Core.Models;
namespace Core.Parsers.EventOccurrence;
/// <summary>
/// Parses time and location from combined strings.
/// Handles time ranges, location extraction, and pattern matching.
/// Extracts time using regex, then uses everything after the time as the location.
/// </summary>
public static class TimeLocationParser
{
@@ -28,21 +27,18 @@ public static class TimeLocationParser
RegexOptions.Compiled | RegexOptions.IgnoreCase);
/// <summary>
/// Parses time and location from the timeAndLocation string using configurable location patterns.
/// Parses time and location from the timeAndLocation string.
/// Extracts time using regex, then uses everything after the time as the location (after cleaning time fragments).
/// </summary>
/// <param name="timeAndLocation">The combined time and location string.</param>
/// <param name="locationConfig">The location parsing configuration with patterns.</param>
/// <param name="time">Output parameter: the parsed time string.</param>
/// <param name="location">Output parameter: the parsed location string.</param>
/// <param name="locationParseSuccess">Output parameter: whether location parsing was successful.</param>
public static void Parse(
string timeAndLocation,
LocationParsingConfiguration? locationConfig,
out string time,
out string location,
out bool locationParseSuccess)
out string location)
{
// Try to separate time from location using the time regex
// Extract time using regex
var timeLocationMatch = TimeLocationRegex.Match(timeAndLocation);
if (!timeLocationMatch.Success)
@@ -50,7 +46,6 @@ public static class TimeLocationParser
// If time regex doesn't match, use the whole string as time
time = timeAndLocation.Trim();
location = string.Empty;
locationParseSuccess = false;
return;
}
@@ -63,61 +58,12 @@ public static class TimeLocationParser
if (string.IsNullOrWhiteSpace(locationPart))
{
location = string.Empty;
locationParseSuccess = true; // Consider it a success since no location is needed
return;
}
// Clean up location part - remove any remaining time components
// Clean location of any remaining time fragments
// (e.g., " 12:15 p.m. Exhibit Hall C" -> "Exhibit Hall C")
locationPart = CleanLocationText(locationPart);
if (string.IsNullOrWhiteSpace(locationPart))
{
location = string.Empty;
locationParseSuccess = true; // No location after cleaning is also valid
return;
}
// Try to match location using configurable patterns
(location, locationParseSuccess) = TryMatchLocation(locationPart, locationConfig);
// If no pattern matched but we have a location, use it anyway
// This allows parsing to continue while still tracking that the location didn't match a pattern
if (!locationParseSuccess)
{
location = locationPart;
}
}
/// <summary>
/// Attempts to match a location string against configured patterns.
/// </summary>
private static (string location, bool success) TryMatchLocation(
string locationPart,
LocationParsingConfiguration? locationConfig)
{
// No patterns configured - can't match
if (locationConfig == null || !locationConfig.LocationPatterns.Any())
{
return (string.Empty, false);
}
// Try initial match
var location = LocationPatternMatcher.Match(locationPart, locationConfig.LocationPatterns);
if (!string.IsNullOrEmpty(location))
{
return (location, true);
}
// Try matching against trimmed version (handles extra whitespace)
var cleanedForMatching = locationPart.Trim();
location = LocationPatternMatcher.Match(cleanedForMatching, locationConfig.LocationPatterns);
if (!string.IsNullOrEmpty(location))
{
return (cleanedForMatching, true);
}
return (string.Empty, false);
location = CleanLocationText(locationPart);
}
/// <summary>
+65 -22
View File
@@ -13,19 +13,18 @@ public class EventOccurrenceParserResult
{
public IDictionary<EventDefinition, List<Entities.EventOccurrence>> Occurrences { get; set; } = new Dictionary<EventDefinition, List<Entities.EventOccurrence>>();
public List<ParsingIssue> Issues { get; set; } = new();
public List<string> SkippedHSSectionHeaders { get; set; } = new();
}
public class EventOccurrenceParser
{
private FileSystemInfo _txtFile;
private ICollection<EventDefinition> _events;
private LocationParsingConfiguration? _locationConfig;
public EventOccurrenceParser(FileSystemInfo txtFile, ICollection<EventDefinition> events, LocationParsingConfiguration? locationConfig = null)
public EventOccurrenceParser(FileSystemInfo txtFile, ICollection<EventDefinition> events)
{
_events = events;
_txtFile = txtFile;
_locationConfig = locationConfig;
}
public EventOccurrenceParserResult Parse()
@@ -34,6 +33,8 @@ public class EventOccurrenceParser
var occurrences = result.Occurrences;
var issues = result.Issues;
EventDefinition? currentEventDefinition = null;
bool inContinuationMode = false;
bool inHSSection = false;
var lines = File.ReadLines(_txtFile.FullName);
foreach (var (line, index) in lines.Select((line, index) => (line, index + 1)))
@@ -44,11 +45,19 @@ public class EventOccurrenceParser
// Skip empty lines
if (EventOccurrenceParsers.LineClassifier.IsEmptyLine(normalizedLine))
{
// Empty lines break continuation mode
inContinuationMode = false;
continue;
}
// Skip comment lines (starting with "#") - use grammar parser
if (EventOccurrenceParsers.LineClassifier.IsCommentLine(normalizedLine))
{
// Comment lines break continuation mode
inContinuationMode = false;
continue;
}
// Try to parse occurrence line using grammar parser
var occurrenceLine = EventOccurrenceGrammar.TryParseOccurrenceLine(normalizedLine);
@@ -61,10 +70,23 @@ public class EventOccurrenceParser
{
var (eventNamePart, schoolLevel) = sectionHeader.Value;
// Section headers break continuation mode
inContinuationMode = false;
// Use fuzzy matching to find the best matching event definition
var evt = EventOccurrenceParsers.SectionHeaderMatcher.MatchEventDefinition(eventNamePart, _events);
if (evt == null)
{
// Check if this is an HS event - if so, skip gracefully
if (schoolLevel.Equals("HS", StringComparison.OrdinalIgnoreCase))
{
result.SkippedHSSectionHeaders.Add(normalizedLine);
currentEventDefinition = null; // Skip subsequent occurrences
inHSSection = true; // Mark that we're in an HS section
continue; // No issue created
}
// For non-HS unmatched headers, create issue as before
var bestRatio = EventOccurrenceParsers.SectionHeaderMatcher.GetBestMatchRatio(eventNamePart, _events);
issues.Add(new ParsingIssue
{
@@ -76,12 +98,16 @@ public class EventOccurrenceParser
continue;
}
currentEventDefinition = evt;
inHSSection = false; // Reset HS section flag for MS events
continue;
}
// Check for General Schedule/Session using grammar parser
if (EventOccurrenceParsers.SectionHeaderMatcher.IsGeneralSchedule(normalizedLine))
{
// General schedule breaks continuation mode
inContinuationMode = false;
inHSSection = false; // Reset HS section flag
currentEventDefinition = EventDefinition.GeneralSchedule;
continue;
}
@@ -89,9 +115,22 @@ public class EventOccurrenceParser
// Also check for simple "MS" or "HS" in line (backward compatibility)
if (EventOccurrenceParsers.SectionHeaderMatcher.HasSchoolLevel(normalizedLine))
{
// Section headers break continuation mode
inContinuationMode = false;
var evt = EventOccurrenceParsers.SectionHeaderMatcher.MatchEventDefinition(normalizedLine, _events);
if (evt == null)
{
// Check if this is an HS event - if so, skip gracefully
if (normalizedLine.Contains("HS", StringComparison.OrdinalIgnoreCase))
{
result.SkippedHSSectionHeaders.Add(normalizedLine);
currentEventDefinition = null; // Skip subsequent occurrences
inHSSection = true; // Mark that we're in an HS section
continue; // No issue created
}
// For non-HS unmatched headers, create issue as before
var bestRatio = EventOccurrenceParsers.SectionHeaderMatcher.GetBestMatchRatio(normalizedLine, _events);
issues.Add(new ParsingIssue
{
@@ -103,11 +142,18 @@ public class EventOccurrenceParser
continue;
}
currentEventDefinition = evt;
inHSSection = false; // Reset HS section flag for MS events
continue;
}
// Skip continuation lines (lines that look like they're continuing from previous line)
if (EventOccurrenceParsers.LineClassifier.IsContinuationLine(normalizedLine))
// Check if line starts with "*" to enter continuation mode
if (normalizedLine.TrimStart().StartsWith("*", StringComparison.Ordinal))
{
inContinuationMode = true;
}
// Skip continuation lines (in continuation mode OR line starts with "*" or is parenthetical)
if (inContinuationMode || EventOccurrenceParsers.LineClassifier.IsContinuationLine(normalizedLine))
{
continue;
}
@@ -127,6 +173,15 @@ public class EventOccurrenceParser
continue;
}
// Occurrence lines break continuation mode
inContinuationMode = false;
// Skip occurrences under HS sections (they won't match any event definition)
if (inHSSection)
{
continue;
}
var (occurrenceName, month, dayOfMonthStr, timeAndLocation) = occurrenceLine.Value;
// Remove weekday suffix from occurrence name if present
@@ -151,23 +206,8 @@ public class EventOccurrenceParser
// timeAndLocation is already normalized (hyphens normalized) since normalizedLine was sanitized
// Parse time and location using configurable patterns
EventOccurrenceParsers.TimeLocationParser.Parse(timeAndLocation, _locationConfig, out string time, out string location, out bool locationParseSuccess);
// Track location parsing failure if patterns are configured but none matched
if (!locationParseSuccess && !string.IsNullOrWhiteSpace(location))
{
if (_locationConfig != null && _locationConfig.LocationPatterns.Any())
{
issues.Add(new ParsingIssue
{
LineNumber = index,
LineContent = normalizedLine,
IssueType = ParsingIssueType.LocationParseFailure,
Message = $"Location '{location}' does not match any configured pattern"
});
}
}
// Parse time and location - extract time using regex, then use everything after time as location
EventOccurrenceParsers.TimeLocationParser.Parse(timeAndLocation, out string time, out string location);
// Parse date
DateOnly? startDate = null;
@@ -222,6 +262,9 @@ public class EventOccurrenceParser
if (!occurrences.ContainsKey(eventDefinition))
occurrences.Add(eventDefinition, []);
occurrences[eventDefinition].Add(eventOccurrence);
// Reset HS section flag when we successfully parse an occurrence (means we're in a valid section)
inHSSection = false;
}
return result;
+6 -14
View File
@@ -12,20 +12,9 @@ namespace Core.Services;
/// </summary>
public class EventOccurrenceParserService : IEventOccurrenceParserService
{
private readonly LocationParsingConfiguration? _locationConfig;
public EventOccurrenceParserService(IConfiguration? configuration = null)
{
// Load location parsing configuration from IConfiguration if provided
if (configuration != null)
{
_locationConfig = configuration.GetSection("LocationParsingSettings").Get<LocationParsingConfiguration>()
?? LocationParsingConfiguration.Default;
}
else
{
_locationConfig = LocationParsingConfiguration.Default;
}
// Configuration parameter kept for backward compatibility but not used
}
/// <inheritdoc/>
@@ -48,8 +37,8 @@ public class EventOccurrenceParserService : IEventOccurrenceParserService
File.WriteAllText(tempFile, text, Encoding.UTF8);
var fileInfo = new FileInfo(tempFile);
// Use the existing EventOccurrenceParser with location configuration
var parser = new EventOccurrenceParser(fileInfo, events, _locationConfig);
// Use the existing EventOccurrenceParser
var parser = new EventOccurrenceParser(fileInfo, events);
var parserResult = parser.Parse();
// Copy occurrences from parser result
@@ -101,6 +90,9 @@ public class EventOccurrenceParserService : IEventOccurrenceParserService
// Copy parsing issues from parser result
result.Issues.AddRange(parserResult.Issues);
// Copy skipped HS section headers from parser result
result.SkippedHSSectionHeaders.AddRange(parserResult.SkippedHSSectionHeaders);
}
finally
{
@@ -51,42 +51,23 @@ public class LineClassifier_Tests
}
[Test]
public void IsContinuationLine_StartsWithLowercaseContinuationWord_ReturnsTrue()
public void IsContinuationLine_StartsWithAsterisk_ReturnsTrue()
{
// Act & Assert
Assert.That(LineClassifier.IsContinuationLine("the event will be held"), Is.True);
Assert.That(LineClassifier.IsContinuationLine("and participants should arrive"), Is.True);
Assert.That(LineClassifier.IsContinuationLine("be sure to register"), Is.True);
Assert.That(LineClassifier.IsContinuationLine("or contact the coordinator"), Is.True);
}
[Test]
public void IsContinuationLine_StartsWithLowercase_NonContinuationWord_ReturnsFalse()
{
// Act & Assert
Assert.That(LineClassifier.IsContinuationLine("important: bring materials"), Is.False);
}
[Test]
public void IsContinuationLine_StartsWithUppercase_ReturnsFalse()
{
// Act & Assert
Assert.That(LineClassifier.IsContinuationLine("Important Event March 15"), Is.False);
}
[Test]
public void IsContinuationLine_ContainsSchedulePosted_ReturnsTrue()
{
// Act & Assert
Assert.That(LineClassifier.IsContinuationLine("Schedule Posted on website"), Is.True);
}
[Test]
public void IsContinuationLine_ContainsNote_ReturnsTrue()
{
// Act & Assert
Assert.That(LineClassifier.IsContinuationLine("Note: Additional information"), Is.True);
Assert.That(LineClassifier.IsContinuationLine("*The books of semifinalist teams"), Is.True);
Assert.That(LineClassifier.IsContinuationLine("*Note: Important details"), Is.True);
Assert.That(LineClassifier.IsContinuationLine("*This is a continuation line"), Is.True);
Assert.That(LineClassifier.IsContinuationLine(" *Line with leading whitespace"), Is.True);
}
[Test]
public void IsContinuationLine_DoesNotStartWithAsterisk_ReturnsFalse()
{
// Act & Assert
Assert.That(LineClassifier.IsContinuationLine("The event will be held"), Is.False);
Assert.That(LineClassifier.IsContinuationLine("Note: Additional information"), Is.False);
Assert.That(LineClassifier.IsContinuationLine("Important Event March 15"), Is.False);
Assert.That(LineClassifier.IsContinuationLine("Schedule Posted on website"), Is.False);
}
[Test]
@@ -1,4 +1,3 @@
using Core.Models;
using Core.Parsers.EventOccurrence;
using NUnit.Framework;
@@ -10,110 +9,69 @@ public class TimeLocationParser_Tests
[Test]
public void Parse_TimeAndLocation_ExtractsBoth()
{
// Arrange
var locationConfig = new LocationParsingConfiguration
{
LocationPatterns = new List<string> { "Room *" }
};
// Act
TimeLocationParser.Parse("10:30 a.m. Room 101", locationConfig,
out string time, out string location, out bool locationParseSuccess);
TimeLocationParser.Parse("10:30 a.m. Room 101",
out string time, out string location);
// Assert
Assert.That(time, Is.EqualTo("10:30 a.m."));
Assert.That(location, Is.EqualTo("Room 101"));
Assert.That(locationParseSuccess, Is.True);
}
[Test]
public void Parse_TimeRangeAndLocation_ExtractsTimeRangeAndLocation()
{
// Arrange
var locationConfig = new LocationParsingConfiguration
{
LocationPatterns = new List<string> { "Room *" }
};
// Act
TimeLocationParser.Parse("10:00 a.m. - 12:00 p.m. Room 202", locationConfig,
out string time, out string location, out bool locationParseSuccess);
TimeLocationParser.Parse("10:00 a.m. - 12:00 p.m. Room 202",
out string time, out string location);
// Assert
Assert.That(time, Is.EqualTo("10:00 a.m. - 12:00 p.m."));
Assert.That(location, Is.EqualTo("Room 202"));
Assert.That(locationParseSuccess, Is.True);
}
[Test]
public void Parse_NOONAndLocation_ExtractsBoth()
{
// Arrange
var locationConfig = new LocationParsingConfiguration
{
LocationPatterns = new List<string> { "Hall *" }
};
// Act
TimeLocationParser.Parse("NOON Hall C", locationConfig,
out string time, out string location, out bool locationParseSuccess);
TimeLocationParser.Parse("NOON Hall C",
out string time, out string location);
// Assert
Assert.That(time, Is.EqualTo("NOON"));
Assert.That(location, Is.EqualTo("Hall C"));
Assert.That(locationParseSuccess, Is.True);
}
[Test]
public void Parse_TimeOnly_NoLocation()
{
// Arrange
var locationConfig = new LocationParsingConfiguration
{
LocationPatterns = new List<string> { "Room *" }
};
// Act
TimeLocationParser.Parse("3:00 p.m.", locationConfig,
out string time, out string location, out bool locationParseSuccess);
TimeLocationParser.Parse("3:00 p.m.",
out string time, out string location);
// Assert
Assert.That(time, Is.EqualTo("3:00 p.m."));
Assert.That(location, Is.Empty);
Assert.That(locationParseSuccess, Is.True); // No location is valid
}
[Test]
public void Parse_LocationNotMatchingPattern_StillReturnsLocation_ReportsFailure()
public void Parse_AnyLocation_ExtractsLocationWithoutValidation()
{
// Arrange
var locationConfig = new LocationParsingConfiguration
{
LocationPatterns = new List<string> { "Room *" }
};
// Act
TimeLocationParser.Parse("10:00 a.m. Unknown Location", locationConfig,
out string time, out string location, out bool locationParseSuccess);
TimeLocationParser.Parse("10:00 a.m. Unknown Location",
out string time, out string location);
// Assert
Assert.That(time, Is.EqualTo("10:00 a.m."));
Assert.That(location, Is.EqualTo("Unknown Location"));
Assert.That(locationParseSuccess, Is.False);
}
[Test]
public void Parse_LocationWithTimeComponent_CleansTimeComponent()
{
// Arrange
var locationConfig = new LocationParsingConfiguration
{
LocationPatterns = new List<string> { "Exhibit Hall *" }
};
// Act
TimeLocationParser.Parse("10:00 a.m. - 12:15 p.m. Exhibit Hall C", locationConfig,
out string time, out string location, out bool locationParseSuccess);
TimeLocationParser.Parse("10:00 a.m. - 12:15 p.m. Exhibit Hall C",
out string time, out string location);
// Assert
Assert.That(time, Is.EqualTo("10:00 a.m. - 12:15 p.m."));
@@ -121,16 +79,15 @@ public class TimeLocationParser_Tests
}
[Test]
public void Parse_NoLocationConfig_StillExtractsTimeAndLocation()
public void Parse_AnyLocation_ExtractsAsIs()
{
// Act
TimeLocationParser.Parse("3:00 p.m. Room A", null,
out string time, out string location, out bool locationParseSuccess);
TimeLocationParser.Parse("3:00 p.m. Room A",
out string time, out string location);
// Assert
Assert.That(time, Is.EqualTo("3:00 p.m."));
Assert.That(location, Is.EqualTo("Room A"));
Assert.That(locationParseSuccess, Is.False); // No patterns to match against
}
[Test]
@@ -156,18 +156,19 @@ public class EventOccurrenceParserIssues_Tests
}
[Test]
public void Parse_LocationParseFailure_ReportsIssue()
public void Parse_LocationExtraction_WorksWithoutPatterns()
{
// Arrange
// Locations that don't match "Room *" or "Hall *" patterns
// The timeLocationRegex needs to match to extract location, so we need valid time format
var testContent = "Test Event March 15 2:00 p.m. Auditorium A\n" + // Doesn't match Room * or Hall *
"Test Event March 15 3:00 p.m. Room 101\n" + // This should match "Room *"
"Test Event March 15 4:00 p.m. Conference Center"; // Doesn't match any pattern
// Test that locations are extracted correctly without pattern matching
// Locations should be extracted as everything after the time
var testContent = "Test Event - MS\n" +
"Submit Entry March 15 2:00 p.m. Auditorium A\n" +
"Judging March 15 3:00 p.m. Room 101\n" +
"Pick-up March 15 4:00 p.m. Conference Center\n" +
"Final March 15 5:00 p.m."; // No location
var tempFile = EventOccurrenceParserTestHelpers.CreateTempFile(testContent);
var events = new[] { EventOccurrenceParserTestHelpers.CreateTestEvent("Test Event") };
var locationConfig = EventOccurrenceParserTestHelpers.CreateLocationConfig("Room *", "Hall *");
var parser = new EventOccurrenceParser(tempFile, events, locationConfig);
var parser = new EventOccurrenceParser(tempFile, events);
try
{
@@ -175,32 +176,16 @@ public class EventOccurrenceParserIssues_Tests
var result = parser.Parse();
// Assert
// Should have location parse failures for unmatched locations
// Note: Location issues are only reported when:
// 1. Time/location regex matches (can extract location)
// 2. Location part is not empty
// 3. Patterns are configured
// 4. No pattern matches
var locationIssues = result.Issues.Where(i => i.IssueType == ParsingIssueType.LocationParseFailure).ToList();
// Should parse successfully - location parsing no longer uses patterns, locations are extracted as-is
// Verify that locations are extracted correctly without pattern validation
// The parser should report location parse failures for "Auditorium A" and "Conference Center"
// But only if the timeLocationRegex successfully extracts them as locations
if (locationIssues.Any())
{
foreach (var issue in locationIssues)
{
Assert.That(issue.Message, Does.Contain("does not match any configured pattern"));
}
// Verify that "Room 101" was parsed successfully (no issue for it)
Assert.That(locationIssues, Has.None.Matches<ParsingIssue>(i => i.LineContent.Contains("Room 101")));
}
else
{
// If no location issues, it might be because the regex didn't extract locations properly
// This is still a valid test - we're verifying the parser behavior
Assert.Pass("Location parsing may not extract locations in all cases - this is acceptable behavior");
}
// Verify that locations are extracted correctly
var occurrences = result.Occurrences.Values.SelectMany(list => list).ToList();
Assert.That(occurrences, Has.Count.GreaterThan(0), "Should parse at least some occurrences");
// Verify locations are extracted
var locations = occurrences.Select(eo => eo.Location).Where(loc => !string.IsNullOrWhiteSpace(loc)).ToList();
Assert.That(locations, Has.Count.GreaterThan(0), "Should extract at least some locations");
}
finally
{
@@ -216,12 +201,11 @@ public class EventOccurrenceParserIssues_Tests
"Unknown Event March 15 2:00 p.m. Room 101\n" + // MissingEventDefinition
"Test Event February 30 2:00 p.m. Room 101\n" + // DateParseFailure (invalid date)
"Test Event March 15 invalid time format Room 101\n" + // TimeParseFailure (no AM/PM)
"Test Event March 15 3:00 p.m. Unmatched Location\n" + // LocationParseFailure (if location extracted)
"Test Event March 15 3:00 p.m. Unmatched Location\n" + // Location extracted as-is (no validation)
"Valid Event March 20 4:00 p.m. Room 202"; // Valid line
var tempFile = EventOccurrenceParserTestHelpers.CreateTempFile(testContent);
var events = new[] { EventOccurrenceParserTestHelpers.CreateTestEvent("Valid Event"), EventOccurrenceParserTestHelpers.CreateTestEvent("Test Event") };
var locationConfig = EventOccurrenceParserTestHelpers.CreateLocationConfig("Room *");
var parser = new EventOccurrenceParser(tempFile, events, locationConfig);
var parser = new EventOccurrenceParser(tempFile, events);
try
{
@@ -349,9 +333,8 @@ public class EventOccurrenceParserIssues_Tests
EventOccurrenceParserTestHelpers.CreateTestEvent("Test Event"),
EventOccurrenceParserTestHelpers.CreateTestEvent("Another Event")
};
// All locations match the patterns
var locationConfig = EventOccurrenceParserTestHelpers.CreateLocationConfig("Room *", "Hall *");
var parser = new EventOccurrenceParser(tempFile, events, locationConfig);
// Locations are extracted without pattern matching
var parser = new EventOccurrenceParser(tempFile, events);
try
{
@@ -378,26 +361,15 @@ public class EventOccurrenceParserIssues_Tests
// Note: If the test event wasn't parsed, it might be due to location parsing or other edge cases
// The important thing is that the parser doesn't crash and processes the input
// Verify no location parse failures for locations that match patterns
// Note: Location parsing only reports failures when:
// 1. Location is successfully extracted from time/location string
// 2. Patterns are configured
// 3. No pattern matches
// If location isn't extracted, no issue is created (which is also acceptable)
var locationIssues = result.Issues.Where(i => i.IssueType == ParsingIssueType.LocationParseFailure).ToList();
// Verify that locations that match patterns don't create issues
// "Room 101" should match "Room *", "Hall A" and "Hall B" should match "Hall *"
// Note: The parser might create location issues if the location extraction doesn't work perfectly,
// but we verify that at least the test event lines don't create false positives
var locationIssuesForTestEvents = locationIssues.Where(i =>
i.LineContent.Contains("Test Event") && i.LineContent.Contains("Room 101") ||
i.LineContent.Contains("Another Event") && i.LineContent.Contains("Hall B")).ToList();
// The important thing is that matching locations for our test events don't create false positives
// "Opening Session" might have different behavior since it's in GeneralSchedule section
Assert.That(locationIssuesForTestEvents, Has.Count.EqualTo(0),
"Should have no location parse failures for test event locations that match configured patterns");
// Verify locations are extracted correctly (pattern matching is no longer used)
var testEventOccurrence = result.Occurrences.ContainsKey(testEvent)
? result.Occurrences[testEvent].FirstOrDefault()
: null;
if (testEventOccurrence != null)
{
Assert.That(testEventOccurrence.Location, Is.EqualTo("Room 101"),
"Location should be extracted correctly without pattern matching");
}
}
finally
{
@@ -419,8 +391,7 @@ public class EventOccurrenceParserIssues_Tests
// For General Schedule section, we don't need a specific event definition
// The parser will use EventDefinition.GeneralSchedule
var events = Array.Empty<EventDefinition>();
var locationConfig = EventOccurrenceParserTestHelpers.CreateLocationConfig("Mtg. Room *", "Room *");
var parser = new EventOccurrenceParser(tempFile, events, locationConfig);
var parser = new EventOccurrenceParser(tempFile, events);
try
{
@@ -477,14 +448,13 @@ public class EventOccurrenceParserIssues_Tests
var months = new[] { "January", "February", "March", "April", "May", "June",
"July", "August", "September", "October", "November", "December" };
var events = new[] { EventOccurrenceParserTestHelpers.CreateTestEvent("Test Event") };
var locationConfig = EventOccurrenceParserTestHelpers.CreateLocationConfig("Room *");
foreach (var month in months)
{
var testContent = $"Test Event MS\n" +
$"Submit Entry {month} 15 3:00 p.m. Room A";
var tempFile = EventOccurrenceParserTestHelpers.CreateTempFile(testContent);
var parser = new EventOccurrenceParser(tempFile, events, locationConfig);
var parser = new EventOccurrenceParser(tempFile, events);
try
{
+126 -94
View File
@@ -96,55 +96,6 @@ public class EventOccurrenceParser_Tests
.ToList();
}
/// <summary>
/// Analyzes location parsing failures and extracts common patterns.
/// </summary>
private static Dictionary<string, int> AnalyzeLocationFailures(
List<ParsingIssue> locationIssues, List<string> fileLines)
{
var locationPatterns = new Dictionary<string, int>();
foreach (var issue in locationIssues)
{
// Try to extract the location part from the line
// The format is typically: "EventName Month Day Time Location"
var parts = issue.LineContent.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
// Look for location-like strings (usually after time)
// This is a heuristic - we'll look for parts that don't match date/time patterns
var timeRegex = new System.Text.RegularExpressions.Regex(
@"\d{1,2}:?\d{0,2}\s*[AaPp]\.?[Mm]\.?|NOON");
bool foundTime = false;
var locationParts = new List<string>();
foreach (var part in parts)
{
if (timeRegex.IsMatch(part) || part == "NOON")
{
foundTime = true;
continue;
}
if (foundTime && !string.IsNullOrWhiteSpace(part))
{
locationParts.Add(part);
}
}
if (locationParts.Any())
{
var location = string.Join(" ", locationParts).Trim();
if (!string.IsNullOrWhiteSpace(location))
{
locationPatterns.TryGetValue(location, out var count);
locationPatterns[location] = count + 1;
}
}
}
return locationPatterns;
}
/// <summary>
/// Counts HS vs MS event sections in the file.
@@ -300,8 +251,7 @@ public class EventOccurrenceParser_Tests
// Arrange
var events = TestEntityHandler.GetEvents();
var fileInfo = TestEntityHandler.GetEventOccurrenceNationalsFileInfo();
var locationConfig = LocationParsingConfiguration.Default;
var parser = new EventOccurrenceParser(fileInfo, events, locationConfig);
var parser = new EventOccurrenceParser(fileInfo, events);
// Act
var result = parser.Parse();
@@ -369,19 +319,7 @@ public class EventOccurrenceParser_Tests
}
}
// Pattern Analysis
var locationFailures = fixableIssues.Where(i => i.IssueType == ParsingIssueType.LocationParseFailure).ToList();
if (locationFailures.Any())
{
Console.WriteLine($"\n--- Location Parse Failure Analysis ---");
var locationPatterns = AnalyzeLocationFailures(locationFailures, fileLines);
var topLocations = locationPatterns.OrderByDescending(x => x.Value).Take(10);
Console.WriteLine($"Top unmatched location strings:");
foreach (var loc in topLocations)
{
Console.WriteLine($" \"{loc.Key}\" (appears {loc.Value} times)");
}
}
// Pattern Analysis - LocationParseFailure issues are no longer created (pattern matching removed)
var unmatchedLines = fixableIssues.Where(i => i.IssueType == ParsingIssueType.UnmatchedLine).ToList();
if (unmatchedLines.Any())
@@ -408,8 +346,7 @@ public class EventOccurrenceParser_Tests
// Arrange
var events = TestEntityHandler.GetEvents();
var fileInfo = TestEntityHandler.GetEventOccurrenceStateFileInfo();
var locationConfig = LocationParsingConfiguration.Default;
var parser = new EventOccurrenceParser(fileInfo, events, locationConfig);
var parser = new EventOccurrenceParser(fileInfo, events);
// Act
var result = parser.Parse();
@@ -477,19 +414,7 @@ public class EventOccurrenceParser_Tests
}
}
// Pattern Analysis
var locationFailures = fixableIssues.Where(i => i.IssueType == ParsingIssueType.LocationParseFailure).ToList();
if (locationFailures.Any())
{
Console.WriteLine($"\n--- Location Parse Failure Analysis ---");
var locationPatterns = AnalyzeLocationFailures(locationFailures, fileLines);
var topLocations = locationPatterns.OrderByDescending(x => x.Value).Take(10);
Console.WriteLine($"Top unmatched location strings:");
foreach (var loc in topLocations)
{
Console.WriteLine($" \"{loc.Key}\" (appears {loc.Value} times)");
}
}
// Pattern Analysis - LocationParseFailure issues are no longer created (pattern matching removed)
var unmatchedLines = fixableIssues.Where(i => i.IssueType == ParsingIssueType.UnmatchedLine).ToList();
if (unmatchedLines.Any())
@@ -516,8 +441,7 @@ public class EventOccurrenceParser_Tests
// Arrange
var events = TestEntityHandler.GetEvents();
var fileInfo = TestEntityHandler.GetEventOccurrenceState2024FileInfo();
var locationConfig = LocationParsingConfiguration.Default;
var parser = new EventOccurrenceParser(fileInfo, events, locationConfig);
var parser = new EventOccurrenceParser(fileInfo, events);
// Act
var result = parser.Parse();
@@ -585,19 +509,7 @@ public class EventOccurrenceParser_Tests
}
}
// Pattern Analysis
var locationFailures = fixableIssues.Where(i => i.IssueType == ParsingIssueType.LocationParseFailure).ToList();
if (locationFailures.Any())
{
Console.WriteLine($"\n--- Location Parse Failure Analysis ---");
var locationPatterns = AnalyzeLocationFailures(locationFailures, fileLines);
var topLocations = locationPatterns.OrderByDescending(x => x.Value).Take(10);
Console.WriteLine($"Top unmatched location strings:");
foreach (var loc in topLocations)
{
Console.WriteLine($" \"{loc.Key}\" (appears {loc.Value} times)");
}
}
// Pattern Analysis - LocationParseFailure issues are no longer created (pattern matching removed)
var unmatchedLines = fixableIssues.Where(i => i.IssueType == ParsingIssueType.UnmatchedLine).ToList();
if (unmatchedLines.Any())
@@ -617,4 +529,124 @@ public class EventOccurrenceParser_Tests
// Test passes if no exceptions were thrown
Assert.Pass($"Successfully parsed {totalParsed} occurrences with {result.Issues.Count} issues ({fixableIssues.Count} fixable)");
}
[Test]
public void Parse_Section_Lines64To92_ChildrensStoriesToConstructionChallenge()
{
// Arrange
// Extract lines 64-92 from the test file - contains MS and HS events with various formats
var allLines = File.ReadAllLines(TestEntityHandler.GetEventOccurrenceStateFileInfo().FullName);
var sectionLines = allLines.Skip(63).Take(29).ToArray(); // Lines 64-92 (0-indexed: 63-91)
var sectionContent = string.Join("\n", sectionLines);
var tempFile = EventOccurrenceParserTestHelpers.CreateTempFile(sectionContent);
var events = TestEntityHandler.GetEvents();
var parser = new EventOccurrenceParser(tempFile, events);
try
{
// Act
var result = parser.Parse();
// Assert - Should parse without exceptions
Assert.That(result, Is.Not.Null, "Parser should return a result");
// Count occurrences by event type
var totalOccurrences = result.Occurrences.Values.Sum(list => list.Count);
// Verify MS events are parsed
var childrensStories = events.FirstOrDefault(e => e.Name.Contains("Children's Stories", StringComparison.OrdinalIgnoreCase));
var coding = events.FirstOrDefault(e => e.Name == "Coding");
var communityServiceVideo = events.FirstOrDefault(e => e.Name.Contains("Community Service Video", StringComparison.OrdinalIgnoreCase));
var constructionChallenge = events.FirstOrDefault(e => e.Name.Contains("Construction Challenge", StringComparison.OrdinalIgnoreCase));
// Count expected MS occurrences:
// Children's Stories MS: 5 occurrences (lines 65-69)
// Coding MS: 2 occurrences (lines 76-77)
// Community Service Video MS: 4 occurrences (lines 79-82)
// Construction Challenge MS: 5 occurrences (lines 88-92)
// Total expected MS occurrences: 16
var msEventCount = 0;
if (childrensStories != null && result.Occurrences.ContainsKey(childrensStories))
msEventCount += result.Occurrences[childrensStories].Count;
if (coding != null && result.Occurrences.ContainsKey(coding))
msEventCount += result.Occurrences[coding].Count;
if (communityServiceVideo != null && result.Occurrences.ContainsKey(communityServiceVideo))
msEventCount += result.Occurrences[communityServiceVideo].Count;
if (constructionChallenge != null && result.Occurrences.ContainsKey(constructionChallenge))
msEventCount += result.Occurrences[constructionChallenge].Count;
// Verify HS events are skipped gracefully (no issues should be created for them)
var hsIssues = result.Issues.Where(i =>
i.LineContent.Contains("Coding HS") ||
i.LineContent.Contains("CAD") && i.LineContent.Contains("HS") ||
i.LineNumber >= 72 && i.LineNumber <= 86 && IsHighSchoolEvent(i.LineContent)
).ToList();
// Verify HS section headers are tracked in SkippedHSSectionHeaders
var skippedHSHeaders = result.SkippedHSSectionHeaders;
// Verify continuation lines are skipped
// Line 70 starts with "*The" - this enters continuation mode and both line 70 and 71 should be skipped
var continuationLineIssues = result.Issues.Where(i =>
i.LineContent.Contains("books of semifinalist teams") ||
i.LineContent.Contains("be returned to teams")
).ToList();
// Verify specific time formats are parsed correctly
var noonOccurrence = result.Occurrences.Values
.SelectMany(list => list)
.FirstOrDefault(eo => eo.Time.Contains("NOON", StringComparison.OrdinalIgnoreCase));
var lateTimeOccurrence = result.Occurrences.Values
.SelectMany(list => list)
.FirstOrDefault(eo => eo.Time.Contains("11:59", StringComparison.OrdinalIgnoreCase));
// Output detailed analysis
Console.WriteLine($"\n=== Section Lines 64-92 Parsing Results ===");
Console.WriteLine($"Total occurrences parsed: {totalOccurrences}");
Console.WriteLine($"MS event occurrences: {msEventCount}");
Console.WriteLine($"Total issues: {result.Issues.Count}");
Console.WriteLine($"HS-related issues: {hsIssues.Count}");
Console.WriteLine($"Skipped HS section headers: {skippedHSHeaders.Count}");
Console.WriteLine($"Continuation line issues: {continuationLineIssues.Count}");
Console.WriteLine($"\n--- Issue Types ---");
foreach (var issueType in result.Issues.GroupBy(i => i.IssueType))
{
Console.WriteLine($" {issueType.Key}: {issueType.Count()}");
}
// Assertions
Assert.That(totalOccurrences, Is.GreaterThan(0), "Should parse at least some occurrences");
Assert.That(msEventCount, Is.GreaterThanOrEqualTo(14), "Should parse most MS occurrences (at least 14 out of 16)");
// HS events should not create issues - they should be skipped gracefully
Assert.That(hsIssues, Has.Count.EqualTo(0), "HS events should be skipped gracefully without creating issues");
// HS section headers should be tracked
Assert.That(skippedHSHeaders, Has.Count.GreaterThanOrEqualTo(2), "Should track at least 2 HS section headers (Coding - HS, CAD Architecture - HS, CAD Engineering - HS)");
// Line 70 (starts with "*The") enters continuation mode and both line 70 and 71 should be skipped without issues
Assert.That(continuationLineIssues, Has.Count.EqualTo(0),
"Continuation lines starting with '*' and subsequent lines should be skipped without issues");
Assert.That(noonOccurrence, Is.Not.Null, "Should parse NOON time format");
Assert.That(lateTimeOccurrence, Is.Not.Null, "Should parse 11:59 p.m. time format");
// Verify specific locations are parsed
if (childrensStories != null && result.Occurrences.ContainsKey(childrensStories))
{
var locations = result.Occurrences[childrensStories]
.Select(eo => eo.Location)
.Where(loc => !string.IsNullOrWhiteSpace(loc))
.ToList();
Assert.That(locations, Has.Count.GreaterThan(0), "Children's Stories should have locations parsed");
}
// Test passes with detailed information
Assert.Pass($"Successfully parsed section: {totalOccurrences} occurrences, {result.Issues.Count} issues, {msEventCount} MS events");
}
finally
{
EventOccurrenceParserTestHelpers.CleanupTempFile(tempFile);
}
}
}
@@ -302,7 +302,6 @@
ParsingIssueType.MissingEventDefinition => Color.Warning,
ParsingIssueType.TimeParseFailure => Color.Error,
ParsingIssueType.DateParseFailure => Color.Error,
ParsingIssueType.LocationParseFailure => Color.Warning,
ParsingIssueType.InvalidFormat => Color.Error,
_ => Color.Default
};