Enhance event occurrence parsing to skip unmatched high school section headers
This commit introduces a new property to track skipped high school (HS) section headers in the EventOccurrenceParseResult and EventOccurrenceParserResult classes. The EventOccurrenceParser has been updated to gracefully skip HS section headers that do not match any event definition, improving the parsing logic. Additionally, the LocationParsingConfiguration has been removed from the EventOccurrenceParser, simplifying its constructor. Unit tests have been updated to reflect these changes and to verify correct behavior during parsing.
This commit is contained in:
@@ -24,33 +24,19 @@ public static class LineClassifier
|
||||
/// <summary>
|
||||
/// Determines if a line is a continuation/wrapped line that should be skipped.
|
||||
/// These are typically lines that:
|
||||
/// - Start with lowercase or special characters (not event names)
|
||||
/// - Start with "*" (marks the start of a continuation block)
|
||||
/// - Are parenthetical notes like "(Semifinalists only)"
|
||||
/// - Are informational text like "Schedule Posted on..."
|
||||
/// </summary>
|
||||
public static bool IsContinuationLine(string line)
|
||||
{
|
||||
var trimmed = line.Trim();
|
||||
|
||||
// Skip parenthetical notes
|
||||
if (trimmed.StartsWith("(", StringComparison.Ordinal) && trimmed.EndsWith(")", StringComparison.Ordinal))
|
||||
// Check if line starts with "*" (marks continuation block start)
|
||||
if (trimmed.StartsWith("*", StringComparison.Ordinal))
|
||||
return true;
|
||||
|
||||
// Skip lines that are clearly continuation text (start with lowercase, common continuation words)
|
||||
if (trimmed.Length > 0 && char.IsLower(trimmed[0]))
|
||||
{
|
||||
// Check if it starts with common continuation words
|
||||
var continuationPrefixes = new[] { "be ", "the ", "and ", "or ", "to ", "a ", "an ", "will ", "may ", "can " };
|
||||
foreach (var prefix in continuationPrefixes)
|
||||
{
|
||||
if (trimmed.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Skip informational lines that don't contain dates/times
|
||||
if (trimmed.Contains("Schedule Posted", StringComparison.OrdinalIgnoreCase) ||
|
||||
trimmed.Contains("Note:", StringComparison.OrdinalIgnoreCase))
|
||||
// Skip parenthetical notes
|
||||
if (trimmed.StartsWith("(", StringComparison.Ordinal) && trimmed.EndsWith(")", StringComparison.Ordinal))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
|
||||
Reference in New Issue
Block a user