Files
chapter-organizer/Core/Parsers/EventOccurrence/LineClassifier.cs
T
poprhythm f916cfad6b Refactor event occurrence parsing by introducing modular components for improved maintainability
This commit restructures the EventOccurrenceParser by breaking down its functionality into modular components, including EventDefinitionResolver, LineClassifier, LocationPatternMatcher, SectionHeaderMatcher, TimeLocationParser, and TimeParser. This refactoring enhances code readability and maintainability, allowing for easier updates and testing. Additionally, the TextUtil class has been updated to include input sanitization methods. Comprehensive unit tests have been added to ensure the correctness of the new parsing logic and to validate the handling of various event occurrence scenarios.
2026-01-08 20:23:57 -05:00

60 lines
2.1 KiB
C#

namespace Core.Parsers.EventOccurrence;
/// <summary>
/// Classifies raw input lines so the event-occurrence parser can decide which ones to skip.
/// All members are pure functions of their argument and hold no mutable state.
/// </summary>
public static class LineClassifier
{
    /// <summary>
    /// Leading words that mark a lowercase-initial line as wrapped continuation text.
    /// Each entry ends with a space so that only whole words match
    /// (for example "a " does not match a line starting with "about").
    /// </summary>
    private static readonly string[] ContinuationPrefixes =
    {
        "be ", "the ", "and ", "or ", "to ", "a ", "an ", "will ", "may ", "can ",
    };

    /// <summary>
    /// Checks if a line is null, empty, or contains only whitespace.
    /// </summary>
    /// <param name="line">The line to inspect.</param>
    /// <returns><c>true</c> when the line has no non-whitespace content; otherwise <c>false</c>.</returns>
    public static bool IsEmptyLine(string line)
    {
        return string.IsNullOrWhiteSpace(line);
    }

    /// <summary>
    /// Checks if a line is a comment line.
    /// </summary>
    /// <remarks>
    /// The decision is delegated entirely to <c>EventOccurrenceGrammar.IsCommentLine</c>;
    /// the comment marker is defined there (presumably "#" - confirm against the grammar).
    /// </remarks>
    /// <param name="line">The line to inspect.</param>
    /// <returns>Whatever <c>EventOccurrenceGrammar.IsCommentLine</c> returns for <paramref name="line"/>.</returns>
    public static bool IsCommentLine(string line)
    {
        return EventOccurrenceGrammar.IsCommentLine(line);
    }

    /// <summary>
    /// Determines if a line is a continuation/wrapped or informational line that should be skipped.
    /// After trimming surrounding whitespace, a line is skipped when any of the following holds:
    /// - it is fully parenthesized, like "(Semifinalists only)";
    /// - its first character is lowercase and it begins with a common continuation word
    ///   ("be", "the", "and", "or", "to", "a", "an", "will", "may", "can") followed by a space;
    /// - it contains "Schedule Posted" or "Note:" anywhere, ignoring case.
    /// </summary>
    /// <param name="line">The line to inspect. Null or blank input is treated as "not a continuation".</param>
    /// <returns><c>true</c> when the line matches one of the skip rules; otherwise <c>false</c>.</returns>
    public static bool IsContinuationLine(string line)
    {
        // Blank lines match none of the rules below. Guarding here also keeps a null
        // argument from throwing in Trim(), consistent with IsEmptyLine accepting null.
        if (string.IsNullOrWhiteSpace(line))
        {
            return false;
        }

        // Non-empty from here on: Trim() removes the same whitespace set the guard above tests for.
        var trimmed = line.Trim();

        // Parenthetical notes.
        if (trimmed.StartsWith("(", StringComparison.Ordinal) && trimmed.EndsWith(")", StringComparison.Ordinal))
        {
            return true;
        }

        // Wrapped text: only lowercase-initial lines are considered, so a line such as
        // "The ..." is never treated as a continuation. The case-insensitive comparison
        // therefore only relaxes the characters after the first one.
        if (char.IsLower(trimmed[0]))
        {
            foreach (var prefix in ContinuationPrefixes)
            {
                if (trimmed.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
                {
                    return true;
                }
            }
        }

        // Informational lines, identified purely by these marker phrases.
        return trimmed.Contains("Schedule Posted", StringComparison.OrdinalIgnoreCase)
            || trimmed.Contains("Note:", StringComparison.OrdinalIgnoreCase);
    }
}