From 44fd38b7ac3de0a7c8a1f0a366e8830e5c4e8991 Mon Sep 17 00:00:00 2001 From: James Kolpack Date: Fri, 9 Jan 2026 10:04:11 -0500 Subject: [PATCH] Refactor event occurrence parsing to improve school level handling and section skipping This commit refines the EventOccurrenceParser by replacing boolean flags for school sections with a more robust SchoolLevel enum. The logic for determining whether to skip events based on school level has been encapsulated in dedicated methods, enhancing readability and maintainability. Additionally, the EventOccurrenceParserService has been updated to detect potential missing line breaks in section headers, improving warning reporting. These changes streamline the parsing process and ensure accurate handling of event occurrences based on school level settings. --- Core/Parsers/EventOccurrenceParser.cs | 250 +++++++++--------- Core/Services/EventOccurrenceParserService.cs | 20 ++ 2 files changed, 141 insertions(+), 129 deletions(-) diff --git a/Core/Parsers/EventOccurrenceParser.cs b/Core/Parsers/EventOccurrenceParser.cs index ce43fe1..7fc2fd6 100644 --- a/Core/Parsers/EventOccurrenceParser.cs +++ b/Core/Parsers/EventOccurrenceParser.cs @@ -40,8 +40,7 @@ public class EventOccurrenceParser var issues = result.Issues; EventDefinition? currentEventDefinition = null; bool inContinuationMode = false; - bool inHSSection = false; - bool inMSSection = false; + SchoolLevel? 
currentSectionLevel = null; var lines = File.ReadLines(_txtFile.FullName); foreach (var (line, index) in lines.Select((line, index) => (line, index + 1))) @@ -80,65 +79,19 @@ public class EventOccurrenceParser // Section headers break continuation mode inContinuationMode = false; - // Determine if we should skip this event based on chapter's school level setting - bool shouldSkip = false; - if (!string.IsNullOrWhiteSpace(schoolLevel)) - { - if (_schoolLevel.HasValue) - { - // School level is set - filter based on it - if (_schoolLevel.Value == SchoolLevel.MiddleSchool && - schoolLevel.Equals("HS", StringComparison.OrdinalIgnoreCase)) - { - shouldSkip = true; - result.SkippedHSSectionHeaders.Add(normalizedLine); - inHSSection = true; - inMSSection = false; - } - else if (_schoolLevel.Value == SchoolLevel.HighSchool && - schoolLevel.Equals("MS", StringComparison.OrdinalIgnoreCase)) - { - shouldSkip = true; - result.SkippedMSSectionHeaders.Add(normalizedLine); - inMSSection = true; - inHSSection = false; - } - } - else - { - // No school level set - backward compatibility: skip HS events - if (schoolLevel.Equals("HS", StringComparison.OrdinalIgnoreCase)) - { - shouldSkip = true; - result.SkippedHSSectionHeaders.Add(normalizedLine); - inHSSection = true; - inMSSection = false; - } - } - } + // Convert string school level to enum + var sectionSchoolLevel = SetCurrentSectionLevel(schoolLevel); - if (shouldSkip) + // Determine if we should skip this event based on chapter's school level setting + if (ShouldSkipSection(sectionSchoolLevel, normalizedLine, result)) { currentEventDefinition = null; // Skip subsequent occurrences + currentSectionLevel = sectionSchoolLevel; // Track that we're in a skipped section continue; // No issue created } - // Reset section flags for events we're processing - if (schoolLevel.Equals("MS", StringComparison.OrdinalIgnoreCase)) - { - inMSSection = true; - inHSSection = false; - } - else if (schoolLevel.Equals("HS", 
StringComparison.OrdinalIgnoreCase)) - { - inHSSection = true; - inMSSection = false; - } - else - { - inHSSection = false; - inMSSection = false; - } + // Set current section level for events we're processing + currentSectionLevel = sectionSchoolLevel; // Use fuzzy matching to find the best matching event definition var evt = EventOccurrenceParsers.SectionHeaderMatcher.MatchEventDefinition(eventNamePart, _events); @@ -164,8 +117,7 @@ public class EventOccurrenceParser { // General schedule breaks continuation mode inContinuationMode = false; - inHSSection = false; // Reset section flags - inMSSection = false; + currentSectionLevel = null; // Reset section level currentEventDefinition = EventDefinition.GeneralSchedule; continue; } @@ -176,62 +128,23 @@ public class EventOccurrenceParser // Section headers break continuation mode inContinuationMode = false; - // Determine if we should skip this event based on chapter's school level setting - bool shouldSkip = false; - if (_schoolLevel.HasValue) - { - // School level is set - filter based on it - if (_schoolLevel.Value == SchoolLevel.MiddleSchool && - normalizedLine.Contains("HS", StringComparison.OrdinalIgnoreCase)) - { - shouldSkip = true; - result.SkippedHSSectionHeaders.Add(normalizedLine); - inHSSection = true; - inMSSection = false; - } - else if (_schoolLevel.Value == SchoolLevel.HighSchool && - normalizedLine.Contains("MS", StringComparison.OrdinalIgnoreCase)) - { - shouldSkip = true; - result.SkippedMSSectionHeaders.Add(normalizedLine); - inMSSection = true; - inHSSection = false; - } - } - else - { - // No school level set - backward compatibility: skip HS events - if (normalizedLine.Contains("HS", StringComparison.OrdinalIgnoreCase)) - { - shouldSkip = true; - result.SkippedHSSectionHeaders.Add(normalizedLine); - inHSSection = true; - inMSSection = false; - } - } + // Extract school level from line + SchoolLevel? 
sectionSchoolLevel = null; + if (normalizedLine.Contains("MS", StringComparison.OrdinalIgnoreCase)) + sectionSchoolLevel = SchoolLevel.MiddleSchool; + else if (normalizedLine.Contains("HS", StringComparison.OrdinalIgnoreCase)) + sectionSchoolLevel = SchoolLevel.HighSchool; - if (shouldSkip) + // Determine if we should skip this event based on chapter's school level setting + if (ShouldSkipSection(sectionSchoolLevel, normalizedLine, result)) { currentEventDefinition = null; // Skip subsequent occurrences + currentSectionLevel = sectionSchoolLevel; // Track that we're in a skipped section continue; // No issue created } - // Reset section flags for events we're processing - if (normalizedLine.Contains("MS", StringComparison.OrdinalIgnoreCase)) - { - inMSSection = true; - inHSSection = false; - } - else if (normalizedLine.Contains("HS", StringComparison.OrdinalIgnoreCase)) - { - inHSSection = true; - inMSSection = false; - } - else - { - inHSSection = false; - inMSSection = false; - } + // Set current section level for events we're processing + currentSectionLevel = sectionSchoolLevel; // Use fuzzy matching to find the best matching event definition var evt = EventOccurrenceParsers.SectionHeaderMatcher.MatchEventDefinition(normalizedLine, _events); @@ -283,27 +196,9 @@ public class EventOccurrenceParser inContinuationMode = false; // Skip occurrences under sections that don't match the school level setting - if (_schoolLevel.HasValue) + if (ShouldSkipOccurrence(currentSectionLevel, result)) { - if (_schoolLevel.Value == SchoolLevel.MiddleSchool && inHSSection) - { - result.SkippedHSEventCount++; - continue; - } - if (_schoolLevel.Value == SchoolLevel.HighSchool && inMSSection) - { - result.SkippedMSEventCount++; - continue; - } - } - else - { - // If no school level is set, skip HS sections (backward compatibility) - if (inHSSection) - { - result.SkippedHSEventCount++; - continue; - } + continue; } var (occurrenceName, month, dayOfMonthStr, timeAndLocation) = 
occurrenceLine.Value; @@ -387,10 +282,107 @@ public class EventOccurrenceParser occurrences.Add(eventDefinition, []); occurrences[eventDefinition].Add(eventOccurrence); - // Reset HS section flag when we successfully parse an occurrence (means we're in a valid section) - inHSSection = false; + // Reset section level when we successfully parse an occurrence (means we're in a valid section) + currentSectionLevel = null; } return result; } + + /// <summary> + /// Determines if a section should be skipped based on chapter's school level setting. + /// Handles backward compatibility (null school level = skip HS). + /// </summary> + /// <param name="sectionSchoolLevel">The school level of the section (null if no school level designation).</param> + /// <param name="normalizedLine">The normalized line content for tracking skipped headers.</param> + /// <param name="result">The parse result to update with skipped headers.</param> + /// <returns>True if the section should be skipped, false otherwise.</returns> + private bool ShouldSkipSection(SchoolLevel? sectionSchoolLevel, string normalizedLine, EventOccurrenceParserResult result) + { + if (!sectionSchoolLevel.HasValue) + return false; // Events without school level are never skipped + + if (_schoolLevel.HasValue) + { + // School level is set - filter based on it + if (_schoolLevel.Value == SchoolLevel.MiddleSchool && sectionSchoolLevel.Value == SchoolLevel.HighSchool) + { + result.SkippedHSSectionHeaders.Add(normalizedLine); + return true; + } + if (_schoolLevel.Value == SchoolLevel.HighSchool && sectionSchoolLevel.Value == SchoolLevel.MiddleSchool) + { + result.SkippedMSSectionHeaders.Add(normalizedLine); + return true; + } + } + else + { + // No school level set - backward compatibility: skip HS events + if (sectionSchoolLevel.Value == SchoolLevel.HighSchool) + { + result.SkippedHSSectionHeaders.Add(normalizedLine); + return true; + } + } + + return false; + } + + /// <summary> + /// Converts string school level ("MS", "HS", or empty) to SchoolLevel?. + /// </summary> + /// <param name="schoolLevelStr">The school level string from the section header.</param> + /// <returns>SchoolLevel? 
representing the school level, or null if no school level designation.</returns> + private static SchoolLevel? SetCurrentSectionLevel(string schoolLevelStr) + { + if (string.IsNullOrWhiteSpace(schoolLevelStr)) + return null; + + if (schoolLevelStr.Equals("MS", StringComparison.OrdinalIgnoreCase)) + return SchoolLevel.MiddleSchool; + + if (schoolLevelStr.Equals("HS", StringComparison.OrdinalIgnoreCase)) + return SchoolLevel.HighSchool; + + return null; + } + + /// <summary> + /// Checks if current occurrence should be skipped based on section level. + /// </summary> + /// <param name="currentSectionLevel">The current section level.</param> + /// <param name="result">The parse result to update with skip counts.</param> + /// <returns>True if the occurrence should be skipped, false otherwise.</returns> + private bool ShouldSkipOccurrence(SchoolLevel? currentSectionLevel, EventOccurrenceParserResult result) + { + if (!currentSectionLevel.HasValue) + return false; // Events without school level are never skipped + + if (_schoolLevel.HasValue) + { + // School level is set - filter based on it + if (_schoolLevel.Value == SchoolLevel.MiddleSchool && currentSectionLevel.Value == SchoolLevel.HighSchool) + { + result.SkippedHSEventCount++; + return true; + } + if (_schoolLevel.Value == SchoolLevel.HighSchool && currentSectionLevel.Value == SchoolLevel.MiddleSchool) + { + result.SkippedMSEventCount++; + return true; + } + } + else + { + // If no school level is set, skip HS sections (backward compatibility) + if (currentSectionLevel.Value == SchoolLevel.HighSchool) + { + result.SkippedHSEventCount++; + return true; + } + } + + return false; + } } \ No newline at end of file diff --git a/Core/Services/EventOccurrenceParserService.cs b/Core/Services/EventOccurrenceParserService.cs index 354ecc9..12d0532 100644 --- a/Core/Services/EventOccurrenceParserService.cs +++ b/Core/Services/EventOccurrenceParserService.cs @@ -197,6 +197,26 @@ public class EventOccurrenceParserService : IEventOccurrenceParserService result.Warnings.Add($"Location '{loc}' may contain date/time information: '{match.Value}'"); 
} } + + // Check for section header patterns (missing line break detection) + // Pattern matches: text ending with " - MS", " - HS" + // This indicates a missing line break where the next section header was concatenated to the location + // Note: Input is already sanitized (en-dash/em-dash -> regular hyphen), so we only need to match regular hyphens + var sectionHeaderPattern = new Regex( + @"-\s*(MS|HS)\s*$", + RegexOptions.IgnoreCase | RegexOptions.Compiled); + + var locationsWithSectionHeader = locations.Where(loc => loc != null && sectionHeaderPattern.IsMatch(loc)).ToList(); + foreach (var loc in locationsWithSectionHeader) + { + if (loc != null) + { + var match = sectionHeaderPattern.Match(loc); + // Extract the section header part for better warning message + var sectionHeaderPart = match.Value.Trim(); + result.Warnings.Add($"Location '{loc}' appears to contain a section header (ends with '{sectionHeaderPart}') - likely missing line break. The location may be corrupted."); + } + } } }