diff --git a/Core/Parsers/EventOccurrenceParser.cs b/Core/Parsers/EventOccurrenceParser.cs
index ce43fe1..7fc2fd6 100644
--- a/Core/Parsers/EventOccurrenceParser.cs
+++ b/Core/Parsers/EventOccurrenceParser.cs
@@ -40,8 +40,7 @@ public class EventOccurrenceParser
         var issues = result.Issues;
         EventDefinition? currentEventDefinition = null;
         bool inContinuationMode = false;
-        bool inHSSection = false;
-        bool inMSSection = false;
+        SchoolLevel? currentSectionLevel = null;
 
         var lines = File.ReadLines(_txtFile.FullName);
         foreach (var (line, index) in lines.Select((line, index) => (line, index + 1)))
@@ -80,65 +79,19 @@ public class EventOccurrenceParser
                 // Section headers break continuation mode
                 inContinuationMode = false;
 
-                // Determine if we should skip this event based on chapter's school level setting
-                bool shouldSkip = false;
-                if (!string.IsNullOrWhiteSpace(schoolLevel))
-                {
-                    if (_schoolLevel.HasValue)
-                    {
-                        // School level is set - filter based on it
-                        if (_schoolLevel.Value == SchoolLevel.MiddleSchool &&
-                            schoolLevel.Equals("HS", StringComparison.OrdinalIgnoreCase))
-                        {
-                            shouldSkip = true;
-                            result.SkippedHSSectionHeaders.Add(normalizedLine);
-                            inHSSection = true;
-                            inMSSection = false;
-                        }
-                        else if (_schoolLevel.Value == SchoolLevel.HighSchool &&
-                                 schoolLevel.Equals("MS", StringComparison.OrdinalIgnoreCase))
-                        {
-                            shouldSkip = true;
-                            result.SkippedMSSectionHeaders.Add(normalizedLine);
-                            inMSSection = true;
-                            inHSSection = false;
-                        }
-                    }
-                    else
-                    {
-                        // No school level set - backward compatibility: skip HS events
-                        if (schoolLevel.Equals("HS", StringComparison.OrdinalIgnoreCase))
-                        {
-                            shouldSkip = true;
-                            result.SkippedHSSectionHeaders.Add(normalizedLine);
-                            inHSSection = true;
-                            inMSSection = false;
-                        }
-                    }
-                }
+                // Convert string school level to enum
+                var sectionSchoolLevel = ParseSchoolLevel(schoolLevel);
 
-                if (shouldSkip)
+                // Determine if we should skip this event based on chapter's school level setting
+                if (ShouldSkipSection(sectionSchoolLevel, normalizedLine, result))
                 {
                     currentEventDefinition = null; // Skip subsequent occurrences
+                    currentSectionLevel = sectionSchoolLevel; // Track that we're in a skipped section
                     continue; // No issue created
                 }
 
-                // Reset section flags for events we're processing
-                if (schoolLevel.Equals("MS", StringComparison.OrdinalIgnoreCase))
-                {
-                    inMSSection = true;
-                    inHSSection = false;
-                }
-                else if (schoolLevel.Equals("HS", StringComparison.OrdinalIgnoreCase))
-                {
-                    inHSSection = true;
-                    inMSSection = false;
-                }
-                else
-                {
-                    inHSSection = false;
-                    inMSSection = false;
-                }
+                // Set current section level for events we're processing
+                currentSectionLevel = sectionSchoolLevel;
 
                 // Use fuzzy matching to find the best matching event definition
                 var evt = EventOccurrenceParsers.SectionHeaderMatcher.MatchEventDefinition(eventNamePart, _events);
@@ -164,8 +117,7 @@ public class EventOccurrenceParser
             {
                 // General schedule breaks continuation mode
                 inContinuationMode = false;
-                inHSSection = false; // Reset section flags
-                inMSSection = false;
+                currentSectionLevel = null; // Reset section level
                 currentEventDefinition = EventDefinition.GeneralSchedule;
                 continue;
             }
@@ -176,62 +128,24 @@ public class EventOccurrenceParser
                 // Section headers break continuation mode
                 inContinuationMode = false;
 
-                // Determine if we should skip this event based on chapter's school level setting
-                bool shouldSkip = false;
-                if (_schoolLevel.HasValue)
-                {
-                    // School level is set - filter based on it
-                    if (_schoolLevel.Value == SchoolLevel.MiddleSchool &&
-                        normalizedLine.Contains("HS", StringComparison.OrdinalIgnoreCase))
-                    {
-                        shouldSkip = true;
-                        result.SkippedHSSectionHeaders.Add(normalizedLine);
-                        inHSSection = true;
-                        inMSSection = false;
-                    }
-                    else if (_schoolLevel.Value == SchoolLevel.HighSchool &&
-                             normalizedLine.Contains("MS", StringComparison.OrdinalIgnoreCase))
-                    {
-                        shouldSkip = true;
-                        result.SkippedMSSectionHeaders.Add(normalizedLine);
-                        inMSSection = true;
-                        inHSSection = false;
-                    }
-                }
-                else
-                {
-                    // No school level set - backward compatibility: skip HS events
-                    if (normalizedLine.Contains("HS", StringComparison.OrdinalIgnoreCase))
-                    {
-                        shouldSkip = true;
-                        result.SkippedHSSectionHeaders.Add(normalizedLine);
-                        inHSSection = true;
-                        inMSSection = false;
-                    }
-                }
+                // Extract the school level designation from the line. Match whole tokens only, so a word
+                // that merely contains "ms"/"hs" (e.g. "Systems", "Maths") cannot decide the section level.
+                var levelMatch = System.Text.RegularExpressions.Regex.Match(
+                    normalizedLine,
+                    @"\b(MS|HS)\b",
+                    System.Text.RegularExpressions.RegexOptions.IgnoreCase);
+                var sectionSchoolLevel = ParseSchoolLevel(levelMatch.Value);
 
-                if (shouldSkip)
+                // Determine if we should skip this event based on chapter's school level setting
+                if (ShouldSkipSection(sectionSchoolLevel, normalizedLine, result))
                 {
                     currentEventDefinition = null; // Skip subsequent occurrences
+                    currentSectionLevel = sectionSchoolLevel; // Track that we're in a skipped section
                     continue; // No issue created
                 }
 
-                // Reset section flags for events we're processing
-                if (normalizedLine.Contains("MS", StringComparison.OrdinalIgnoreCase))
-                {
-                    inMSSection = true;
-                    inHSSection = false;
-                }
-                else if (normalizedLine.Contains("HS", StringComparison.OrdinalIgnoreCase))
-                {
-                    inHSSection = true;
-                    inMSSection = false;
-                }
-                else
-                {
-                    inHSSection = false;
-                    inMSSection = false;
-                }
+                // Set current section level for events we're processing
+                currentSectionLevel = sectionSchoolLevel;
 
                 // Use fuzzy matching to find the best matching event definition
                 var evt = EventOccurrenceParsers.SectionHeaderMatcher.MatchEventDefinition(normalizedLine, _events);
@@ -283,27 +197,9 @@ public class EventOccurrenceParser
             inContinuationMode = false;
 
             // Skip occurrences under sections that don't match the school level setting
-            if (_schoolLevel.HasValue)
+            if (ShouldSkipOccurrence(currentSectionLevel, result))
             {
-                if (_schoolLevel.Value == SchoolLevel.MiddleSchool && inHSSection)
-                {
-                    result.SkippedHSEventCount++;
-                    continue;
-                }
-                if (_schoolLevel.Value == SchoolLevel.HighSchool && inMSSection)
-                {
-                    result.SkippedMSEventCount++;
-                    continue;
-                }
-            }
-            else
-            {
-                // If no school level is set, skip HS sections (backward compatibility)
-                if (inHSSection)
-                {
-                    result.SkippedHSEventCount++;
-                    continue;
-                }
+                continue;
             }
 
             var (occurrenceName, month, dayOfMonthStr, timeAndLocation) = occurrenceLine.Value;
@@ -387,10 +283,90 @@ public class EventOccurrenceParser
                 occurrences.Add(eventDefinition, []);
             occurrences[eventDefinition].Add(eventOccurrence);
 
-            // Reset HS section flag when we successfully parse an occurrence (means we're in a valid section)
-            inHSSection = false;
+            // Reset section level when we successfully parse an occurrence (means we're in a valid section)
+            currentSectionLevel = null;
         }
 
         return result;
     }
+
+    /// <summary>
+    /// Determines if a section should be skipped based on chapter's school level setting.
+    /// When the section is skipped, its header is recorded in the matching skipped-header list.
+    /// </summary>
+    /// <param name="sectionSchoolLevel">The school level of the section (null if no school level designation).</param>
+    /// <param name="normalizedLine">The normalized line content for tracking skipped headers.</param>
+    /// <param name="result">The parse result to update with skipped headers.</param>
+    /// <returns>True if the section should be skipped, false otherwise.</returns>
+    private bool ShouldSkipSection(SchoolLevel? sectionSchoolLevel, string normalizedLine, EventOccurrenceParserResult result)
+    {
+        if (!IsExcludedLevel(sectionSchoolLevel))
+            return false;
+
+        if (sectionSchoolLevel == SchoolLevel.HighSchool)
+            result.SkippedHSSectionHeaders.Add(normalizedLine);
+        else
+            result.SkippedMSSectionHeaders.Add(normalizedLine);
+
+        return true;
+    }
+
+    /// <summary>
+    /// Converts string school level ("MS", "HS", or empty) to SchoolLevel?.
+    /// </summary>
+    /// <param name="schoolLevelStr">The school level string from the section header.</param>
+    /// <returns>SchoolLevel? representing the school level, or null if no school level designation.</returns>
+    private static SchoolLevel? ParseSchoolLevel(string schoolLevelStr)
+    {
+        if (string.IsNullOrWhiteSpace(schoolLevelStr))
+            return null;
+
+        if (schoolLevelStr.Equals("MS", StringComparison.OrdinalIgnoreCase))
+            return SchoolLevel.MiddleSchool;
+
+        if (schoolLevelStr.Equals("HS", StringComparison.OrdinalIgnoreCase))
+            return SchoolLevel.HighSchool;
+
+        return null;
+    }
+
+    /// <summary>
+    /// Checks if current occurrence should be skipped based on section level.
+    /// When the occurrence is skipped, the matching skipped-event counter is incremented.
+    /// </summary>
+    /// <param name="currentSectionLevel">The current section level.</param>
+    /// <param name="result">The parse result to update with skip counts.</param>
+    /// <returns>True if the occurrence should be skipped, false otherwise.</returns>
+    private bool ShouldSkipOccurrence(SchoolLevel? currentSectionLevel, EventOccurrenceParserResult result)
+    {
+        if (!IsExcludedLevel(currentSectionLevel))
+            return false;
+
+        if (currentSectionLevel == SchoolLevel.HighSchool)
+            result.SkippedHSEventCount++;
+        else
+            result.SkippedMSEventCount++;
+
+        return true;
+    }
+
+    /// <summary>
+    /// Checks whether sections of the given school level are filtered out by the chapter's school level setting.
+    /// Handles backward compatibility (null school level = skip HS).
+    /// </summary>
+    /// <param name="sectionLevel">The school level of the section (null if no school level designation).</param>
+    /// <returns>True if sections of that level are excluded for this chapter, false otherwise.</returns>
+    private bool IsExcludedLevel(SchoolLevel? sectionLevel)
+    {
+        if (!sectionLevel.HasValue)
+            return false; // Events without school level are never skipped
+
+        // No school level set - backward compatibility: skip HS events
+        if (!_schoolLevel.HasValue)
+            return sectionLevel.Value == SchoolLevel.HighSchool;
+
+        // School level is set - a middle school chapter excludes HS sections and vice versa
+        return (_schoolLevel.Value == SchoolLevel.MiddleSchool && sectionLevel.Value == SchoolLevel.HighSchool)
+            || (_schoolLevel.Value == SchoolLevel.HighSchool && sectionLevel.Value == SchoolLevel.MiddleSchool);
+    }
 }
\ No newline at end of file
diff --git a/Core/Services/EventOccurrenceParserService.cs b/Core/Services/EventOccurrenceParserService.cs
index 354ecc9..12d0532 100644
--- a/Core/Services/EventOccurrenceParserService.cs
+++ b/Core/Services/EventOccurrenceParserService.cs
@@ -197,6 +197,27 @@ public class EventOccurrenceParserService : IEventOccurrenceParserService
                 result.Warnings.Add($"Location '{loc}' may contain date/time information: '{match.Value}'");
             }
         }
+
+        // Check for section header patterns (missing line break detection)
+        // Pattern matches: text ending with " - MS", " - HS"
+        // This indicates a missing line break where the next section header was concatenated to the location
+        // Note: Input is already sanitized (en-dash/em-dash -> regular hyphen), so we only need to match regular hyphens
+        // The static Regex.Match overload is used so the runtime's regex cache is reused across calls,
+        // instead of constructing (and compiling) a new Regex instance on every invocation.
+        const string sectionHeaderPattern = @"-\s*(MS|HS)\s*$";
+        foreach (var loc in locations)
+        {
+            if (loc == null)
+                continue;
+
+            var sectionHeaderMatch = Regex.Match(loc, sectionHeaderPattern, RegexOptions.IgnoreCase);
+            if (!sectionHeaderMatch.Success)
+                continue;
+
+            // Extract the section header part for better warning message
+            var sectionHeaderPart = sectionHeaderMatch.Value.Trim();
+            result.Warnings.Add($"Location '{loc}' appears to contain a section header (ends with '{sectionHeaderPart}') - likely missing line break. The location may be corrupted.");
+        }
     }
 
 }