Refactor event occurrence parsing to improve school level handling and section skipping
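This commit refines the EventOccurrenceParser by replacing the inHSSection/inMSSection boolean flags with a single, more robust nullable SchoolLevel value (currentSectionLevel) that tracks the school level of the current section. The logic for deciding whether to skip section headers and event occurrences based on school level has been encapsulated in dedicated methods (ShouldSkipSection and ShouldSkipOccurrence, with SetCurrentSectionLevel converting the "MS"/"HS" header text to the enum), enhancing readability and maintainability. Additionally, the EventOccurrenceParserService now detects locations that end with a section-header suffix such as "- MS" or "- HS", which usually indicates a missing line break, and reports them as warnings. These changes streamline the parsing process and ensure accurate handling of event occurrences based on school level settings.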
This commit is contained in:
@@ -40,8 +40,7 @@ public class EventOccurrenceParser
|
||||
var issues = result.Issues;
|
||||
EventDefinition? currentEventDefinition = null;
|
||||
bool inContinuationMode = false;
|
||||
bool inHSSection = false;
|
||||
bool inMSSection = false;
|
||||
SchoolLevel? currentSectionLevel = null;
|
||||
|
||||
var lines = File.ReadLines(_txtFile.FullName);
|
||||
foreach (var (line, index) in lines.Select((line, index) => (line, index + 1)))
|
||||
@@ -80,65 +79,19 @@ public class EventOccurrenceParser
|
||||
// Section headers break continuation mode
|
||||
inContinuationMode = false;
|
||||
|
||||
// Determine if we should skip this event based on chapter's school level setting
|
||||
bool shouldSkip = false;
|
||||
if (!string.IsNullOrWhiteSpace(schoolLevel))
|
||||
{
|
||||
if (_schoolLevel.HasValue)
|
||||
{
|
||||
// School level is set - filter based on it
|
||||
if (_schoolLevel.Value == SchoolLevel.MiddleSchool &&
|
||||
schoolLevel.Equals("HS", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
shouldSkip = true;
|
||||
result.SkippedHSSectionHeaders.Add(normalizedLine);
|
||||
inHSSection = true;
|
||||
inMSSection = false;
|
||||
}
|
||||
else if (_schoolLevel.Value == SchoolLevel.HighSchool &&
|
||||
schoolLevel.Equals("MS", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
shouldSkip = true;
|
||||
result.SkippedMSSectionHeaders.Add(normalizedLine);
|
||||
inMSSection = true;
|
||||
inHSSection = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// No school level set - backward compatibility: skip HS events
|
||||
if (schoolLevel.Equals("HS", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
shouldSkip = true;
|
||||
result.SkippedHSSectionHeaders.Add(normalizedLine);
|
||||
inHSSection = true;
|
||||
inMSSection = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Convert string school level to enum
|
||||
var sectionSchoolLevel = SetCurrentSectionLevel(schoolLevel);
|
||||
|
||||
if (shouldSkip)
|
||||
// Determine if we should skip this event based on chapter's school level setting
|
||||
if (ShouldSkipSection(sectionSchoolLevel, normalizedLine, result))
|
||||
{
|
||||
currentEventDefinition = null; // Skip subsequent occurrences
|
||||
currentSectionLevel = sectionSchoolLevel; // Track that we're in a skipped section
|
||||
continue; // No issue created
|
||||
}
|
||||
|
||||
// Reset section flags for events we're processing
|
||||
if (schoolLevel.Equals("MS", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
inMSSection = true;
|
||||
inHSSection = false;
|
||||
}
|
||||
else if (schoolLevel.Equals("HS", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
inHSSection = true;
|
||||
inMSSection = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
inHSSection = false;
|
||||
inMSSection = false;
|
||||
}
|
||||
// Set current section level for events we're processing
|
||||
currentSectionLevel = sectionSchoolLevel;
|
||||
|
||||
// Use fuzzy matching to find the best matching event definition
|
||||
var evt = EventOccurrenceParsers.SectionHeaderMatcher.MatchEventDefinition(eventNamePart, _events);
|
||||
@@ -164,8 +117,7 @@ public class EventOccurrenceParser
|
||||
{
|
||||
// General schedule breaks continuation mode
|
||||
inContinuationMode = false;
|
||||
inHSSection = false; // Reset section flags
|
||||
inMSSection = false;
|
||||
currentSectionLevel = null; // Reset section level
|
||||
currentEventDefinition = EventDefinition.GeneralSchedule;
|
||||
continue;
|
||||
}
|
||||
@@ -176,62 +128,23 @@ public class EventOccurrenceParser
|
||||
// Section headers break continuation mode
|
||||
inContinuationMode = false;
|
||||
|
||||
// Determine if we should skip this event based on chapter's school level setting
|
||||
bool shouldSkip = false;
|
||||
if (_schoolLevel.HasValue)
|
||||
{
|
||||
// School level is set - filter based on it
|
||||
if (_schoolLevel.Value == SchoolLevel.MiddleSchool &&
|
||||
normalizedLine.Contains("HS", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
shouldSkip = true;
|
||||
result.SkippedHSSectionHeaders.Add(normalizedLine);
|
||||
inHSSection = true;
|
||||
inMSSection = false;
|
||||
}
|
||||
else if (_schoolLevel.Value == SchoolLevel.HighSchool &&
|
||||
normalizedLine.Contains("MS", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
shouldSkip = true;
|
||||
result.SkippedMSSectionHeaders.Add(normalizedLine);
|
||||
inMSSection = true;
|
||||
inHSSection = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// No school level set - backward compatibility: skip HS events
|
||||
if (normalizedLine.Contains("HS", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
shouldSkip = true;
|
||||
result.SkippedHSSectionHeaders.Add(normalizedLine);
|
||||
inHSSection = true;
|
||||
inMSSection = false;
|
||||
}
|
||||
}
|
||||
// Extract school level from line
|
||||
SchoolLevel? sectionSchoolLevel = null;
|
||||
if (normalizedLine.Contains("MS", StringComparison.OrdinalIgnoreCase))
|
||||
sectionSchoolLevel = SchoolLevel.MiddleSchool;
|
||||
else if (normalizedLine.Contains("HS", StringComparison.OrdinalIgnoreCase))
|
||||
sectionSchoolLevel = SchoolLevel.HighSchool;
|
||||
|
||||
if (shouldSkip)
|
||||
// Determine if we should skip this event based on chapter's school level setting
|
||||
if (ShouldSkipSection(sectionSchoolLevel, normalizedLine, result))
|
||||
{
|
||||
currentEventDefinition = null; // Skip subsequent occurrences
|
||||
currentSectionLevel = sectionSchoolLevel; // Track that we're in a skipped section
|
||||
continue; // No issue created
|
||||
}
|
||||
|
||||
// Reset section flags for events we're processing
|
||||
if (normalizedLine.Contains("MS", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
inMSSection = true;
|
||||
inHSSection = false;
|
||||
}
|
||||
else if (normalizedLine.Contains("HS", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
inHSSection = true;
|
||||
inMSSection = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
inHSSection = false;
|
||||
inMSSection = false;
|
||||
}
|
||||
// Set current section level for events we're processing
|
||||
currentSectionLevel = sectionSchoolLevel;
|
||||
|
||||
// Use fuzzy matching to find the best matching event definition
|
||||
var evt = EventOccurrenceParsers.SectionHeaderMatcher.MatchEventDefinition(normalizedLine, _events);
|
||||
@@ -283,28 +196,10 @@ public class EventOccurrenceParser
|
||||
inContinuationMode = false;
|
||||
|
||||
// Skip occurrences under sections that don't match the school level setting
|
||||
if (_schoolLevel.HasValue)
|
||||
if (ShouldSkipOccurrence(currentSectionLevel, result))
|
||||
{
|
||||
if (_schoolLevel.Value == SchoolLevel.MiddleSchool && inHSSection)
|
||||
{
|
||||
result.SkippedHSEventCount++;
|
||||
continue;
|
||||
}
|
||||
if (_schoolLevel.Value == SchoolLevel.HighSchool && inMSSection)
|
||||
{
|
||||
result.SkippedMSEventCount++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// If no school level is set, skip HS sections (backward compatibility)
|
||||
if (inHSSection)
|
||||
{
|
||||
result.SkippedHSEventCount++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
var (occurrenceName, month, dayOfMonthStr, timeAndLocation) = occurrenceLine.Value;
|
||||
|
||||
@@ -387,10 +282,107 @@ public class EventOccurrenceParser
|
||||
occurrences.Add(eventDefinition, []);
|
||||
occurrences[eventDefinition].Add(eventOccurrence);
|
||||
|
||||
// Reset HS section flag when we successfully parse an occurrence (means we're in a valid section)
|
||||
inHSSection = false;
|
||||
// Reset section level when we successfully parse an occurrence (means we're in a valid section)
|
||||
currentSectionLevel = null;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// <summary>
/// Decides whether a section header belongs to a school level that the chapter does not take part in,
/// and records the header in the matching "skipped" list when it does.
/// </summary>
/// <remarks>
/// When the chapter's school level is not set, high-school sections are skipped (backward compatibility).
/// Sections that carry no school level designation are never skipped.
/// </remarks>
/// <param name="sectionSchoolLevel">School level of the section, or null when the header has no designation.</param>
/// <param name="normalizedLine">Normalized header text; stored in the skipped-header list when the section is skipped.</param>
/// <param name="result">Parse result whose skipped-header lists are updated.</param>
/// <returns>True when the section must be skipped; otherwise false.</returns>
private bool ShouldSkipSection(SchoolLevel? sectionSchoolLevel, string normalizedLine, EventOccurrenceParserResult result)
{
    // An unset chapter level is treated like a middle-school chapter here: HS sections are hidden.
    bool chapterExcludesHighSchool = !_schoolLevel.HasValue || _schoolLevel.Value == SchoolLevel.MiddleSchool;
    bool chapterExcludesMiddleSchool = _schoolLevel.HasValue && _schoolLevel.Value == SchoolLevel.HighSchool;

    // Comparisons against a null section level are false, so undesignated sections fall through to "keep".
    if (chapterExcludesHighSchool && sectionSchoolLevel == SchoolLevel.HighSchool)
    {
        result.SkippedHSSectionHeaders.Add(normalizedLine);
        return true;
    }

    if (chapterExcludesMiddleSchool && sectionSchoolLevel == SchoolLevel.MiddleSchool)
    {
        result.SkippedMSSectionHeaders.Add(normalizedLine);
        return true;
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Maps the school level text of a section header ("MS" or "HS", case-insensitive) to a <c>SchoolLevel</c> value.
/// Despite its name, this method only converts; it does not modify any state.
/// </summary>
/// <param name="schoolLevelStr">School level text taken from the section header; may be empty or whitespace.</param>
/// <returns>
/// <c>SchoolLevel.MiddleSchool</c> for "MS", <c>SchoolLevel.HighSchool</c> for "HS",
/// and null for null, blank, or any other text.
/// </returns>
private static SchoolLevel? SetCurrentSectionLevel(string schoolLevelStr)
{
    // Static string.Equals is null-safe, so null/blank input simply matches neither code.
    bool IsCode(string code) => string.Equals(schoolLevelStr, code, StringComparison.OrdinalIgnoreCase);

    if (IsCode("MS"))
    {
        return SchoolLevel.MiddleSchool;
    }

    if (IsCode("HS"))
    {
        return SchoolLevel.HighSchool;
    }

    return null;
}
|
||||
|
||||
/// <summary>
/// Decides whether an occurrence line must be ignored because the section it sits under
/// belongs to a school level the chapter does not take part in, and counts the skip.
/// </summary>
/// <remarks>
/// When the chapter's school level is not set, occurrences under high-school sections are skipped
/// (backward compatibility). Occurrences under sections without a school level are never skipped.
/// </remarks>
/// <param name="currentSectionLevel">School level of the section currently being read, or null when none applies.</param>
/// <param name="result">Parse result whose skipped-event counters are incremented.</param>
/// <returns>True when the occurrence must be skipped; otherwise false.</returns>
private bool ShouldSkipOccurrence(SchoolLevel? currentSectionLevel, EventOccurrenceParserResult result)
{
    // Pair up (chapter level, section level); only the two mismatching combinations lead to a skip.
    switch (_schoolLevel, currentSectionLevel)
    {
        // Middle-school chapter, or no chapter level at all, reading a high-school section.
        case (null or SchoolLevel.MiddleSchool, SchoolLevel.HighSchool):
            result.SkippedHSEventCount++;
            return true;

        // High-school chapter reading a middle-school section.
        case (SchoolLevel.HighSchool, SchoolLevel.MiddleSchool):
            result.SkippedMSEventCount++;
            return true;

        // Matching levels, undesignated sections, and every other combination are kept.
        default:
            return false;
    }
}
|
||||
}
|
||||
@@ -197,6 +197,26 @@ public class EventOccurrenceParserService : IEventOccurrenceParserService
|
||||
result.Warnings.Add($"Location '{loc}' may contain date/time information: '{match.Value}'");
|
||||
}
|
||||
}
|
||||
|
||||
// Check for section header patterns (missing line break detection)
|
||||
// Pattern matches: text ending with " - MS", " - HS"
|
||||
// This indicates a missing line break where the next section header was concatenated to the location
|
||||
// Note: Input is already sanitized (en-dash/em-dash -> regular hyphen), so we only need to match regular hyphens
|
||||
var sectionHeaderPattern = new Regex(
|
||||
@"-\s*(MS|HS)\s*$",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
||||
|
||||
var locationsWithSectionHeader = locations.Where(loc => loc != null && sectionHeaderPattern.IsMatch(loc)).ToList();
|
||||
foreach (var loc in locationsWithSectionHeader)
|
||||
{
|
||||
if (loc != null)
|
||||
{
|
||||
var match = sectionHeaderPattern.Match(loc);
|
||||
// Extract the section header part for better warning message
|
||||
var sectionHeaderPart = match.Value.Trim();
|
||||
result.Warnings.Add($"Location '{loc}' appears to contain a section header (ends with '{sectionHeaderPart}') - likely missing line break. The location may be corrupted.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user