Refactor event occurrence parsing to improve school level handling and section skipping
This commit refines the EventOccurrenceParser by replacing boolean flags for school sections with a more robust SchoolLevel enum. The logic for determining whether to skip events based on school level has been encapsulated in dedicated methods, enhancing readability and maintainability. Additionally, the EventOccurrenceParserService has been updated to detect potential missing line breaks in section headers, improving warning reporting. These changes streamline the parsing process and ensure accurate handling of event occurrences based on school level settings.
This commit is contained in:
@@ -40,8 +40,7 @@ public class EventOccurrenceParser
|
|||||||
var issues = result.Issues;
|
var issues = result.Issues;
|
||||||
EventDefinition? currentEventDefinition = null;
|
EventDefinition? currentEventDefinition = null;
|
||||||
bool inContinuationMode = false;
|
bool inContinuationMode = false;
|
||||||
bool inHSSection = false;
|
SchoolLevel? currentSectionLevel = null;
|
||||||
bool inMSSection = false;
|
|
||||||
|
|
||||||
var lines = File.ReadLines(_txtFile.FullName);
|
var lines = File.ReadLines(_txtFile.FullName);
|
||||||
foreach (var (line, index) in lines.Select((line, index) => (line, index + 1)))
|
foreach (var (line, index) in lines.Select((line, index) => (line, index + 1)))
|
||||||
@@ -80,65 +79,19 @@ public class EventOccurrenceParser
|
|||||||
// Section headers break continuation mode
|
// Section headers break continuation mode
|
||||||
inContinuationMode = false;
|
inContinuationMode = false;
|
||||||
|
|
||||||
// Determine if we should skip this event based on chapter's school level setting
|
// Convert string school level to enum
|
||||||
bool shouldSkip = false;
|
var sectionSchoolLevel = SetCurrentSectionLevel(schoolLevel);
|
||||||
if (!string.IsNullOrWhiteSpace(schoolLevel))
|
|
||||||
{
|
|
||||||
if (_schoolLevel.HasValue)
|
|
||||||
{
|
|
||||||
// School level is set - filter based on it
|
|
||||||
if (_schoolLevel.Value == SchoolLevel.MiddleSchool &&
|
|
||||||
schoolLevel.Equals("HS", StringComparison.OrdinalIgnoreCase))
|
|
||||||
{
|
|
||||||
shouldSkip = true;
|
|
||||||
result.SkippedHSSectionHeaders.Add(normalizedLine);
|
|
||||||
inHSSection = true;
|
|
||||||
inMSSection = false;
|
|
||||||
}
|
|
||||||
else if (_schoolLevel.Value == SchoolLevel.HighSchool &&
|
|
||||||
schoolLevel.Equals("MS", StringComparison.OrdinalIgnoreCase))
|
|
||||||
{
|
|
||||||
shouldSkip = true;
|
|
||||||
result.SkippedMSSectionHeaders.Add(normalizedLine);
|
|
||||||
inMSSection = true;
|
|
||||||
inHSSection = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// No school level set - backward compatibility: skip HS events
|
|
||||||
if (schoolLevel.Equals("HS", StringComparison.OrdinalIgnoreCase))
|
|
||||||
{
|
|
||||||
shouldSkip = true;
|
|
||||||
result.SkippedHSSectionHeaders.Add(normalizedLine);
|
|
||||||
inHSSection = true;
|
|
||||||
inMSSection = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (shouldSkip)
|
// Determine if we should skip this event based on chapter's school level setting
|
||||||
|
if (ShouldSkipSection(sectionSchoolLevel, normalizedLine, result))
|
||||||
{
|
{
|
||||||
currentEventDefinition = null; // Skip subsequent occurrences
|
currentEventDefinition = null; // Skip subsequent occurrences
|
||||||
|
currentSectionLevel = sectionSchoolLevel; // Track that we're in a skipped section
|
||||||
continue; // No issue created
|
continue; // No issue created
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reset section flags for events we're processing
|
// Set current section level for events we're processing
|
||||||
if (schoolLevel.Equals("MS", StringComparison.OrdinalIgnoreCase))
|
currentSectionLevel = sectionSchoolLevel;
|
||||||
{
|
|
||||||
inMSSection = true;
|
|
||||||
inHSSection = false;
|
|
||||||
}
|
|
||||||
else if (schoolLevel.Equals("HS", StringComparison.OrdinalIgnoreCase))
|
|
||||||
{
|
|
||||||
inHSSection = true;
|
|
||||||
inMSSection = false;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
inHSSection = false;
|
|
||||||
inMSSection = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use fuzzy matching to find the best matching event definition
|
// Use fuzzy matching to find the best matching event definition
|
||||||
var evt = EventOccurrenceParsers.SectionHeaderMatcher.MatchEventDefinition(eventNamePart, _events);
|
var evt = EventOccurrenceParsers.SectionHeaderMatcher.MatchEventDefinition(eventNamePart, _events);
|
||||||
@@ -164,8 +117,7 @@ public class EventOccurrenceParser
|
|||||||
{
|
{
|
||||||
// General schedule breaks continuation mode
|
// General schedule breaks continuation mode
|
||||||
inContinuationMode = false;
|
inContinuationMode = false;
|
||||||
inHSSection = false; // Reset section flags
|
currentSectionLevel = null; // Reset section level
|
||||||
inMSSection = false;
|
|
||||||
currentEventDefinition = EventDefinition.GeneralSchedule;
|
currentEventDefinition = EventDefinition.GeneralSchedule;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -176,62 +128,23 @@ public class EventOccurrenceParser
|
|||||||
// Section headers break continuation mode
|
// Section headers break continuation mode
|
||||||
inContinuationMode = false;
|
inContinuationMode = false;
|
||||||
|
|
||||||
// Determine if we should skip this event based on chapter's school level setting
|
// Extract school level from line
|
||||||
bool shouldSkip = false;
|
SchoolLevel? sectionSchoolLevel = null;
|
||||||
if (_schoolLevel.HasValue)
|
if (normalizedLine.Contains("MS", StringComparison.OrdinalIgnoreCase))
|
||||||
{
|
sectionSchoolLevel = SchoolLevel.MiddleSchool;
|
||||||
// School level is set - filter based on it
|
else if (normalizedLine.Contains("HS", StringComparison.OrdinalIgnoreCase))
|
||||||
if (_schoolLevel.Value == SchoolLevel.MiddleSchool &&
|
sectionSchoolLevel = SchoolLevel.HighSchool;
|
||||||
normalizedLine.Contains("HS", StringComparison.OrdinalIgnoreCase))
|
|
||||||
{
|
|
||||||
shouldSkip = true;
|
|
||||||
result.SkippedHSSectionHeaders.Add(normalizedLine);
|
|
||||||
inHSSection = true;
|
|
||||||
inMSSection = false;
|
|
||||||
}
|
|
||||||
else if (_schoolLevel.Value == SchoolLevel.HighSchool &&
|
|
||||||
normalizedLine.Contains("MS", StringComparison.OrdinalIgnoreCase))
|
|
||||||
{
|
|
||||||
shouldSkip = true;
|
|
||||||
result.SkippedMSSectionHeaders.Add(normalizedLine);
|
|
||||||
inMSSection = true;
|
|
||||||
inHSSection = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// No school level set - backward compatibility: skip HS events
|
|
||||||
if (normalizedLine.Contains("HS", StringComparison.OrdinalIgnoreCase))
|
|
||||||
{
|
|
||||||
shouldSkip = true;
|
|
||||||
result.SkippedHSSectionHeaders.Add(normalizedLine);
|
|
||||||
inHSSection = true;
|
|
||||||
inMSSection = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (shouldSkip)
|
// Determine if we should skip this event based on chapter's school level setting
|
||||||
|
if (ShouldSkipSection(sectionSchoolLevel, normalizedLine, result))
|
||||||
{
|
{
|
||||||
currentEventDefinition = null; // Skip subsequent occurrences
|
currentEventDefinition = null; // Skip subsequent occurrences
|
||||||
|
currentSectionLevel = sectionSchoolLevel; // Track that we're in a skipped section
|
||||||
continue; // No issue created
|
continue; // No issue created
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reset section flags for events we're processing
|
// Set current section level for events we're processing
|
||||||
if (normalizedLine.Contains("MS", StringComparison.OrdinalIgnoreCase))
|
currentSectionLevel = sectionSchoolLevel;
|
||||||
{
|
|
||||||
inMSSection = true;
|
|
||||||
inHSSection = false;
|
|
||||||
}
|
|
||||||
else if (normalizedLine.Contains("HS", StringComparison.OrdinalIgnoreCase))
|
|
||||||
{
|
|
||||||
inHSSection = true;
|
|
||||||
inMSSection = false;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
inHSSection = false;
|
|
||||||
inMSSection = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use fuzzy matching to find the best matching event definition
|
// Use fuzzy matching to find the best matching event definition
|
||||||
var evt = EventOccurrenceParsers.SectionHeaderMatcher.MatchEventDefinition(normalizedLine, _events);
|
var evt = EventOccurrenceParsers.SectionHeaderMatcher.MatchEventDefinition(normalizedLine, _events);
|
||||||
@@ -283,28 +196,10 @@ public class EventOccurrenceParser
|
|||||||
inContinuationMode = false;
|
inContinuationMode = false;
|
||||||
|
|
||||||
// Skip occurrences under sections that don't match the school level setting
|
// Skip occurrences under sections that don't match the school level setting
|
||||||
if (_schoolLevel.HasValue)
|
if (ShouldSkipOccurrence(currentSectionLevel, result))
|
||||||
{
|
{
|
||||||
if (_schoolLevel.Value == SchoolLevel.MiddleSchool && inHSSection)
|
|
||||||
{
|
|
||||||
result.SkippedHSEventCount++;
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (_schoolLevel.Value == SchoolLevel.HighSchool && inMSSection)
|
|
||||||
{
|
|
||||||
result.SkippedMSEventCount++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// If no school level is set, skip HS sections (backward compatibility)
|
|
||||||
if (inHSSection)
|
|
||||||
{
|
|
||||||
result.SkippedHSEventCount++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var (occurrenceName, month, dayOfMonthStr, timeAndLocation) = occurrenceLine.Value;
|
var (occurrenceName, month, dayOfMonthStr, timeAndLocation) = occurrenceLine.Value;
|
||||||
|
|
||||||
@@ -387,10 +282,107 @@ public class EventOccurrenceParser
|
|||||||
occurrences.Add(eventDefinition, []);
|
occurrences.Add(eventDefinition, []);
|
||||||
occurrences[eventDefinition].Add(eventOccurrence);
|
occurrences[eventDefinition].Add(eventOccurrence);
|
||||||
|
|
||||||
// Reset HS section flag when we successfully parse an occurrence (means we're in a valid section)
|
// Reset section level when we successfully parse an occurrence (means we're in a valid section)
|
||||||
inHSSection = false;
|
currentSectionLevel = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Decides whether the section introduced by a header line is filtered out by the
/// parser's configured school level, recording the header on <paramref name="result"/> when it is.
/// </summary>
/// <remarks>
/// When no school level is configured, high-school sections are skipped and
/// middle-school sections are kept (backward-compatible behavior).
/// </remarks>
/// <param name="sectionSchoolLevel">School level of the section, or null when the header carries no designation.</param>
/// <param name="normalizedLine">Normalized header text, added to the matching skipped-headers collection when the section is skipped.</param>
/// <param name="result">Parse result whose skipped-header collections are updated.</param>
/// <returns>True when the section must be skipped; otherwise false.</returns>
private bool ShouldSkipSection(SchoolLevel? sectionSchoolLevel, string normalizedLine, EventOccurrenceParserResult result)
{
    switch (sectionSchoolLevel)
    {
        // High-school section: skipped for middle-school parsers and, for backward
        // compatibility, when no school level is configured at all.
        case SchoolLevel.HighSchool when _schoolLevel is null or SchoolLevel.MiddleSchool:
            result.SkippedHSSectionHeaders.Add(normalizedLine);
            return true;

        // Middle-school section: skipped only when the parser is explicitly high-school.
        case SchoolLevel.MiddleSchool when _schoolLevel is SchoolLevel.HighSchool:
            result.SkippedMSSectionHeaders.Add(normalizedLine);
            return true;

        // Sections without a designation (null) and every other combination are kept.
        default:
            return false;
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Maps the school level token of a section header to a <see cref="SchoolLevel"/> value.
/// Despite its name, this method only converts; it does not modify any state.
/// </summary>
/// <param name="schoolLevelStr">The token from the section header; "MS" and "HS" are matched case-insensitively.</param>
/// <returns>
/// <see cref="SchoolLevel.MiddleSchool"/> for "MS", <see cref="SchoolLevel.HighSchool"/> for "HS",
/// and null for any other input, including null, empty, or whitespace.
/// </returns>
private static SchoolLevel? SetCurrentSectionLevel(string schoolLevelStr)
{
    // The static string.Equals is null-safe, so null/empty/whitespace input
    // simply fails both comparisons and falls through to the null result.
    if (string.Equals(schoolLevelStr, "MS", StringComparison.OrdinalIgnoreCase))
    {
        return SchoolLevel.MiddleSchool;
    }

    if (string.Equals(schoolLevelStr, "HS", StringComparison.OrdinalIgnoreCase))
    {
        return SchoolLevel.HighSchool;
    }

    return null;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Decides whether an occurrence line under the current section is filtered out by the
/// parser's configured school level, incrementing the matching skip counter on
/// <paramref name="result"/> when it is.
/// </summary>
/// <remarks>
/// When no school level is configured, occurrences in high-school sections are skipped
/// and those in middle-school sections are kept (backward-compatible behavior).
/// </remarks>
/// <param name="currentSectionLevel">School level of the section being read, or null when it has no designation.</param>
/// <param name="result">Parse result whose skipped-event counters are updated.</param>
/// <returns>True when the occurrence must be skipped; otherwise false.</returns>
private bool ShouldSkipOccurrence(SchoolLevel? currentSectionLevel, EventOccurrenceParserResult result)
{
    switch (currentSectionLevel)
    {
        // High-school section: skipped for middle-school parsers and, for backward
        // compatibility, when no school level is configured at all.
        case SchoolLevel.HighSchool when _schoolLevel is null or SchoolLevel.MiddleSchool:
            result.SkippedHSEventCount++;
            return true;

        // Middle-school section: skipped only when the parser is explicitly high-school.
        case SchoolLevel.MiddleSchool when _schoolLevel is SchoolLevel.HighSchool:
            result.SkippedMSEventCount++;
            return true;

        // Occurrences outside a designated section (null) and every other combination are kept.
        default:
            return false;
    }
}
|
||||||
}
|
}
|
||||||
@@ -197,6 +197,26 @@ public class EventOccurrenceParserService : IEventOccurrenceParserService
|
|||||||
result.Warnings.Add($"Location '{loc}' may contain date/time information: '{match.Value}'");
|
result.Warnings.Add($"Location '{loc}' may contain date/time information: '{match.Value}'");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check for section header patterns (missing line break detection)
|
||||||
|
// Pattern matches: text ending with " - MS", " - HS"
|
||||||
|
// This indicates a missing line break where the next section header was concatenated to the location
|
||||||
|
// Note: Input is already sanitized (en-dash/em-dash -> regular hyphen), so we only need to match regular hyphens
|
||||||
|
var sectionHeaderPattern = new Regex(
|
||||||
|
@"-\s*(MS|HS)\s*$",
|
||||||
|
RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
||||||
|
|
||||||
|
var locationsWithSectionHeader = locations.Where(loc => loc != null && sectionHeaderPattern.IsMatch(loc)).ToList();
|
||||||
|
foreach (var loc in locationsWithSectionHeader)
|
||||||
|
{
|
||||||
|
if (loc != null)
|
||||||
|
{
|
||||||
|
var match = sectionHeaderPattern.Match(loc);
|
||||||
|
// Extract the section header part for better warning message
|
||||||
|
var sectionHeaderPart = match.Value.Trim();
|
||||||
|
result.Warnings.Add($"Location '{loc}' appears to contain a section header (ends with '{sectionHeaderPart}') - likely missing line break. The location may be corrupted.");
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user