44fd38b7ac
This commit refines the EventOccurrenceParser by replacing boolean flags for school sections with a more robust SchoolLevel enum. The logic for determining whether to skip events based on school level has been encapsulated in dedicated methods, enhancing readability and maintainability. Additionally, the EventOccurrenceParserService has been updated to detect potential missing line breaks in section headers, improving warning reporting. These changes streamline the parsing process and ensure accurate handling of event occurrences based on school level settings.
388 lines
14 KiB
C#
388 lines
14 KiB
C#
using System.Text.RegularExpressions;
|
|
using Core.Entities;
|
|
using Core.Models;
|
|
using EventOccurrenceParsers = Core.Parsers.EventOccurrence;
|
|
using Core.Utility;
|
|
using SchoolLevel = Core.Models.SchoolLevel;
|
|
|
|
namespace Core.Parsers;
|
|
|
|
/// <summary>
/// Outcome of parsing an event occurrence file: the occurrences that were read, the issues
/// raised along the way, and bookkeeping about sections and events skipped by school level.
/// </summary>
public class EventOccurrenceParserResult
{
    /// <summary>Parsed occurrences, grouped by the event definition they were assigned to.</summary>
    public IDictionary<EventDefinition, List<Entities.EventOccurrence>> Occurrences { get; set; }
        = new Dictionary<EventDefinition, List<Entities.EventOccurrence>>();

    /// <summary>Problems recorded while parsing (unmatched lines, date/time failures, ...).</summary>
    public List<ParsingIssue> Issues { get; set; } = [];

    /// <summary>Header lines of high-school sections that were skipped.</summary>
    public List<string> SkippedHSSectionHeaders { get; set; } = [];

    /// <summary>Header lines of middle-school sections that were skipped.</summary>
    public List<string> SkippedMSSectionHeaders { get; set; } = [];

    /// <summary>Number of occurrence lines skipped because they sat under a middle-school section.</summary>
    public int SkippedMSEventCount { get; set; }

    /// <summary>Number of occurrence lines skipped because they sat under a high-school section.</summary>
    public int SkippedHSEventCount { get; set; }
}
|
|
|
|
public class EventOccurrenceParser
|
|
{
|
|
// File whose lines Parse() reads.
private readonly FileSystemInfo _txtFile;

// Event definitions that section headers are matched against.
private readonly ICollection<EventDefinition> _events;

// School level used for filtering sections; null selects the legacy behaviour
// in which only high-school sections are skipped.
private readonly SchoolLevel? _schoolLevel;

/// <summary>
/// Creates a parser for the given occurrence file.
/// </summary>
/// <param name="txtFile">The text file to parse.</param>
/// <param name="events">The event definitions that section headers may resolve to.</param>
/// <param name="schoolLevel">
/// School level used to filter sections, or <c>null</c> to keep the legacy
/// behaviour (high-school sections are skipped).
/// </param>
public EventOccurrenceParser(FileSystemInfo txtFile, ICollection<EventDefinition> events, SchoolLevel? schoolLevel = null)
{
    _txtFile = txtFile;
    _events = events;
    _schoolLevel = schoolLevel;
}
|
|
|
|
/// <summary>
/// Reads the text file line by line and turns occurrence lines into event occurrences
/// grouped by event definition, collecting parsing issues along the way.
/// </summary>
/// <remarks>
/// Each line is trimmed and passed through <c>TextUtil.SanitizeInput</c>, then classified in
/// this order: empty/comment line, occurrence line, grammar section header, general schedule
/// header, legacy "MS"/"HS" header, continuation line, and finally unmatched line.
/// Occurrences under a section that <c>ShouldSkipOccurrence</c> rejects are counted, not parsed.
/// </remarks>
/// <returns>The parsed occurrences together with issues and skip statistics.</returns>
public EventOccurrenceParserResult Parse()
{
    var result = new EventOccurrenceParserResult();
    var occurrences = result.Occurrences;
    var issues = result.Issues;
    EventDefinition? currentEventDefinition = null;
    bool inContinuationMode = false;
    SchoolLevel? currentSectionLevel = null;

    // Read the clock once so every occurrence of this run is dated with the same year,
    // even if the parse happens to straddle a year boundary.
    int parseYear = DateTime.Now.Year;

    // Shared handling for both kinds of section header (grammar-parsed and legacy MS/HS).
    // Updates the loop state captured from the enclosing method.
    void EnterSection(SchoolLevel? level, string matchText, string headerDescription, int lineNumber, string lineContent)
    {
        // Section headers break continuation mode.
        inContinuationMode = false;

        // Remember the level whether or not the section is skipped, so that
        // ShouldSkipOccurrence can filter the occurrence lines that follow.
        currentSectionLevel = level;

        if (ShouldSkipSection(level, lineContent, result))
        {
            // Skipped section: no issue is created and no definition stays active.
            currentEventDefinition = null;
            return;
        }

        // Use fuzzy matching to find the best matching event definition.
        var matched = EventOccurrenceParsers.SectionHeaderMatcher.MatchEventDefinition(matchText, _events);
        if (matched == null)
        {
            var bestRatio = EventOccurrenceParsers.SectionHeaderMatcher.GetBestMatchRatio(matchText, _events);
            issues.Add(new ParsingIssue
            {
                LineNumber = lineNumber,
                LineContent = lineContent,
                IssueType = ParsingIssueType.UnmatchedLine,
                Message = $"{headerDescription} found but no matching event definition (best match ratio: {bestRatio})"
            });

            // NOTE(review): the previously active event definition is intentionally left in
            // place here (unchanged behaviour), so occurrences after an unmatched header are
            // still attributed to the earlier section - confirm this is desired.
            return;
        }

        currentEventDefinition = matched;
    }

    var lines = File.ReadLines(_txtFile.FullName);
    foreach (var (line, index) in lines.Select((text, position) => (text, position + 1)))
    {
        // Normalize input up front so the grammar parsers can assume sanitized text.
        var normalizedLine = TextUtil.SanitizeInput(line.Trim());

        // Empty lines and comment lines are skipped; both break continuation mode.
        if (EventOccurrenceParsers.LineClassifier.IsEmptyLine(normalizedLine)
            || EventOccurrenceParsers.LineClassifier.IsCommentLine(normalizedLine))
        {
            inContinuationMode = false;
            continue;
        }

        var occurrenceLine = EventOccurrenceGrammar.TryParseOccurrenceLine(normalizedLine);
        if (!occurrenceLine.HasValue)
        {
            // Not an occurrence line: try the other line types in order.
            var sectionHeader = EventOccurrenceGrammar.TryParseSectionHeader(normalizedLine);
            if (sectionHeader.HasValue)
            {
                var (eventNamePart, schoolLevel) = sectionHeader.Value;
                EnterSection(
                    SetCurrentSectionLevel(schoolLevel),
                    eventNamePart,
                    $"Section header '{eventNamePart} - {schoolLevel}'",
                    index,
                    normalizedLine);
                continue;
            }

            // General Schedule/Session header.
            if (EventOccurrenceParsers.SectionHeaderMatcher.IsGeneralSchedule(normalizedLine))
            {
                inContinuationMode = false;
                currentSectionLevel = null; // General schedule carries no school level
                currentEventDefinition = EventDefinition.GeneralSchedule;
                continue;
            }

            // Legacy headers that merely mention "MS" or "HS" (backward compatibility).
            if (EventOccurrenceParsers.SectionHeaderMatcher.HasSchoolLevel(normalizedLine))
            {
                EnterSection(
                    DetectSchoolLevelInLine(normalizedLine),
                    normalizedLine,
                    "Section header with 'MS' or 'HS'",
                    index,
                    normalizedLine);
                continue;
            }

            // A leading "*" starts continuation mode, which lasts until a line type that resets it.
            if (normalizedLine.TrimStart().StartsWith("*", StringComparison.Ordinal))
            {
                inContinuationMode = true;
            }

            // Continuation lines are ignored without raising an issue.
            if (inContinuationMode || EventOccurrenceParsers.LineClassifier.IsContinuationLine(normalizedLine))
            {
                continue;
            }

            // Anything left is reported as an unmatched line.
            if (!string.IsNullOrWhiteSpace(normalizedLine))
            {
                issues.Add(new ParsingIssue
                {
                    LineNumber = index,
                    LineContent = normalizedLine,
                    IssueType = ParsingIssueType.UnmatchedLine,
                    Message = "Line does not match expected format (Name Month Day Time/Location)"
                });
            }
            continue;
        }

        // Occurrence lines break continuation mode.
        inContinuationMode = false;

        // Skip occurrences under sections that don't match the school level setting.
        if (ShouldSkipOccurrence(currentSectionLevel, result))
        {
            continue;
        }

        var (occurrenceName, month, dayOfMonthStr, timeAndLocation) = occurrenceLine.Value;

        // Remove a trailing "Weekday," suffix from the occurrence name if present.
        occurrenceName = Regex.Replace(occurrenceName,
            @"(?<Weekday>Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday),\s?$", "").Trim();

        // Resolve the event definition from the occurrence name or the current section.
        EventDefinition? eventDefinition = EventOccurrenceParsers.EventDefinitionResolver.Resolve(occurrenceName, currentEventDefinition);
        if (eventDefinition == null)
        {
            issues.Add(new ParsingIssue
            {
                LineNumber = index,
                LineContent = normalizedLine,
                IssueType = ParsingIssueType.MissingEventDefinition,
                Message = $"Cannot determine event definition for occurrence: {occurrenceName}"
            });
            continue;
        }

        // Split the remainder of the line into its time and location parts.
        EventOccurrenceParsers.TimeLocationParser.Parse(timeAndLocation, out string time, out string location);

        // Parse date; a failure is recorded as an issue and the line is dropped.
        DateOnly? startDate = null;
        try
        {
            startDate = TextUtil.ParseDate(month, dayOfMonthStr.ToString(), parseYear);
        }
        catch (Exception ex)
        {
            issues.Add(new ParsingIssue
            {
                LineNumber = index,
                LineContent = normalizedLine,
                IssueType = ParsingIssueType.DateParseFailure,
                Message = $"Failed to parse date: {ex.Message}"
            });
            continue;
        }

        // Parse time; a failure is recorded as an issue and the line is dropped.
        TimeOnly? startTime = null;
        try
        {
            startTime = EventOccurrenceParsers.TimeParser.Parse(time);
        }
        catch (Exception ex)
        {
            issues.Add(new ParsingIssue
            {
                LineNumber = index,
                LineContent = normalizedLine,
                IssueType = ParsingIssueType.TimeParseFailure,
                Message = $"Failed to parse time '{time}': {ex.Message}"
            });
            continue;
        }

        if (startDate == null || startTime == null)
        {
            continue;
        }

        var eventOccurrence = new Core.Entities.EventOccurrence
        {
            Name = occurrenceName,
            StartTime = new DateTime(startDate.Value, startTime.Value),
            Time = $"{time}",
            Date = $"{month} {dayOfMonthStr}",
            Location = location
        };

        // Single lookup: fetch the list for this definition, creating it on first use.
        if (!occurrences.TryGetValue(eventDefinition, out var bucket))
        {
            bucket = [];
            occurrences.Add(eventDefinition, bucket);
        }
        bucket.Add(eventOccurrence);

        // Reset section level when we successfully parse an occurrence (means we're in a valid section).
        currentSectionLevel = null;
    }

    return result;
}

/// <summary>
/// Derives the school level from a legacy header line that mentions "MS" or "HS".
/// </summary>
/// <remarks>
/// A standalone "MS"/"HS" token wins over letters that merely occur inside a longer word,
/// so "Systems Control HS" resolves to high school. When no standalone token exists the
/// plain case-insensitive substring test is used, with "MS" checked before "HS".
/// </remarks>
/// <param name="text">The normalized header line.</param>
/// <returns>The detected school level, or <c>null</c> if the line mentions neither.</returns>
private static SchoolLevel? DetectSchoolLevelInLine(string text)
{
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.CultureInvariant;

    if (Regex.IsMatch(text, @"\bMS\b", options))
    {
        return SchoolLevel.MiddleSchool;
    }

    if (Regex.IsMatch(text, @"\bHS\b", options))
    {
        return SchoolLevel.HighSchool;
    }

    // Fallback: substring test for lines without a standalone token.
    if (text.Contains("MS", StringComparison.OrdinalIgnoreCase))
    {
        return SchoolLevel.MiddleSchool;
    }

    if (text.Contains("HS", StringComparison.OrdinalIgnoreCase))
    {
        return SchoolLevel.HighSchool;
    }

    return null;
}
|
|
|
|
/// <summary>
/// Decides whether a section header belongs to a school level that this parser filters out,
/// and records the header line in the matching "skipped" list when it does.
/// </summary>
/// <remarks>
/// High-school sections are skipped when the configured level is middle school or is not set
/// at all (backward compatibility). Middle-school sections are skipped only when the
/// configured level is high school. Sections without a level are never skipped.
/// </remarks>
/// <param name="sectionSchoolLevel">The school level of the section (null if it has none).</param>
/// <param name="normalizedLine">The normalized header line, stored when the section is skipped.</param>
/// <param name="result">The parse result whose skipped-header lists are updated.</param>
/// <returns>True if the section should be skipped, false otherwise.</returns>
private bool ShouldSkipSection(SchoolLevel? sectionSchoolLevel, string normalizedLine, EventOccurrenceParserResult result)
{
    if (sectionSchoolLevel is null)
    {
        return false;
    }

    var sectionLevel = sectionSchoolLevel.Value;

    bool filtersOutHighSchool = !_schoolLevel.HasValue || _schoolLevel.Value == SchoolLevel.MiddleSchool;
    if (sectionLevel == SchoolLevel.HighSchool && filtersOutHighSchool)
    {
        result.SkippedHSSectionHeaders.Add(normalizedLine);
        return true;
    }

    bool filtersOutMiddleSchool = _schoolLevel.HasValue && _schoolLevel.Value == SchoolLevel.HighSchool;
    if (sectionLevel == SchoolLevel.MiddleSchool && filtersOutMiddleSchool)
    {
        result.SkippedMSSectionHeaders.Add(normalizedLine);
        return true;
    }

    return false;
}
|
|
|
|
/// <summary>
/// Maps the school level text of a section header onto a <c>SchoolLevel</c> value.
/// </summary>
/// <param name="schoolLevelStr">The school level string from the section header.</param>
/// <returns>
/// <c>MiddleSchool</c> for "MS" and <c>HighSchool</c> for "HS" (compared case-insensitively,
/// without trimming); null for null, blank, or any other text.
/// </returns>
private static SchoolLevel? SetCurrentSectionLevel(string schoolLevelStr)
{
    // The static string.Equals is null-safe, so null and blank input simply match neither branch.
    if (string.Equals(schoolLevelStr, "MS", StringComparison.OrdinalIgnoreCase))
    {
        return SchoolLevel.MiddleSchool;
    }

    if (string.Equals(schoolLevelStr, "HS", StringComparison.OrdinalIgnoreCase))
    {
        return SchoolLevel.HighSchool;
    }

    return null;
}
|
|
|
|
/// <summary>
/// Decides whether an occurrence line should be dropped because the section it sits under
/// is filtered out by school level, and bumps the matching skip counter when it is.
/// </summary>
/// <remarks>
/// High-school occurrences are skipped when the configured level is middle school or is not
/// set at all (backward compatibility). Middle-school occurrences are skipped only when the
/// configured level is high school. Occurrences with no section level are never skipped.
/// </remarks>
/// <param name="currentSectionLevel">The school level of the section currently being read.</param>
/// <param name="result">The parse result whose skip counters are updated.</param>
/// <returns>True if the occurrence should be skipped, false otherwise.</returns>
private bool ShouldSkipOccurrence(SchoolLevel? currentSectionLevel, EventOccurrenceParserResult result)
{
    if (currentSectionLevel is null)
    {
        return false;
    }

    var sectionLevel = currentSectionLevel.Value;

    bool filtersOutHighSchool = !_schoolLevel.HasValue || _schoolLevel.Value == SchoolLevel.MiddleSchool;
    if (sectionLevel == SchoolLevel.HighSchool && filtersOutHighSchool)
    {
        result.SkippedHSEventCount += 1;
        return true;
    }

    bool filtersOutMiddleSchool = _schoolLevel.HasValue && _schoolLevel.Value == SchoolLevel.HighSchool;
    if (sectionLevel == SchoolLevel.MiddleSchool && filtersOutMiddleSchool)
    {
        result.SkippedMSEventCount += 1;
        return true;
    }

    return false;
}
|
|
} |