f916cfad6b
This commit restructures the EventOccurrenceParser by breaking down its functionality into modular components, including EventDefinitionResolver, LineClassifier, LocationPatternMatcher, SectionHeaderMatcher, TimeLocationParser, and TimeParser. This refactoring enhances code readability and maintainability, allowing for easier updates and testing. Additionally, the TextUtil class has been updated to include input sanitization methods. Comprehensive unit tests have been added to ensure the correctness of the new parsing logic and to validate the handling of various event occurrence scenarios.
229 lines
7.7 KiB
C#
229 lines
7.7 KiB
C#
using System.Text.RegularExpressions;
|
|
using Core.Entities;
|
|
using Core.Models;
|
|
using EventOccurrenceParsers = Core.Parsers.EventOccurrence;
|
|
using Core.Utility;
|
|
|
|
namespace Core.Parsers;
|
|
|
|
/// <summary>
/// Outcome of parsing an event occurrence file: the occurrences that were recognized,
/// grouped by event definition, together with the issues raised along the way.
/// </summary>
public class EventOccurrenceParserResult
{
    /// <summary>
    /// Parsed occurrences keyed by the event definition they were assigned to.
    /// Starts out as an empty dictionary.
    /// </summary>
    public IDictionary<EventDefinition, List<Entities.EventOccurrence>> Occurrences { get; set; }
        = new Dictionary<EventDefinition, List<Entities.EventOccurrence>>();

    /// <summary>
    /// Issues recorded while parsing. Starts out as an empty list.
    /// </summary>
    public List<ParsingIssue> Issues { get; set; } = [];
}
|
|
|
|
/// <summary>
/// Reads a text file line by line and turns it into event occurrences grouped by
/// <see cref="EventDefinition"/>. Lines that cannot be interpreted are reported as
/// <see cref="ParsingIssue"/> entries instead of aborting the parse.
/// </summary>
public class EventOccurrenceParser
{
    // Matches a trailing "<Weekday>," (optionally followed by one whitespace character)
    // at the end of an occurrence name. Built once instead of handing the pattern to
    // the static Regex.Replace on every line.
    private static readonly Regex WeekdaySuffixRegex = new(
        @"(?<Weekday>Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday),\s?$");

    private readonly FileSystemInfo _txtFile;
    private readonly ICollection<EventDefinition> _events;
    private readonly LocationParsingConfiguration? _locationConfig;

    /// <summary>
    /// Creates a parser for the given file.
    /// </summary>
    /// <param name="txtFile">File whose <c>FullName</c> is read by <see cref="Parse"/>.</param>
    /// <param name="events">Event definitions that section headers are matched against.</param>
    /// <param name="locationConfig">
    /// Optional location parsing configuration passed to the time/location parser.
    /// Location parse failures are only reported when it is non-null and has at least one pattern.
    /// </param>
    public EventOccurrenceParser(FileSystemInfo txtFile, ICollection<EventDefinition> events, LocationParsingConfiguration? locationConfig = null)
    {
        _events = events;
        _txtFile = txtFile;
        _locationConfig = locationConfig;
    }

    /// <summary>
    /// Parses the file and returns the recognized occurrences plus every issue encountered.
    /// </summary>
    /// <returns>
    /// A result whose <c>Occurrences</c> maps each resolved event definition to its occurrences
    /// (in file order) and whose <c>Issues</c> lists the lines that could not be handled.
    /// </returns>
    public EventOccurrenceParserResult Parse()
    {
        var result = new EventOccurrenceParserResult();
        var occurrences = result.Occurrences;
        var issues = result.Issues;
        EventDefinition? currentEventDefinition = null;

        // Read the year once so every line of a single run is dated against the same year,
        // even if the run happens to straddle midnight on New Year's Eve.
        var year = DateTime.Now.Year;

        var lineNumber = 0; // 1-based line number used in reported issues
        foreach (var rawLine in File.ReadLines(_txtFile.FullName))
        {
            lineNumber++;

            // Trim and sanitize up front so everything downstream works on normalized text.
            // (Original author's note: sanitizing normalizes en-/em-dashes to plain hyphens.)
            var normalizedLine = TextUtil.SanitizeInput(rawLine.Trim());

            // Empty lines and comment lines carry no information.
            if (EventOccurrenceParsers.LineClassifier.IsEmptyLine(normalizedLine)
                || EventOccurrenceParsers.LineClassifier.IsCommentLine(normalizedLine))
            {
                continue;
            }

            var occurrenceLine = EventOccurrenceGrammar.TryParseOccurrenceLine(normalizedLine);
            if (!occurrenceLine.HasValue)
            {
                // Not an occurrence: may be a section header, a continuation line, or unmatched.
                HandleNonOccurrenceLine(normalizedLine, lineNumber, issues, ref currentEventDefinition);
                continue;
            }

            var (occurrenceName, month, dayOfMonthStr, timeAndLocation) = occurrenceLine.Value;

            // Drop a trailing weekday ("..., Monday,") from the occurrence name if present.
            occurrenceName = WeekdaySuffixRegex.Replace(occurrenceName, "").Trim();

            // The definition comes from the occurrence name and/or the current section.
            EventDefinition? eventDefinition =
                EventOccurrenceParsers.EventDefinitionResolver.Resolve(occurrenceName, currentEventDefinition);
            if (eventDefinition is null)
            {
                issues.Add(CreateIssue(
                    lineNumber,
                    normalizedLine,
                    ParsingIssueType.MissingEventDefinition,
                    $"Cannot determine event definition for occurrence: {occurrenceName}"));
                continue;
            }

            // timeAndLocation needs no further normalization: it was cut from the sanitized line.
            EventOccurrenceParsers.TimeLocationParser.Parse(timeAndLocation, _locationConfig, out string time, out string location, out bool locationParseSuccess);

            // A location failure is only worth reporting when patterns are configured at all.
            // It does not stop the occurrence from being recorded.
            if (!locationParseSuccess
                && !string.IsNullOrWhiteSpace(location)
                && _locationConfig != null
                && _locationConfig.LocationPatterns.Any())
            {
                issues.Add(CreateIssue(
                    lineNumber,
                    normalizedLine,
                    ParsingIssueType.LocationParseFailure,
                    $"Location '{location}' does not match any configured pattern"));
            }

            DateOnly? startDate = null;
            try
            {
                startDate = TextUtil.ParseDate(month, dayOfMonthStr.ToString(), year);
            }
            catch (Exception ex)
            {
                // Any date parsing error is downgraded to an issue so the rest of the file still parses.
                issues.Add(CreateIssue(
                    lineNumber,
                    normalizedLine,
                    ParsingIssueType.DateParseFailure,
                    $"Failed to parse date: {ex.Message}"));
                continue;
            }

            TimeOnly? startTime = null;
            try
            {
                startTime = EventOccurrenceParsers.TimeParser.Parse(time);
            }
            catch (Exception ex)
            {
                // Same policy as for dates: report and move on.
                issues.Add(CreateIssue(
                    lineNumber,
                    normalizedLine,
                    ParsingIssueType.TimeParseFailure,
                    $"Failed to parse time '{time}': {ex.Message}"));
                continue;
            }

            // NOTE(review): if either parser can return null without throwing, the line is
            // dropped here without an issue being recorded - confirm that this is intended.
            if (startDate == null || startTime == null)
            {
                continue;
            }

            var eventOccurrence = new Core.Entities.EventOccurrence
            {
                Name = occurrenceName,
                StartTime = new DateTime(startDate.Value, startTime.Value),
                Time = $"{time}",
                Date = $"{month} {dayOfMonthStr}",
                Location = location
            };

            // Single lookup instead of ContainsKey + Add + indexer.
            if (!occurrences.TryGetValue(eventDefinition, out var bucket))
            {
                bucket = [];
                occurrences.Add(eventDefinition, bucket);
            }

            bucket.Add(eventOccurrence);
        }

        return result;
    }

    /// <summary>
    /// Handles a line that is not an occurrence: updates the current section when the line is
    /// a section header, ignores continuation lines, and reports everything else as unmatched.
    /// </summary>
    /// <param name="normalizedLine">The trimmed, sanitized line.</param>
    /// <param name="lineNumber">1-based line number used in reported issues.</param>
    /// <param name="issues">List that receives any issue raised for this line.</param>
    /// <param name="currentEventDefinition">
    /// Section currently in effect; replaced when the line is a header that matches a definition.
    /// Left unchanged when a header is found but no definition matches.
    /// </param>
    private void HandleNonOccurrenceLine(string normalizedLine, int lineNumber, List<ParsingIssue> issues, ref EventDefinition? currentEventDefinition)
    {
        // 1. Explicit section header recognized by the grammar ("<event name> - <school level>").
        var sectionHeader = EventOccurrenceGrammar.TryParseSectionHeader(normalizedLine);
        if (sectionHeader.HasValue)
        {
            var (eventNamePart, schoolLevel) = sectionHeader.Value;

            var matched = EventOccurrenceParsers.SectionHeaderMatcher.MatchEventDefinition(eventNamePart, _events);
            if (matched is null)
            {
                // The best ratio is only computed for the diagnostic message.
                var bestRatio = EventOccurrenceParsers.SectionHeaderMatcher.GetBestMatchRatio(eventNamePart, _events);
                issues.Add(CreateIssue(
                    lineNumber,
                    normalizedLine,
                    ParsingIssueType.UnmatchedLine,
                    $"Section header '{eventNamePart} - {schoolLevel}' found but no matching event definition (best match ratio: {bestRatio})"));
                return;
            }

            currentEventDefinition = matched;
            return;
        }

        // 2. General schedule/session header.
        if (EventOccurrenceParsers.SectionHeaderMatcher.IsGeneralSchedule(normalizedLine))
        {
            currentEventDefinition = EventDefinition.GeneralSchedule;
            return;
        }

        // 3. Backward compatibility: a line that merely mentions a school level ("MS"/"HS")
        //    is matched as a whole against the known definitions.
        if (EventOccurrenceParsers.SectionHeaderMatcher.HasSchoolLevel(normalizedLine))
        {
            var matched = EventOccurrenceParsers.SectionHeaderMatcher.MatchEventDefinition(normalizedLine, _events);
            if (matched is null)
            {
                var bestRatio = EventOccurrenceParsers.SectionHeaderMatcher.GetBestMatchRatio(normalizedLine, _events);
                issues.Add(CreateIssue(
                    lineNumber,
                    normalizedLine,
                    ParsingIssueType.UnmatchedLine,
                    $"Section header with 'MS' or 'HS' found but no matching event definition (best match ratio: {bestRatio})"));
                return;
            }

            currentEventDefinition = matched;
            return;
        }

        // 4. Continuation of a previous line: ignored without raising an issue.
        if (EventOccurrenceParsers.LineClassifier.IsContinuationLine(normalizedLine))
        {
            return;
        }

        // 5. Anything else is unmatched. ("Voting Delegates" is no longer a section header;
        //    such occurrences are categorized by name pattern instead.)
        if (!string.IsNullOrWhiteSpace(normalizedLine))
        {
            issues.Add(CreateIssue(
                lineNumber,
                normalizedLine,
                ParsingIssueType.UnmatchedLine,
                "Line does not match expected format (Name Month Day Time/Location)"));
        }
    }

    /// <summary>
    /// Builds a <see cref="ParsingIssue"/> for the given line.
    /// </summary>
    private static ParsingIssue CreateIssue(int lineNumber, string lineContent, ParsingIssueType issueType, string message) => new()
    {
        LineNumber = lineNumber,
        LineContent = lineContent,
        IssueType = issueType,
        Message = message
    };
}