Enhance event occurrence parsing with detailed issue reporting and location configuration
This commit introduces a new structure for handling parsing issues in the EventOccurrenceParser, allowing for detailed reporting of parsing problems such as unmatched lines, missing event definitions, and parsing failures for time, date, and location. A new ParsingIssue class has been added to encapsulate these details. Additionally, a LocationParsingConfiguration class has been implemented to support customizable location patterns, enhancing the flexibility of the parser. The EventOccurrenceParserService has been updated to utilize this configuration, and new tests have been added to ensure robust issue detection and reporting. Furthermore, the UI has been updated to display parsing issues, improving user feedback during the import process.
This commit is contained in:
@@ -1,18 +1,30 @@
|
||||
using System.Text.RegularExpressions;
|
||||
using Core.Entities;
|
||||
using Core.Models;
|
||||
using FuzzySharp;
|
||||
|
||||
namespace Core.Parsers;
|
||||
|
||||
/// <summary>
/// Outcome of parsing an event occurrence file: the occurrences that were read,
/// together with the parsing issues recorded along the way.
/// </summary>
public class EventOccurrenceParserResult
{
    /// <summary>
    /// Parsed occurrences, keyed by the event definition they were assigned to.
    /// Starts out as an empty dictionary.
    /// </summary>
    public IDictionary<EventDefinition, List<EventOccurrence>> Occurrences { get; set; }
        = new Dictionary<EventDefinition, List<EventOccurrence>>();

    /// <summary>
    /// Issues collected while parsing. Starts out as an empty list.
    /// </summary>
    public List<ParsingIssue> Issues { get; set; } = new List<ParsingIssue>();
}
|
||||
|
||||
public class EventOccurrenceParser
|
||||
{
|
||||
private FileSystemInfo _txtFile;
|
||||
private ICollection<EventDefinition> _events;
|
||||
private LocationParsingConfiguration? _locationConfig;
|
||||
|
||||
public EventOccurrenceParser(FileSystemInfo txtFile, ICollection<EventDefinition> events)
|
||||
public EventOccurrenceParser(FileSystemInfo txtFile, ICollection<EventDefinition> events, LocationParsingConfiguration? locationConfig = null)
|
||||
{
|
||||
_events = events;
|
||||
_txtFile = txtFile;
|
||||
_locationConfig = locationConfig;
|
||||
}
|
||||
|
||||
private Regex _re =
|
||||
@@ -26,40 +38,74 @@ public class EventOccurrenceParser
|
||||
|
||||
private readonly Regex _timeRe = new(@"(?<Hour>\d{1,2}):?(?<Minute>\d{2})?\s?(?<APM>(?:a|p)\.?m\.?)");
|
||||
|
||||
private readonly Regex _timeLocationRegex = new(@"(?<Time>.*(?>[AaPp]\.?[Mm]\.?))(?<Location>[\s\t].*)?");
|
||||
// Regex to match time ranges like "10:30 a.m. - 12:00 p.m." or "10:30 a.m. - NOON"
|
||||
// Matches: time1 (optional dash time2/NOON), then location
|
||||
// The time group captures the full time range (including " - NOON" if present)
|
||||
// Pattern breakdown:
|
||||
// - First time: (?:NOON|\d{1,2}:?\d{0,2}\s?(?:[AaPp]\.?[Mm]\.?)) - matches NOON or time with AM/PM
|
||||
// - Optional range: (?:\s*[–-]\s*(?:NOON|\d{1,2}:?\d{0,2}\s?(?:[AaPp]\.?[Mm]\.?))) - matches dash followed by NOON or time
|
||||
// - Location: \s+.+ - whitespace followed by rest of string
|
||||
private readonly Regex _timeLocationRegex = new(@"(?<Time>(?:NOON|\d{1,2}:?\d{0,2}\s?(?:[AaPp]\.?[Mm]\.?))(?:\s*[–-]\s*(?:NOON|\d{1,2}:?\d{0,2}\s?(?:[AaPp]\.?[Mm]\.?)))?)(?<Location>\s+.+)?");
|
||||
|
||||
public IDictionary<EventDefinition, List<EventOccurrence>> Parse()
|
||||
public EventOccurrenceParserResult Parse()
|
||||
{
|
||||
var occurrences = new Dictionary<EventDefinition, List<EventOccurrence>>();
|
||||
var result = new EventOccurrenceParserResult();
|
||||
var occurrences = result.Occurrences;
|
||||
var issues = result.Issues;
|
||||
EventDefinition? currentEventDefinition = null;
|
||||
|
||||
var lines = File.ReadLines(_txtFile.FullName);
|
||||
foreach (var line in lines)
|
||||
foreach (var (line, index) in lines.Select((line, index) => (line, index + 1)))
|
||||
{
|
||||
var match = _re.Match(line);
|
||||
var trimmedLine = line.Trim();
|
||||
|
||||
// Skip empty lines
|
||||
if (string.IsNullOrWhiteSpace(trimmedLine))
|
||||
continue;
|
||||
|
||||
var match = _re.Match(trimmedLine);
|
||||
if (!match.Success)
|
||||
{
|
||||
if (line.Contains("MS"))
|
||||
if (trimmedLine.Contains("MS"))
|
||||
{
|
||||
var evt =
|
||||
(from e in _events
|
||||
let rat = Fuzz.Ratio(e.Name, line.Trim())
|
||||
let rat = Fuzz.Ratio(e.Name, trimmedLine)
|
||||
where rat > 50
|
||||
orderby rat descending
|
||||
select e).FirstOrDefault();
|
||||
if (evt == null)
|
||||
{
|
||||
issues.Add(new ParsingIssue
|
||||
{
|
||||
LineNumber = index,
|
||||
LineContent = trimmedLine,
|
||||
IssueType = ParsingIssueType.UnmatchedLine,
|
||||
Message = $"Section header with 'MS' found but no matching event definition (best match ratio: {Fuzz.Ratio(trimmedLine, _events.FirstOrDefault()?.Name ?? "")})"
|
||||
});
|
||||
continue;
|
||||
}
|
||||
currentEventDefinition = evt;
|
||||
continue;
|
||||
}
|
||||
if (line == "General Schedule" || line == "General Session")
|
||||
if (trimmedLine == "General Schedule" || trimmedLine == "General Session")
|
||||
{
|
||||
currentEventDefinition = EventDefinition.GeneralSchedule;
|
||||
continue;
|
||||
}
|
||||
|
||||
// "Voting Delegates" section header is no longer used - occurrences are categorized by name pattern
|
||||
// Continue without setting currentEventDefinition for this section
|
||||
// Track as unmatched line if it's not empty
|
||||
if (!string.IsNullOrWhiteSpace(trimmedLine))
|
||||
{
|
||||
issues.Add(new ParsingIssue
|
||||
{
|
||||
LineNumber = index,
|
||||
LineContent = trimmedLine,
|
||||
IssueType = ParsingIssueType.UnmatchedLine,
|
||||
Message = "Line does not match expected format (Name Month Day Time/Location)"
|
||||
});
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -74,30 +120,71 @@ public class EventOccurrenceParser
|
||||
// Determine event definition based on occurrence name pattern or current section
|
||||
EventDefinition? eventDefinition = DetermineEventDefinition(occurrenceName, currentEventDefinition);
|
||||
|
||||
// Skip if we can't determine the event definition
|
||||
// Track issue if we can't determine the event definition
|
||||
if (eventDefinition == null)
|
||||
continue;
|
||||
|
||||
timeAndLocation = SanitizeInput(timeAndLocation);
|
||||
var timeAndLocationMatch = _timeLocationRegex.Match(timeAndLocation);
|
||||
|
||||
var time = timeAndLocation;
|
||||
var location = string.Empty;
|
||||
|
||||
if (timeAndLocationMatch.Success)
|
||||
{
|
||||
time= timeAndLocationMatch.Groups["Time"].Captures[0].Value;
|
||||
if (timeAndLocationMatch.Groups["Location"].Success)
|
||||
location = timeAndLocationMatch.Groups["Location"].Captures[0].Value;
|
||||
issues.Add(new ParsingIssue
|
||||
{
|
||||
LineNumber = index,
|
||||
LineContent = trimmedLine,
|
||||
IssueType = ParsingIssueType.MissingEventDefinition,
|
||||
Message = $"Cannot determine event definition for occurrence: {occurrenceName}"
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
var startDate = ParseDate(month, dayOfMonth, DateTime.Now.Year);
|
||||
var startTime = ParseStartTime(time);
|
||||
var t = new DateTime(startDate, startTime);
|
||||
timeAndLocation = SanitizeInput(timeAndLocation);
|
||||
|
||||
// Parse time and location using configurable patterns
|
||||
var (time, location, locationParseSuccess) = ParseTimeAndLocation(timeAndLocation, index, trimmedLine, issues);
|
||||
|
||||
// Parse date
|
||||
DateOnly? startDate = null;
|
||||
try
|
||||
{
|
||||
startDate = ParseDate(month, dayOfMonth, DateTime.Now.Year);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
issues.Add(new ParsingIssue
|
||||
{
|
||||
LineNumber = index,
|
||||
LineContent = trimmedLine,
|
||||
IssueType = ParsingIssueType.DateParseFailure,
|
||||
Message = $"Failed to parse date: {ex.Message}"
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
// Parse time
|
||||
TimeOnly? startTime = null;
|
||||
try
|
||||
{
|
||||
startTime = ParseStartTime(time);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
issues.Add(new ParsingIssue
|
||||
{
|
||||
LineNumber = index,
|
||||
LineContent = trimmedLine,
|
||||
IssueType = ParsingIssueType.TimeParseFailure,
|
||||
Message = $"Failed to parse time '{time}': {ex.Message}"
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
if (startDate == null || startTime == null)
|
||||
continue;
|
||||
|
||||
var t = new DateTime(startDate.Value, startTime.Value);
|
||||
|
||||
var eventOccurrence = new EventOccurrence
|
||||
{
|
||||
Name = occurrenceName, StartTime = t, Time = $"{time}", Date = $"{month} {dayOfMonth}",
|
||||
Name = occurrenceName,
|
||||
StartTime = t,
|
||||
Time = $"{time}",
|
||||
Date = $"{month} {dayOfMonth}",
|
||||
Location = location
|
||||
};
|
||||
|
||||
@@ -106,7 +193,7 @@ public class EventOccurrenceParser
|
||||
occurrences[eventDefinition].Add(eventOccurrence);
|
||||
}
|
||||
|
||||
return occurrences;
|
||||
return result;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -174,6 +261,89 @@ public class EventOccurrenceParser
|
||||
return new DateOnly(year, monthNum, day); ;
|
||||
}
|
||||
|
||||
/// <summary>
/// Splits a combined "time + location" string into its two parts and, when location
/// patterns are configured, checks the location against them.
/// </summary>
/// <param name="timeAndLocation">The text holding the time, optionally followed by a location.</param>
/// <param name="lineNumber">Line number recorded on any issue raised here.</param>
/// <param name="lineContent">Line text recorded on any issue raised here.</param>
/// <param name="issues">Receives a <c>LocationParseFailure</c> issue when patterns are configured but none matches.</param>
/// <returns>
/// The time text, the location text (empty when none was found), and a flag that is
/// <c>true</c> when the location matched a configured pattern or when there is no location at all.
/// </returns>
private (string time, string location, bool locationParseSuccess) ParseTimeAndLocation(
    string timeAndLocation,
    int lineNumber,
    string lineContent,
    List<ParsingIssue> issues)
{
    var splitMatch = _timeLocationRegex.Match(timeAndLocation);

    // The time regex did not match: hand back the whole (trimmed) input as the time.
    // With no location at all, the success flag is reported as true.
    if (!splitMatch.Success)
    {
        return (timeAndLocation.Trim(), string.Empty, true);
    }

    var parsedTime = splitMatch.Groups["Time"].Value.Trim();

    var locationGroup = splitMatch.Groups["Location"];
    var rawLocation = locationGroup.Success ? locationGroup.Value.Trim() : string.Empty;

    // A time without any trailing location text also counts as success.
    if (string.IsNullOrWhiteSpace(rawLocation))
    {
        return (parsedTime, string.Empty, true);
    }

    // Without configured patterns the raw location is used as-is and no issue is raised;
    // the flag stays false because nothing was validated.
    var config = _locationConfig;
    if (config == null || !config.LocationPatterns.Any())
    {
        return (parsedTime, rawLocation, false);
    }

    var matchedLocation = MatchLocationPattern(rawLocation, config.LocationPatterns);
    if (!string.IsNullOrEmpty(matchedLocation))
    {
        return (parsedTime, matchedLocation, true);
    }

    // Patterns exist but none matched: keep the raw location and report it.
    issues.Add(new ParsingIssue
    {
        LineNumber = lineNumber,
        LineContent = lineContent,
        IssueType = ParsingIssueType.LocationParseFailure,
        Message = $"Location '{rawLocation}' does not match any configured pattern"
    });

    return (parsedTime, rawLocation, false);
}
|
||||
|
||||
/// <summary>
/// Checks location text against the configured wildcard patterns and returns the text
/// unchanged when one of them matches.
/// </summary>
/// <param name="locationText">The location text to test.</param>
/// <param name="patterns">
/// Patterns in which <c>*</c> is a wildcard (translated to the regex <c>.*</c>) and every
/// other character is matched literally. Null entries and entries without a <c>*</c> are skipped.
/// </param>
/// <returns>
/// <paramref name="locationText"/> when a pattern matches the whole text (case-insensitive);
/// otherwise <see cref="string.Empty"/>, including when either argument is null.
/// </returns>
private string MatchLocationPattern(string locationText, List<string> patterns)
{
    // Nothing to match against: report "no match" instead of throwing.
    if (patterns is null || locationText is null)
    {
        return string.Empty;
    }

    foreach (var pattern in patterns)
    {
        // Null entries (e.g. from a sparsely filled configuration) are ignored rather than
        // crashing the whole parse.
        // NOTE(review): wildcard-free patterns are skipped, so an exact name such as
        // "Main Hall" never matches — confirm this is intended.
        if (pattern is null || !pattern.Contains('*'))
        {
            continue;
        }

        // Escape regex metacharacters first, then turn the (now escaped) '*' into ".*".
        var anchoredPattern = "^" + Regex.Escape(pattern).Replace(@"\*", ".*") + "$";

        // The static overload goes through the framework's regex cache, so the same
        // pattern is not re-parsed for every line of the input file.
        if (Regex.IsMatch(locationText, anchoredPattern, RegexOptions.IgnoreCase))
        {
            return locationText; // Return the full matched location
        }
    }

    return string.Empty;
}
|
||||
|
||||
private TimeOnly ParseStartTime(string time)
|
||||
{
|
||||
int hour = 0;
|
||||
@@ -202,6 +372,10 @@ public class EventOccurrenceParser
|
||||
if (timeMatch.Groups["APM"].Captures[0].Value is "p.m." or "pm" && hour < 12)
|
||||
hour += 12;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new FormatException($"Time format not recognized: {time}");
|
||||
}
|
||||
}
|
||||
|
||||
return new TimeOnly(hour, minute, 0);
|
||||
|
||||
Reference in New Issue
Block a user