Refactor event occurrence parsing to use dynamic month parsers and improve input normalization
This commit replaces individual month parsers with a dynamic array of month names, enhancing maintainability. The EventOccurrenceParser has been updated to utilize this new structure, ensuring consistent parsing of month names. Additionally, input normalization has been improved by standardizing hyphen handling and ensuring that all relevant parsing methods utilize the sanitized input. This change streamlines the parsing process and enhances overall robustness.
This commit is contained in:
@@ -10,35 +10,27 @@ namespace Core.Parsers;
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
public static class EventOccurrenceGrammar
|
public static class EventOccurrenceGrammar
|
||||||
{
|
{
|
||||||
// Months - all 12 months supported
|
/// <summary>
|
||||||
private static readonly Parser<string> January = Parse.String("January").Text().Token();
|
/// Array of all month names in order (January through December).
|
||||||
private static readonly Parser<string> February = Parse.String("February").Text().Token();
|
/// This is the single source of truth for month names used throughout the parser.
|
||||||
private static readonly Parser<string> March = Parse.String("March").Text().Token();
|
/// </summary>
|
||||||
private static readonly Parser<string> April = Parse.String("April").Text().Token();
|
public static readonly string[] MonthNames = new[]
|
||||||
private static readonly Parser<string> May = Parse.String("May").Text().Token();
|
{
|
||||||
private static readonly Parser<string> June = Parse.String("June").Text().Token();
|
"January", "February", "March", "April", "May", "June",
|
||||||
private static readonly Parser<string> July = Parse.String("July").Text().Token();
|
"July", "August", "September", "October", "November", "December"
|
||||||
private static readonly Parser<string> August = Parse.String("August").Text().Token();
|
};
|
||||||
private static readonly Parser<string> September = Parse.String("September").Text().Token();
|
|
||||||
private static readonly Parser<string> October = Parse.String("October").Text().Token();
|
// Build month parsers dynamically from MonthNames array
|
||||||
private static readonly Parser<string> November = Parse.String("November").Text().Token();
|
private static readonly Parser<string>[] MonthParsers = MonthNames
|
||||||
private static readonly Parser<string> December = Parse.String("December").Text().Token();
|
.Select(month => Parse.String(month).Text().Token())
|
||||||
|
.ToArray();
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Parser for month names (January through December).
|
/// Parser for month names (January through December).
|
||||||
|
/// Built dynamically from MonthNames array.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public static readonly Parser<string> Month = January
|
public static readonly Parser<string> Month = MonthParsers
|
||||||
.Or(February)
|
.Aggregate((current, next) => current.Or(next));
|
||||||
.Or(March)
|
|
||||||
.Or(April)
|
|
||||||
.Or(May)
|
|
||||||
.Or(June)
|
|
||||||
.Or(July)
|
|
||||||
.Or(August)
|
|
||||||
.Or(September)
|
|
||||||
.Or(October)
|
|
||||||
.Or(November)
|
|
||||||
.Or(December);
|
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Parser for day of month (1-31, optional semicolon).
|
/// Parser for day of month (1-31, optional semicolon).
|
||||||
@@ -66,9 +58,10 @@ public static class EventOccurrenceGrammar
|
|||||||
select $"{hour}:{(minute.IsDefined ? minute.Get() : "00")} {ampm}";
|
select $"{hour}:{(minute.IsDefined ? minute.Get() : "00")} {ampm}";
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Parser for hyphen characters (en-dash, hyphen, em-dash).
|
/// Parser for hyphen character.
|
||||||
|
/// Note: Input is assumed to be normalized (en-dash and em-dash converted to regular hyphen) via SanitizeInput.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public static readonly Parser<char> Hyphen = Parse.Char('–').Or(Parse.Char('-')).Or(Parse.Char('—'));
|
public static readonly Parser<char> Hyphen = Parse.Char('-');
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Parser for time values, including ranges and special values (NOON, TBD).
|
/// Parser for time values, including ranges and special values (NOON, TBD).
|
||||||
@@ -80,12 +73,13 @@ public static class EventOccurrenceGrammar
|
|||||||
from dash in Hyphen.Then(_ => Parse.WhiteSpace.Many()).Optional()
|
from dash in Hyphen.Then(_ => Parse.WhiteSpace.Many()).Optional()
|
||||||
from end in TimeValue.Or(Noon).Optional()
|
from end in TimeValue.Or(Noon).Optional()
|
||||||
select end.IsDefined
|
select end.IsDefined
|
||||||
? $"{start} – {end.Get()}"
|
? $"{start} - {end.Get()}"
|
||||||
: start
|
: start
|
||||||
);
|
);
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Parser for section headers: EventName [–-—] (MS|HS).
|
/// Parser for section headers: EventName - (MS|HS).
|
||||||
|
/// Note: Input is assumed to be normalized (hyphens normalized) via SanitizeInput.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public static readonly Parser<(string EventName, string SchoolLevel)> SectionHeader =
|
public static readonly Parser<(string EventName, string SchoolLevel)> SectionHeader =
|
||||||
from eventName in Parse.AnyChar.Except(Hyphen).Many().Text().Token()
|
from eventName in Parse.AnyChar.Except(Hyphen).Many().Text().Token()
|
||||||
@@ -149,5 +143,51 @@ public static class EventOccurrenceGrammar
|
|||||||
{
|
{
|
||||||
return line.TrimStart().StartsWith("#", StringComparison.Ordinal);
|
return line.TrimStart().StartsWith("#", StringComparison.Ordinal);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Attempts to parse an occurrence line of the form "Name Month Day TimeAndLocation".
/// </summary>
/// <remarks>
/// Strategy: scan the line left to right for a month name from <see cref="MonthNames"/> that
/// stands as a whole word (case-insensitive), then require a day of month to follow it.
/// If a candidate is not followed by a valid day (for example the month name is part of the
/// event name), scanning continues with the next candidate instead of giving up.
/// </remarks>
/// <param name="line">The (already normalized) line to parse. Null or blank yields null.</param>
/// <returns>
/// The text before the month (trimmed), the canonical month name as spelled in
/// <see cref="MonthNames"/>, the day, and the trimmed remainder of the line;
/// or null when no "Month Day" pair is found.
/// </returns>
public static (string Name, string Month, int Day, string TimeAndLocation)? TryParseOccurrenceLine(string line)
{
    if (string.IsNullOrWhiteSpace(line))
    {
        return null;
    }

    // Everything after the month: a day token, then the rest of the line verbatim.
    var dayAndRemainder =
        from day in DayOfMonth.Token()
        from timeAndLocation in Parse.AnyChar.Many().Text()
        select (Day: day, TimeAndLocation: timeAndLocation.Trim());

    for (var position = 0; position < line.Length; position++)
    {
        foreach (var monthName in MonthNames)
        {
            if (!IsWholeWordAt(line, position, monthName))
            {
                continue;
            }

            try
            {
                // TryParse reports failure through the result instead of throwing,
                // so a non-matching candidate simply falls through to the next one.
                var parsed = dayAndRemainder.TryParse(line.Substring(position + monthName.Length));
                if (parsed.WasSuccessful)
                {
                    var name = line.Substring(0, position).Trim();
                    return (name, monthName, parsed.Value.Day, parsed.Value.TimeAndLocation);
                }
            }
            catch (Exception ex) when (ex is FormatException or OverflowException)
            {
                // NOTE(review): DayOfMonth is defined outside this block; if it converts digits
                // with a throwing API, treat that as "no match here" rather than crashing the
                // caller. Confirm and drop this filter if DayOfMonth cannot throw.
            }
        }
    }

    return null;
}

/// <summary>
/// Returns true when <paramref name="word"/> occurs in <paramref name="text"/> at
/// <paramref name="index"/> (ordinal, ignoring case) and is not glued to a letter on either side.
/// </summary>
private static bool IsWholeWordAt(string text, int index, string word)
{
    var end = index + word.Length;
    if (end > text.Length)
    {
        return false;
    }

    if (string.Compare(text, index, word, 0, word.Length, StringComparison.OrdinalIgnoreCase) != 0)
    {
        return false;
    }

    var startsWord = index == 0 || !char.IsLetter(text[index - 1]);
    var endsWord = end == text.Length || !char.IsLetter(text[end]);
    return startsWord && endsWord;
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -27,25 +27,17 @@ public class EventOccurrenceParser
|
|||||||
_locationConfig = locationConfig;
|
_locationConfig = locationConfig;
|
||||||
}
|
}
|
||||||
|
|
||||||
private Regex _re =
|
|
||||||
new (
|
|
||||||
@"" + //
|
|
||||||
@"(?<Name>^[^#].*)\s" +
|
|
||||||
@"(?<Month>January|February|March|April|May|June|July|August|September|October|November|December)\s" +
|
|
||||||
@"(?<DayOfMonth>\d{1,2});?\s" +
|
|
||||||
@"(?<TimeAndLocation>.*)"
|
|
||||||
);
|
|
||||||
|
|
||||||
private readonly Regex _timeRe = new(@"(?<Hour>\d{1,2}):?(?<Minute>\d{2})?\s?(?<APM>(?:a|p)\.?m\.?)");
|
private readonly Regex _timeRe = new(@"(?<Hour>\d{1,2}):?(?<Minute>\d{2})?\s?(?<APM>(?:a|p)\.?m\.?)");
|
||||||
|
|
||||||
// Regex to match time ranges like "10:30 a.m. - 12:00 p.m." or "10:30 a.m. - NOON"
|
// Regex to match time ranges like "10:30 a.m. - 12:00 p.m." or "10:30 a.m. - NOON"
|
||||||
// Matches: time1 (optional dash time2/NOON), then location
|
// Matches: time1 (optional dash time2/NOON), then location
|
||||||
// The time group captures the full time range (including " - NOON" if present)
|
// The time group captures the full time range (including " - NOON" if present)
|
||||||
|
// Note: Input is normalized via SanitizeInput, so only regular hyphens need to be handled
|
||||||
// Pattern breakdown:
|
// Pattern breakdown:
|
||||||
// - First time: (?:NOON|\d{1,2}:?\d{0,2}\s*(?:[AaPp]\.?[Mm]\.?)) - matches NOON or time with AM/PM (more flexible whitespace)
|
// - First time: (?:NOON|\d{1,2}:?\d{0,2}\s*(?:[AaPp]\.?[Mm]\.?)) - matches NOON or time with AM/PM (more flexible whitespace)
|
||||||
// - Optional range: (?:\s*[–-]\s*(?:NOON|\d{1,2}:?\d{0,2}\s*(?:[AaPp]\.?[Mm]\.?))) - matches dash followed by NOON or time
|
// - Optional range: (?:\s*-\s*(?:NOON|\d{1,2}:?\d{0,2}\s*(?:[AaPp]\.?[Mm]\.?))) - matches dash followed by NOON or time
|
||||||
// - Location: (?:\s+(?<Location>.+))? - optional whitespace followed by location (capture group with explicit name)
|
// - Location: (?:\s+(?<Location>.+))? - optional whitespace followed by location (capture group with explicit name)
|
||||||
private readonly Regex _timeLocationRegex = new(@"(?<Time>(?:NOON|\d{1,2}:?\d{0,2}\s*(?:[AaPp]\.?[Mm]\.?))(?:\s*[–-]\s*(?:NOON|\d{1,2}:?\d{0,2}\s*(?:[AaPp]\.?[Mm]\.?)))?)(?:\s+(?<Location>.+))?");
|
private readonly Regex _timeLocationRegex = new(@"(?<Time>(?:NOON|\d{1,2}:?\d{0,2}\s*(?:[AaPp]\.?[Mm]\.?))(?:\s*-\s*(?:NOON|\d{1,2}:?\d{0,2}\s*(?:[AaPp]\.?[Mm]\.?)))?)(?:\s+(?<Location>.+))?");
|
||||||
|
|
||||||
public EventOccurrenceParserResult Parse()
|
public EventOccurrenceParserResult Parse()
|
||||||
{
|
{
|
||||||
@@ -57,21 +49,25 @@ public class EventOccurrenceParser
|
|||||||
var lines = File.ReadLines(_txtFile.FullName);
|
var lines = File.ReadLines(_txtFile.FullName);
|
||||||
foreach (var (line, index) in lines.Select((line, index) => (line, index + 1)))
|
foreach (var (line, index) in lines.Select((line, index) => (line, index + 1)))
|
||||||
{
|
{
|
||||||
var trimmedLine = line.Trim();
|
// Normalize input: trim and normalize hyphens (en-dash, em-dash -> regular hyphen)
|
||||||
|
// This allows the grammar parser to assume normalized input
|
||||||
|
var normalizedLine = SanitizeInput(line.Trim());
|
||||||
|
|
||||||
// Skip empty lines
|
// Skip empty lines
|
||||||
if (string.IsNullOrWhiteSpace(trimmedLine))
|
if (string.IsNullOrWhiteSpace(normalizedLine))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// Skip comment lines (starting with "#") - use grammar parser
|
// Skip comment lines (starting with "#") - use grammar parser
|
||||||
if (EventOccurrenceGrammar.IsCommentLine(trimmedLine))
|
if (EventOccurrenceGrammar.IsCommentLine(normalizedLine))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
var match = _re.Match(trimmedLine);
|
// Try to parse occurrence line using grammar parser
|
||||||
if (!match.Success)
|
var occurrenceLine = EventOccurrenceGrammar.TryParseOccurrenceLine(normalizedLine);
|
||||||
|
if (!occurrenceLine.HasValue)
|
||||||
{
|
{
|
||||||
|
// Not an occurrence line, try other line types
|
||||||
// Try to parse section header using grammar parser
|
// Try to parse section header using grammar parser
|
||||||
var sectionHeader = EventOccurrenceGrammar.TryParseSectionHeader(trimmedLine);
|
var sectionHeader = EventOccurrenceGrammar.TryParseSectionHeader(normalizedLine);
|
||||||
if (sectionHeader.HasValue)
|
if (sectionHeader.HasValue)
|
||||||
{
|
{
|
||||||
var (eventNamePart, schoolLevel) = sectionHeader.Value;
|
var (eventNamePart, schoolLevel) = sectionHeader.Value;
|
||||||
@@ -88,9 +84,9 @@ public class EventOccurrenceParser
|
|||||||
issues.Add(new ParsingIssue
|
issues.Add(new ParsingIssue
|
||||||
{
|
{
|
||||||
LineNumber = index,
|
LineNumber = index,
|
||||||
LineContent = trimmedLine,
|
LineContent = normalizedLine,
|
||||||
IssueType = ParsingIssueType.UnmatchedLine,
|
IssueType = ParsingIssueType.UnmatchedLine,
|
||||||
Message = $"Section header '{eventNamePart} – {schoolLevel}' found but no matching event definition (best match ratio: {Fuzz.Ratio(eventNamePart, _events.FirstOrDefault()?.Name ?? "")})"
|
Message = $"Section header '{eventNamePart} - {schoolLevel}' found but no matching event definition (best match ratio: {Fuzz.Ratio(eventNamePart, _events.FirstOrDefault()?.Name ?? "")})"
|
||||||
});
|
});
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -99,18 +95,18 @@ public class EventOccurrenceParser
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Check for General Schedule/Session using grammar parser
|
// Check for General Schedule/Session using grammar parser
|
||||||
if (EventOccurrenceGrammar.IsGeneralSchedule(trimmedLine))
|
if (EventOccurrenceGrammar.IsGeneralSchedule(normalizedLine))
|
||||||
{
|
{
|
||||||
currentEventDefinition = EventDefinition.GeneralSchedule;
|
currentEventDefinition = EventDefinition.GeneralSchedule;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Also check for simple "MS" or "HS" in line (backward compatibility)
|
// Also check for simple "MS" or "HS" in line (backward compatibility)
|
||||||
if (trimmedLine.Contains("MS") || trimmedLine.Contains("HS"))
|
if (normalizedLine.Contains("MS") || normalizedLine.Contains("HS"))
|
||||||
{
|
{
|
||||||
var evt =
|
var evt =
|
||||||
(from e in _events
|
(from e in _events
|
||||||
let rat = Fuzz.Ratio(e.Name, trimmedLine)
|
let rat = Fuzz.Ratio(e.Name, normalizedLine)
|
||||||
where rat > 50
|
where rat > 50
|
||||||
orderby rat descending
|
orderby rat descending
|
||||||
select e).FirstOrDefault();
|
select e).FirstOrDefault();
|
||||||
@@ -119,9 +115,9 @@ public class EventOccurrenceParser
|
|||||||
issues.Add(new ParsingIssue
|
issues.Add(new ParsingIssue
|
||||||
{
|
{
|
||||||
LineNumber = index,
|
LineNumber = index,
|
||||||
LineContent = trimmedLine,
|
LineContent = normalizedLine,
|
||||||
IssueType = ParsingIssueType.UnmatchedLine,
|
IssueType = ParsingIssueType.UnmatchedLine,
|
||||||
Message = $"Section header with 'MS' or 'HS' found but no matching event definition (best match ratio: {Fuzz.Ratio(trimmedLine, _events.FirstOrDefault()?.Name ?? "")})"
|
Message = $"Section header with 'MS' or 'HS' found but no matching event definition (best match ratio: {Fuzz.Ratio(normalizedLine, _events.FirstOrDefault()?.Name ?? "")})"
|
||||||
});
|
});
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -134,19 +130,19 @@ public class EventOccurrenceParser
|
|||||||
// - Start with lowercase or special characters (not event names)
|
// - Start with lowercase or special characters (not event names)
|
||||||
// - Are parenthetical notes like "(Semifinalists only)"
|
// - Are parenthetical notes like "(Semifinalists only)"
|
||||||
// - Are informational text like "Schedule Posted on..."
|
// - Are informational text like "Schedule Posted on..."
|
||||||
if (IsContinuationLine(trimmedLine))
|
if (IsContinuationLine(normalizedLine))
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// "Voting Delegates" section header is no longer used - occurrences are categorized by name pattern
|
// "Voting Delegates" section header is no longer used - occurrences are categorized by name pattern
|
||||||
// Track as unmatched line if it's not empty
|
// Track as unmatched line if it's not empty
|
||||||
if (!string.IsNullOrWhiteSpace(trimmedLine))
|
if (!string.IsNullOrWhiteSpace(normalizedLine))
|
||||||
{
|
{
|
||||||
issues.Add(new ParsingIssue
|
issues.Add(new ParsingIssue
|
||||||
{
|
{
|
||||||
LineNumber = index,
|
LineNumber = index,
|
||||||
LineContent = trimmedLine,
|
LineContent = normalizedLine,
|
||||||
IssueType = ParsingIssueType.UnmatchedLine,
|
IssueType = ParsingIssueType.UnmatchedLine,
|
||||||
Message = "Line does not match expected format (Name Month Day Time/Location)"
|
Message = "Line does not match expected format (Name Month Day Time/Location)"
|
||||||
});
|
});
|
||||||
@@ -154,11 +150,9 @@ public class EventOccurrenceParser
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
var occurrenceName = match.Groups["Name"].Captures[0].Value;
|
var (occurrenceName, month, dayOfMonthStr, timeAndLocation) = occurrenceLine.Value;
|
||||||
var month = match.Groups["Month"].Captures[0].Value;
|
|
||||||
var dayOfMonth = match.Groups["DayOfMonth"].Captures[0].Value;
|
|
||||||
var timeAndLocation = match.Groups["TimeAndLocation"].Captures[0].Value;
|
|
||||||
|
|
||||||
|
// Remove weekday suffix from occurrence name if present
|
||||||
occurrenceName = Regex.Replace(occurrenceName,
|
occurrenceName = Regex.Replace(occurrenceName,
|
||||||
@"(?<Weekday>Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday),\s?$", "").Trim();
|
@"(?<Weekday>Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday),\s?$", "").Trim();
|
||||||
|
|
||||||
@@ -171,30 +165,30 @@ public class EventOccurrenceParser
|
|||||||
issues.Add(new ParsingIssue
|
issues.Add(new ParsingIssue
|
||||||
{
|
{
|
||||||
LineNumber = index,
|
LineNumber = index,
|
||||||
LineContent = trimmedLine,
|
LineContent = normalizedLine,
|
||||||
IssueType = ParsingIssueType.MissingEventDefinition,
|
IssueType = ParsingIssueType.MissingEventDefinition,
|
||||||
Message = $"Cannot determine event definition for occurrence: {occurrenceName}"
|
Message = $"Cannot determine event definition for occurrence: {occurrenceName}"
|
||||||
});
|
});
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
timeAndLocation = SanitizeInput(timeAndLocation);
|
// timeAndLocation is already normalized (hyphens normalized) since normalizedLine was sanitized
|
||||||
|
|
||||||
// Parse time and location using configurable patterns
|
// Parse time and location using configurable patterns
|
||||||
var (time, location, locationParseSuccess) = ParseTimeAndLocation(timeAndLocation, index, trimmedLine, issues);
|
var (time, location, locationParseSuccess) = ParseTimeAndLocation(timeAndLocation, index, normalizedLine, issues);
|
||||||
|
|
||||||
// Parse date
|
// Parse date
|
||||||
DateOnly? startDate = null;
|
DateOnly? startDate = null;
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
startDate = ParseDate(month, dayOfMonth, DateTime.Now.Year);
|
startDate = ParseDate(month, dayOfMonthStr.ToString(), DateTime.Now.Year);
|
||||||
}
|
}
|
||||||
catch (Exception ex)
|
catch (Exception ex)
|
||||||
{
|
{
|
||||||
issues.Add(new ParsingIssue
|
issues.Add(new ParsingIssue
|
||||||
{
|
{
|
||||||
LineNumber = index,
|
LineNumber = index,
|
||||||
LineContent = trimmedLine,
|
LineContent = normalizedLine,
|
||||||
IssueType = ParsingIssueType.DateParseFailure,
|
IssueType = ParsingIssueType.DateParseFailure,
|
||||||
Message = $"Failed to parse date: {ex.Message}"
|
Message = $"Failed to parse date: {ex.Message}"
|
||||||
});
|
});
|
||||||
@@ -212,7 +206,7 @@ public class EventOccurrenceParser
|
|||||||
issues.Add(new ParsingIssue
|
issues.Add(new ParsingIssue
|
||||||
{
|
{
|
||||||
LineNumber = index,
|
LineNumber = index,
|
||||||
LineContent = trimmedLine,
|
LineContent = normalizedLine,
|
||||||
IssueType = ParsingIssueType.TimeParseFailure,
|
IssueType = ParsingIssueType.TimeParseFailure,
|
||||||
Message = $"Failed to parse time '{time}': {ex.Message}"
|
Message = $"Failed to parse time '{time}': {ex.Message}"
|
||||||
});
|
});
|
||||||
@@ -229,7 +223,7 @@ public class EventOccurrenceParser
|
|||||||
Name = occurrenceName,
|
Name = occurrenceName,
|
||||||
StartTime = t,
|
StartTime = t,
|
||||||
Time = $"{time}",
|
Time = $"{time}",
|
||||||
Date = $"{month} {dayOfMonth}",
|
Date = $"{month} {dayOfMonthStr}",
|
||||||
Location = location
|
Location = location
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -311,24 +305,17 @@ public class EventOccurrenceParser
|
|||||||
|
|
||||||
/// <summary>
/// Builds a <see cref="DateOnly"/> from a month name, a day-of-month string and a year.
/// </summary>
/// <param name="month">
/// Month name; matched against <c>EventOccurrenceGrammar.MonthNames</c> ignoring case.
/// </param>
/// <param name="dayOfMonth">Day of month as an integer string.</param>
/// <param name="year">Calendar year.</param>
/// <returns>The date for the given year, month and day.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="month"/> is not one of the names in <c>EventOccurrenceGrammar.MonthNames</c>.
/// </exception>
/// <exception cref="FormatException"><paramref name="dayOfMonth"/> is not an integer.</exception>
/// <exception cref="ArgumentOutOfRangeException">The day does not exist in that month/year.</exception>
private DateOnly ParseDate(string month, string dayOfMonth, int year)
{
    // Ordinal, case-insensitive comparison: lower-casing with the current culture can change
    // letters (e.g. 'I' under Turkish casing rules) and make a valid month name fail to match.
    var monthIndex = Array.FindIndex(
        EventOccurrenceGrammar.MonthNames,
        candidate => string.Equals(candidate, month, StringComparison.OrdinalIgnoreCase));

    if (monthIndex < 0)
    {
        throw new ArgumentException($"Invalid month: {month}", nameof(month));
    }

    // The array index is 0-based; calendar months are 1-based.
    var monthNum = monthIndex + 1;
    var day = int.Parse(dayOfMonth, System.Globalization.CultureInfo.InvariantCulture);

    return new DateOnly(year, monthNum, day);
}
|
||||||
|
|
||||||
@@ -428,11 +415,13 @@ public class EventOccurrenceParser
|
|||||||
return string.Empty;
|
return string.Empty;
|
||||||
|
|
||||||
// Remove leading dashes and whitespace
|
// Remove leading dashes and whitespace
|
||||||
locationText = locationText.TrimStart('–', '-', ' ', '\t');
|
// Note: Input is normalized, so only regular hyphens need to be handled
|
||||||
|
locationText = locationText.TrimStart('-', ' ', '\t');
|
||||||
|
|
||||||
// Try to match and remove time patterns at the start
|
// Try to match and remove time patterns at the start
|
||||||
// Pattern 1: Dash, whitespace, time (e.g., "– 12:15 p.m. " or "– NOON ")
|
// Pattern 1: Dash, whitespace, time (e.g., "- 12:15 p.m. " or "- NOON ")
|
||||||
var dashTimePattern = new Regex(@"^[–-]\s+(?:NOON|\d{1,2}:?\d{0,2}\s*[AaPp]\.?[Mm]\.?)\s+", RegexOptions.IgnoreCase);
|
// Note: Input is normalized, so only regular hyphens need to be handled
|
||||||
|
var dashTimePattern = new Regex(@"^-\s+(?:NOON|\d{1,2}:?\d{0,2}\s*[AaPp]\.?[Mm]\.?)\s+", RegexOptions.IgnoreCase);
|
||||||
locationText = dashTimePattern.Replace(locationText, "").Trim();
|
locationText = dashTimePattern.Replace(locationText, "").Trim();
|
||||||
|
|
||||||
// Pattern 2: Time without dash at start (e.g., "12:15 p.m. " or "NOON ")
|
// Pattern 2: Time without dash at start (e.g., "12:15 p.m. " or "NOON ")
|
||||||
@@ -440,7 +429,8 @@ public class EventOccurrenceParser
|
|||||||
locationText = timePatternAtStart.Replace(locationText, "").Trim();
|
locationText = timePatternAtStart.Replace(locationText, "").Trim();
|
||||||
|
|
||||||
// Pattern 3: Any remaining dash-time combinations (more flexible)
|
// Pattern 3: Any remaining dash-time combinations (more flexible)
|
||||||
var remainingDashTime = new Regex(@"^[–-]\s*(?:NOON|\d{1,2}:?\d{0,2}\s*[AaPp]\.?[Mm]\.?)\s*", RegexOptions.IgnoreCase);
|
// Note: Input is normalized, so only regular hyphens need to be handled
|
||||||
|
var remainingDashTime = new Regex(@"^-\s*(?:NOON|\d{1,2}:?\d{0,2}\s*[AaPp]\.?[Mm]\.?)\s*", RegexOptions.IgnoreCase);
|
||||||
locationText = remainingDashTime.Replace(locationText, "").Trim();
|
locationText = remainingDashTime.Replace(locationText, "").Trim();
|
||||||
|
|
||||||
// Pattern 4: Remove any standalone time at the start (handles cases where dash was already removed)
|
// Pattern 4: Remove any standalone time at the start (handles cases where dash was already removed)
|
||||||
@@ -521,7 +511,8 @@ public class EventOccurrenceParser
|
|||||||
hour = 12;
|
hour = 12;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
var timeMatch = _timeRe.Match(time.ToLower());
|
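// NOTE(review): _timeRe is constructed without RegexOptions.IgnoreCase, so it only matches lower-case "a.m."/"p.m."; without ToLower() an upper-case "A.M."/"P.M." will not match — confirm inputs are always lower-case or restore the lowercasing.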
|
||||||
|
var timeMatch = _timeRe.Match(time);
|
||||||
if (timeMatch.Success)
|
if (timeMatch.Success)
|
||||||
{
|
{
|
||||||
hour = int.Parse(timeMatch.Groups["Hour"].Captures[0].Value);
|
hour = int.Parse(timeMatch.Groups["Hour"].Captures[0].Value);
|
||||||
|
|||||||
Reference in New Issue
Block a user