f916cfad6b
This commit restructures the EventOccurrenceParser by breaking down its functionality into modular components, including EventDefinitionResolver, LineClassifier, LocationPatternMatcher, SectionHeaderMatcher, TimeLocationParser, and TimeParser. This refactoring enhances code readability and maintainability, allowing for easier updates and testing. Additionally, the TextUtil class has been updated to include input sanitization methods. Comprehensive unit tests have been added to ensure the correctness of the new parsing logic and to validate the handling of various event occurrence scenarios.
97 lines
3.2 KiB
C#
97 lines
3.2 KiB
C#
using System.Text.RegularExpressions;
|
|
|
|
namespace Core.Parsers.EventOccurrence;
|
|
|
|
/// <summary>
/// Converts time strings into <see cref="TimeOnly"/> values.
/// Recognizes NOON, the TBD placeholder, time ranges (only the start time is used),
/// and hour[:minute] values carrying an AM/PM marker.
/// </summary>
public static class TimeParser
{
    /// <summary>Text that separates the start and end of a time range.</summary>
    private const string RangeSeparator = " - ";

    // Parse reads the named groups "Hour", "Minute" and "APM" from a match of this pattern.
    // The pattern itself lives in TimePatterns.TimeWithGroups, which is declared outside this class.
    private static readonly Regex TimeRegex = new(
        TimePatterns.TimeWithGroups,
        RegexOptions.Compiled | RegexOptions.IgnoreCase);

    /// <summary>
    /// Extracts the start time from a time range string.
    /// Example: "10:00 a.m. - 12:00 p.m." yields "10:00 a.m."
    /// </summary>
    /// <param name="timeRange">A single time or a range whose parts are separated by " - ".</param>
    /// <returns>
    /// The text before the first " - ", or <paramref name="timeRange"/> unchanged when it
    /// contains no separator. The result is not trimmed.
    /// </returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="timeRange"/> is null.</exception>
    public static string ExtractStartTime(string timeRange)
    {
        ArgumentNullException.ThrowIfNull(timeRange);

        // A single IndexOf both detects the separator and locates the cut point.
        int separatorIndex = timeRange.IndexOf(RangeSeparator, StringComparison.Ordinal);
        return separatorIndex >= 0 ? timeRange[..separatorIndex] : timeRange;
    }

    /// <summary>
    /// Parses a time string into a <see cref="TimeOnly"/>.
    /// Handles:
    /// - NOON (any casing, surrounding whitespace ignored) yields 12:00
    /// - TBD (any casing, surrounding whitespace ignored) yields 00:00 as a placeholder
    /// - Time ranges: the start time is parsed (e.g., "10:00 a.m. - 12:00 p.m." parses "10:00 a.m.")
    /// - Standard AM/PM formats (e.g., "3:00 p.m.", "10:30 am")
    /// </summary>
    /// <param name="time">The time string to parse.</param>
    /// <returns>A TimeOnly object representing the parsed time.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="time"/> is null.</exception>
    /// <exception cref="FormatException">
    /// Thrown when the time format is not recognized or the hour/minute values are out of range.
    /// </exception>
    public static TimeOnly Parse(string time)
    {
        ArgumentNullException.ThrowIfNull(time);

        // TBD still produces an occurrence; midnight is the placeholder that marks it as undetermined.
        if (string.Equals(time.Trim(), "TBD", StringComparison.OrdinalIgnoreCase))
        {
            return new TimeOnly(0, 0, 0);
        }

        // For a range, only the start time matters.
        string timeToParse = ExtractStartTime(time);

        // Compared the same way as TBD (trimmed, case-insensitive) so "Noon" and "NOON " are accepted too.
        if (string.Equals(timeToParse.Trim(), "NOON", StringComparison.OrdinalIgnoreCase))
        {
            return new TimeOnly(12, 0, 0);
        }

        Match timeMatch = TimeRegex.Match(timeToParse);
        if (!timeMatch.Success)
        {
            throw new FormatException($"Time format not recognized: {time}");
        }

        if (!TryReadNumber(timeMatch.Groups["Hour"], out int hour))
        {
            throw new FormatException($"Time format not recognized: {time}");
        }

        // Minutes are optional; a missing group means "on the hour".
        int minute = 0;
        Group minuteGroup = timeMatch.Groups["Minute"];
        if (minuteGroup.Success && !TryReadNumber(minuteGroup, out minute))
        {
            throw new FormatException($"Time format not recognized: {time}");
        }

        hour = ToTwentyFourHour(hour, FirstCapture(timeMatch.Groups["APM"]));

        // Reject values TimeOnly cannot represent with the documented FormatException
        // instead of letting the constructor throw ArgumentOutOfRangeException.
        if (hour is < 0 or > 23 || minute is < 0 or > 59)
        {
            throw new FormatException($"Time format not recognized: {time}");
        }

        return new TimeOnly(hour, minute, 0);
    }

    /// <summary>
    /// Returns the first capture of <paramref name="group"/>, or an empty string when the
    /// group did not participate in the match (where indexing Captures[0] would throw).
    /// </summary>
    private static string FirstCapture(Group group) =>
        group.Success ? group.Captures[0].Value : string.Empty;

    /// <summary>
    /// Reads the first capture of <paramref name="group"/> as an integer using the invariant
    /// culture. Returns false instead of throwing when the text is missing, malformed or overflows.
    /// </summary>
    private static bool TryReadNumber(Group group, out int value) =>
        int.TryParse(
            FirstCapture(group),
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out value);

    /// <summary>
    /// Applies an AM/PM marker to a 12-hour clock value. Dots and spaces in the marker are
    /// ignored and the comparison is case-insensitive; any other marker leaves the hour unchanged.
    /// </summary>
    private static int ToTwentyFourHour(int hour, string marker)
    {
        string normalized = marker
            .Replace(".", string.Empty, StringComparison.Ordinal)
            .Replace(" ", string.Empty, StringComparison.Ordinal);

        if (normalized.Equals("pm", StringComparison.OrdinalIgnoreCase))
        {
            // PM: add 12 unless it is already 12 PM (which stays 12).
            return hour < 12 ? hour + 12 : hour;
        }

        if (normalized.Equals("am", StringComparison.OrdinalIgnoreCase))
        {
            // AM: 12 AM is midnight (0); every other hour is unchanged.
            return hour == 12 ? 0 : hour;
        }

        return hour;
    }
}
|
|
|