Files
chapter-organizer/Core/Parsers/EventOccurrenceGrammar.cs
T
poprhythm 7ddc55f672 Refactor event occurrence parsing to use dynamic month parsers and improve input normalization
This commit replaces individual month parsers with a dynamic array of month names, enhancing maintainability. The EventOccurrenceParser has been updated to utilize this new structure, ensuring consistent parsing of month names. Additionally, input normalization has been improved by standardizing hyphen handling and ensuring that all relevant parsing methods utilize the sanitized input. This change streamlines the parsing process and enhances overall robustness.
2026-01-08 08:46:11 -05:00

194 lines
6.6 KiB
C#

using Sprache;
using Core.Entities;
using Core.Models;
namespace Core.Parsers;
/// <summary>
/// Grammar definitions for parsing the event occurrence DSL using Sprache parser combinators.
/// Exposes one composable parser per grammar rule, plus helper methods that classify a
/// single input line without throwing on non-matching input.
/// </summary>
public static class EventOccurrenceGrammar
{
    /// <summary>
    /// Array of all month names in order (January through December).
    /// This is the single source of truth for month names used throughout the parser.
    /// </summary>
    public static readonly string[] MonthNames = new[]
    {
        "January", "February", "March", "April", "May", "June",
        "July", "August", "September", "October", "November", "December"
    };

    // One case-sensitive, whitespace-tolerant parser per entry in MonthNames.
    // NOTE: static fields initialize in textual order, so this must stay below MonthNames.
    private static readonly Parser<string>[] MonthParsers = MonthNames
        .Select(month => Parse.String(month).Text().Token())
        .ToArray();

    /// <summary>
    /// Parser for month names (January through December).
    /// Built dynamically from <see cref="MonthNames"/>; matching is case-sensitive.
    /// </summary>
    public static readonly Parser<string> Month = MonthParsers
        .Aggregate((current, next) => current.Or(next));

    /// <summary>
    /// Parser for a day-of-month number followed by an optional semicolon.
    /// The value is not range-checked here; digit runs that do not fit in an
    /// <see cref="int"/> make the parser fail instead of throwing.
    /// </summary>
    public static readonly Parser<int> DayOfMonth =
        // Parse.Number accepts any run of numeric characters, so reject values int.Parse
        // could not convert (overflow, non-ASCII numerics) as an ordinary parse failure.
        from digits in Parse.Number.Where(text => int.TryParse(text, out _))
        from semicolon in Parse.Char(';').Optional()
        select int.Parse(digits);

    // Time parsing components
    private static readonly Parser<string> Noon = Parse.String("NOON").Text().Token();
    private static readonly Parser<string> Tbd = Parse.String("TBD").Text().Token();

    // Accepted meridiem spellings: dotted or undotted, all-lower or all-upper.
    private static readonly Parser<string> AmPm =
        Parse.String("a.m.").Or(Parse.String("am")).Or(Parse.String("A.M.")).Or(Parse.String("AM"))
            .Or(Parse.String("p.m.")).Or(Parse.String("pm")).Or(Parse.String("P.M.")).Or(Parse.String("PM"))
            .Text().Token();

    // A clock time such as "9 am", "9:30 a.m." or "10:15PM", rendered as "h:mm meridiem"
    // with minutes defaulting to "00" when omitted.
    private static readonly Parser<string> TimeValue =
        from hour in Parse.Number
        from colon in Parse.Char(':').Optional()
        from minute in Parse.Number.Optional()
        from ws in Parse.WhiteSpace.Many()
        from ampm in AmPm
        select $"{hour}:{(minute.IsDefined ? minute.Get() : "00")} {ampm}";

    /// <summary>
    /// Parser for hyphen character.
    /// Note: Input is assumed to be normalized (en-dash and em-dash converted to regular hyphen) via SanitizeInput.
    /// </summary>
    public static readonly Parser<char> Hyphen = Parse.Char('-');

    /// <summary>
    /// Parser for time values, including ranges and special values (NOON, TBD).
    /// A single time yields that time; a start and end yield "start - end".
    /// </summary>
    public static readonly Parser<string> Time =
        // The range form is tried first so that a range starting with NOON
        // ("NOON - 1:00 pm") is consumed as a whole. A lone "NOON" still matches
        // this branch and yields "NOON"; "TBD" falls through to the alternative.
        (from start in TimeValue.Or(Noon)
         from dash in Hyphen.Then(_ => Parse.WhiteSpace.Many()).Optional()
         from end in TimeValue.Or(Noon).Optional()
         select end.IsDefined
             ? $"{start} - {end.Get()}"
             : start)
        .Or(Tbd);

    // School level marker that terminates a section header.
    private static readonly Parser<string> SchoolLevelCode =
        Parse.String("MS").Or(Parse.String("HS")).Text().Token();

    // The "- MS" / "- HS" tail of a section header (whitespace around the hyphen is optional).
    private static readonly Parser<string> SchoolLevelSuffix =
        Hyphen.Token().Then(_ => SchoolLevelCode);

    /// <summary>
    /// Parser for section headers: EventName - (MS|HS).
    /// The event name runs up to the first hyphen that is followed by a school level,
    /// so names that themselves contain hyphens (e.g. "On-Demand Video - HS") are accepted.
    /// Note: Input is assumed to be normalized (hyphens normalized) via SanitizeInput.
    /// </summary>
    public static readonly Parser<(string EventName, string SchoolLevel)> SectionHeader =
        from eventName in Parse.AnyChar.Except(SchoolLevelSuffix).Many().Text().Token()
        from schoolLevel in SchoolLevelSuffix
        select (eventName.Trim(), schoolLevel);

    /// <summary>
    /// Parser for General Schedule/Session headers.
    /// </summary>
    public static readonly Parser<string> GeneralSchedule =
        Parse.String("General Schedule").Or(Parse.String("General Session")).Text().Token();

    /// <summary>
    /// Parser for comment lines (starting with #). Yields the text after the hash.
    /// </summary>
    public static readonly Parser<string> CommentLine =
        from hash in Parse.Char('#')
        from rest in Parse.AnyChar.Many().Text()
        select rest;

    /// <summary>
    /// Attempts to parse a section header from the given line.
    /// </summary>
    /// <param name="line">The (sanitized) input line; may be null.</param>
    /// <returns>The event name and school level, or null if the line is not a section header.</returns>
    public static (string EventName, string SchoolLevel)? TryParseSectionHeader(string line)
    {
        if (line is null)
        {
            return null;
        }

        // TryParse reports failure through the result instead of throwing.
        var result = SectionHeader.TryParse(line);
        if (!result.WasSuccessful)
        {
            return null;
        }

        return result.Value;
    }

    /// <summary>
    /// Determines whether the given line starts with a General Schedule/Session header.
    /// </summary>
    /// <param name="line">The input line; may be null.</param>
    /// <returns>True if the line parses as a General Schedule header; otherwise false.</returns>
    public static bool IsGeneralSchedule(string line)
    {
        return line is not null && GeneralSchedule.TryParse(line).WasSuccessful;
    }

    /// <summary>
    /// Determines whether the given line is a comment, i.e. its first
    /// non-whitespace character is '#'.
    /// </summary>
    /// <param name="line">The input line; may be null.</param>
    /// <returns>True if the line is a comment; otherwise false.</returns>
    public static bool IsCommentLine(string line)
    {
        return line is not null && line.TrimStart().StartsWith("#", StringComparison.Ordinal);
    }

    /// <summary>
    /// Attempts to parse an occurrence line of the form "Name Month Day[;] TimeAndLocation".
    /// Strategy: scan the line left to right for month names (case-insensitively) and use the
    /// first one that is actually followed by a day number. Earlier hits that are merely part
    /// of the event name (e.g. "March" inside "Marching Band") are skipped.
    /// </summary>
    /// <param name="line">The input line; may be null or empty.</param>
    /// <returns>
    /// The name (text before the month), the canonical month name from <see cref="MonthNames"/>,
    /// the day, and the trimmed remainder of the line; or null if no month/day pair is found.
    /// </returns>
    public static (string Name, string Month, int Day, string TimeAndLocation)? TryParseOccurrenceLine(string line)
    {
        if (string.IsNullOrEmpty(line))
        {
            return null;
        }

        var searchFrom = 0;
        while (searchFrom < line.Length)
        {
            // Find the earliest month name at or after the current search position.
            var monthIndex = -1;
            var foundMonth = string.Empty;
            foreach (var month in MonthNames)
            {
                var index = line.IndexOf(month, searchFrom, StringComparison.OrdinalIgnoreCase);
                if (index >= 0 && (monthIndex < 0 || index < monthIndex))
                {
                    monthIndex = index;
                    foundMonth = month;
                }
            }

            if (monthIndex < 0)
            {
                return null;
            }

            // The month was located case-insensitively, so it must be parsed the same way.
            var occurrence =
                from month in Parse.IgnoreCase(foundMonth).Text().Token()
                from day in DayOfMonth.Token()
                from timeAndLocation in Parse.AnyChar.Many().Text()
                select (Day: day, TimeAndLocation: timeAndLocation.Trim());

            var result = occurrence.TryParse(line.Substring(monthIndex));
            if (result.WasSuccessful)
            {
                // Everything before the month is the event name.
                var name = line.Substring(0, monthIndex).Trim();
                return (name, foundMonth, result.Value.Day, result.Value.TimeAndLocation);
            }

            // This hit was not followed by a day; look for the next month name to its right.
            searchFrom = monthIndex + 1;
        }

        return null;
    }
}