Add Sprache package and enhance event occurrence parsing with new grammar rules
This commit is contained in:
@@ -12,6 +12,7 @@
|
||||
<PackageReference Include="Microsoft.EntityFrameworkCore" Version="9.0.8" />
|
||||
<PackageReference Include="Microsoft.Extensions.Configuration" Version="9.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Configuration.Abstractions" Version="9.0.0" />
|
||||
<PackageReference Include="Sprache" Version="2.3.1" />
|
||||
<PackageReference Include="Microsoft.Extensions.Configuration.Binder" Version="9.0.0" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -51,6 +51,11 @@ public class ParsingIssue
|
||||
/// </summary>
|
||||
public int LineNumber { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// The column number where the issue occurred (1-based, 0 if not available).
|
||||
/// </summary>
|
||||
public int ColumnNumber { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// The actual line content where the issue occurred.
|
||||
/// </summary>
|
||||
@@ -65,6 +70,16 @@ public class ParsingIssue
|
||||
/// Human-readable description of the issue.
|
||||
/// </summary>
|
||||
public string Message { get; set; } = string.Empty;
|
||||
|
||||
/// <summary>
|
||||
/// What was expected at the error location (e.g., "month name", "MS or HS", "time value").
|
||||
/// </summary>
|
||||
public string Expected { get; set; } = string.Empty;
|
||||
|
||||
/// <summary>
|
||||
/// What was actually found at the error location.
|
||||
/// </summary>
|
||||
public string Found { get; set; } = string.Empty;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -0,0 +1,153 @@
|
||||
using Sprache;
|
||||
using Core.Entities;
|
||||
using Core.Models;
|
||||
|
||||
namespace Core.Parsers;
|
||||
|
||||
/// <summary>
|
||||
/// Grammar definitions for parsing event occurrence DSL using parser combinators.
|
||||
/// Provides composable parsers for each grammar rule.
|
||||
/// </summary>
|
||||
public static class EventOccurrenceGrammar
{
    /// <summary>
    /// Builds a parser that matches <paramref name="text"/> exactly (case-sensitive)
    /// and skips any whitespace surrounding it.
    /// </summary>
    private static Parser<string> Keyword(string text) => Parse.String(text).Text().Token();

    // Months - all 12 months supported
    private static readonly Parser<string> January = Keyword("January");
    private static readonly Parser<string> February = Keyword("February");
    private static readonly Parser<string> March = Keyword("March");
    private static readonly Parser<string> April = Keyword("April");
    private static readonly Parser<string> May = Keyword("May");
    private static readonly Parser<string> June = Keyword("June");
    private static readonly Parser<string> July = Keyword("July");
    private static readonly Parser<string> August = Keyword("August");
    private static readonly Parser<string> September = Keyword("September");
    private static readonly Parser<string> October = Keyword("October");
    private static readonly Parser<string> November = Keyword("November");
    private static readonly Parser<string> December = Keyword("December");

    /// <summary>
    /// Parser for month names (January through December).
    /// </summary>
    public static readonly Parser<string> Month = January
        .Or(February)
        .Or(March)
        .Or(April)
        .Or(May)
        .Or(June)
        .Or(July)
        .Or(August)
        .Or(September)
        .Or(October)
        .Or(November)
        .Or(December);

    /// <summary>
    /// Parser for day of month (1-31, optional trailing semicolon).
    /// Fails, rather than throwing or returning an out-of-range value, when the
    /// digits are longer than two characters or fall outside 1-31.
    /// </summary>
    public static readonly Parser<int> DayOfMonth =
        from digits in Parse.Number
        // Limiting the length first keeps int.Parse from overflowing on long digit runs.
        where digits.Length <= 2
        let day = int.Parse(digits)
        where day is >= 1 and <= 31
        from semicolon in Parse.Char(';').Optional()
        select day;

    // Time parsing components
    private static readonly Parser<string> Noon = Keyword("NOON");
    private static readonly Parser<string> Tbd = Keyword("TBD");

    // Accepted meridiem spellings; matching is case-sensitive per alternative.
    private static readonly Parser<string> AmPm =
        Parse.String("a.m.").Or(Parse.String("am")).Or(Parse.String("A.M.")).Or(Parse.String("AM"))
            .Or(Parse.String("p.m.")).Or(Parse.String("pm")).Or(Parse.String("P.M.")).Or(Parse.String("PM"))
            .Text().Token();

    // A single clock time such as "9 am" or "10:30 p.m."; normalized to "H:MM meridiem",
    // with "00" substituted when no minutes are given.
    private static readonly Parser<string> TimeValue =
        from hour in Parse.Number
        from colon in Parse.Char(':').Optional()
        from minute in Parse.Number.Optional()
        from ws in Parse.WhiteSpace.Many()
        from ampm in AmPm
        select $"{hour}:{(minute.IsDefined ? minute.Get() : "00")} {ampm}";

    /// <summary>
    /// Parser for hyphen characters (en-dash, hyphen, em-dash).
    /// </summary>
    public static readonly Parser<char> Hyphen = Parse.Char('–').Or(Parse.Char('-')).Or(Parse.Char('—'));

    /// <summary>
    /// Parser for time values, including ranges and special values (NOON, TBD).
    /// A range is rendered as "start – end"; a single value is returned as-is.
    /// </summary>
    public static readonly Parser<string> Time =
        // NOON is handled inside the range alternative instead of as a leading
        // alternative: matching it first would succeed on "NOON – 1:00 p.m." and
        // return only "NOON", silently dropping the end of the range.
        Tbd.Or(
            from start in TimeValue.Or(Noon)
            from dash in Hyphen.Then(_ => Parse.WhiteSpace.Many()).Optional()
            from end in TimeValue.Or(Noon).Optional()
            select end.IsDefined
                ? $"{start} – {end.Get()}"
                : start
        );

    /// <summary>
    /// Parser for section headers: EventName [–-—] (MS|HS).
    /// The event name is every character before the first hyphen/dash, trimmed.
    /// </summary>
    public static readonly Parser<(string EventName, string SchoolLevel)> SectionHeader =
        from eventName in Parse.AnyChar.Except(Hyphen).Many().Text().Token()
        from hyphen in Hyphen.Token()
        from schoolLevel in Parse.String("MS").Or(Parse.String("HS")).Text().Token()
        select (eventName.Trim(), schoolLevel);

    /// <summary>
    /// Parser for General Schedule/Session headers.
    /// </summary>
    public static readonly Parser<string> GeneralSchedule =
        Parse.String("General Schedule").Or(Parse.String("General Session")).Text().Token();

    /// <summary>
    /// Parser for comment lines (starting with #). Yields the text after the '#'.
    /// </summary>
    public static readonly Parser<string> CommentLine =
        from hash in Parse.Char('#')
        from rest in Parse.AnyChar.Many().Text()
        select rest;

    /// <summary>
    /// Attempts to parse a section header from the given line.
    /// </summary>
    /// <param name="line">The line to inspect.</param>
    /// <returns>
    /// The event name and school level when the line starts with a section header;
    /// <c>null</c> when it does not or when <paramref name="line"/> is <c>null</c>.
    /// </returns>
    public static (string EventName, string SchoolLevel)? TryParseSectionHeader(string line)
    {
        if (line is null)
        {
            return null;
        }

        // TryParse reports failure through the result object, so an ordinary
        // non-matching line does not cost an exception throw/catch.
        var result = SectionHeader.TryParse(line);
        if (!result.WasSuccessful)
        {
            return null;
        }

        return result.Value;
    }

    /// <summary>
    /// Determines whether the given line starts with a General Schedule/Session header.
    /// </summary>
    /// <param name="line">The line to inspect.</param>
    /// <returns>
    /// <c>true</c> when the header is recognized; <c>false</c> otherwise, including
    /// when <paramref name="line"/> is <c>null</c>.
    /// </returns>
    public static bool IsGeneralSchedule(string line)
    {
        return line is not null && GeneralSchedule.TryParse(line).WasSuccessful;
    }

    /// <summary>
    /// Determines whether the given line is a comment, i.e. its first
    /// non-whitespace character is '#'.
    /// </summary>
    /// <param name="line">The line to inspect.</param>
    /// <returns><c>true</c> if the line is a comment.</returns>
    public static bool IsCommentLine(string line)
    {
        return line.TrimStart().StartsWith("#", StringComparison.Ordinal);
    }
}
|
||||
|
||||
@@ -31,7 +31,7 @@ public class EventOccurrenceParser
|
||||
new (
|
||||
@"" + //
|
||||
@"(?<Name>^[^#].*)\s" +
|
||||
@"(?<Month>February|March|April|May|June|July)\s" +
|
||||
@"(?<Month>January|February|March|April|May|June|July|August|September|October|November|December)\s" +
|
||||
@"(?<DayOfMonth>\d{1,2});?\s" +
|
||||
@"(?<TimeAndLocation>.*)"
|
||||
);
|
||||
@@ -63,14 +63,50 @@ public class EventOccurrenceParser
|
||||
if (string.IsNullOrWhiteSpace(trimmedLine))
|
||||
continue;
|
||||
|
||||
// Skip comment lines (starting with "#")
|
||||
if (trimmedLine.StartsWith("#", StringComparison.Ordinal))
|
||||
// Skip comment lines (starting with "#") - use grammar parser
|
||||
if (EventOccurrenceGrammar.IsCommentLine(trimmedLine))
|
||||
continue;
|
||||
|
||||
var match = _re.Match(trimmedLine);
|
||||
if (!match.Success)
|
||||
{
|
||||
if (trimmedLine.Contains("MS"))
|
||||
// Try to parse section header using grammar parser
|
||||
var sectionHeader = EventOccurrenceGrammar.TryParseSectionHeader(trimmedLine);
|
||||
if (sectionHeader.HasValue)
|
||||
{
|
||||
var (eventNamePart, schoolLevel) = sectionHeader.Value;
|
||||
|
||||
// Use fuzzy matching to find the best matching event definition
|
||||
var evt =
|
||||
(from e in _events
|
||||
let rat = Fuzz.Ratio(e.Name, eventNamePart)
|
||||
where rat > 50
|
||||
orderby rat descending
|
||||
select e).FirstOrDefault();
|
||||
if (evt == null)
|
||||
{
|
||||
issues.Add(new ParsingIssue
|
||||
{
|
||||
LineNumber = index,
|
||||
LineContent = trimmedLine,
|
||||
IssueType = ParsingIssueType.UnmatchedLine,
|
||||
Message = $"Section header '{eventNamePart} – {schoolLevel}' found but no matching event definition (best match ratio: {Fuzz.Ratio(eventNamePart, _events.FirstOrDefault()?.Name ?? "")})"
|
||||
});
|
||||
continue;
|
||||
}
|
||||
currentEventDefinition = evt;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check for General Schedule/Session using grammar parser
|
||||
if (EventOccurrenceGrammar.IsGeneralSchedule(trimmedLine))
|
||||
{
|
||||
currentEventDefinition = EventDefinition.GeneralSchedule;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Also check for simple "MS" or "HS" in line (backward compatibility)
|
||||
if (trimmedLine.Contains("MS") || trimmedLine.Contains("HS"))
|
||||
{
|
||||
var evt =
|
||||
(from e in _events
|
||||
@@ -85,18 +121,13 @@ public class EventOccurrenceParser
|
||||
LineNumber = index,
|
||||
LineContent = trimmedLine,
|
||||
IssueType = ParsingIssueType.UnmatchedLine,
|
||||
Message = $"Section header with 'MS' found but no matching event definition (best match ratio: {Fuzz.Ratio(trimmedLine, _events.FirstOrDefault()?.Name ?? "")})"
|
||||
Message = $"Section header with 'MS' or 'HS' found but no matching event definition (best match ratio: {Fuzz.Ratio(trimmedLine, _events.FirstOrDefault()?.Name ?? "")})"
|
||||
});
|
||||
continue;
|
||||
}
|
||||
currentEventDefinition = evt;
|
||||
continue;
|
||||
}
|
||||
if (trimmedLine == "General Schedule" || trimmedLine == "General Session")
|
||||
{
|
||||
currentEventDefinition = EventDefinition.GeneralSchedule;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip continuation lines (lines that look like they're continuing from previous line)
|
||||
// These are typically lines that:
|
||||
@@ -280,31 +311,25 @@ public class EventOccurrenceParser
|
||||
|
||||
/// <summary>
/// Builds a <see cref="DateOnly"/> from a month name, a day-of-month string and a year.
/// </summary>
/// <param name="month">English month name; matched case-insensitively.</param>
/// <param name="dayOfMonth">Day of the month as a decimal integer string.</param>
/// <param name="year">Calendar year.</param>
/// <returns>The corresponding date.</returns>
/// <exception cref="ArgumentException"><paramref name="month"/> is not a recognized month name.</exception>
/// <exception cref="FormatException"><paramref name="dayOfMonth"/> is not a valid integer.</exception>
/// <exception cref="ArgumentOutOfRangeException">The year/month/day combination is not a valid date.</exception>
private DateOnly ParseDate(string month, string dayOfMonth, int year)
{
    // ToLowerInvariant rather than ToLower: culture-sensitive lowering breaks the
    // match under cultures such as tr-TR, where 'I' lowers to a dotless 'ı'
    // (e.g. "APRIL" would not become "april").
    int monthNum = month.ToLowerInvariant() switch
    {
        "january" => 1,
        "february" => 2,
        "march" => 3,
        "april" => 4,
        "may" => 5,
        "june" => 6,
        "july" => 7,
        "august" => 8,
        "september" => 9,
        "october" => 10,
        "november" => 11,
        "december" => 12,
        _ => throw new ArgumentException($"Invalid month: {month}", nameof(month))
    };

    // Schedule text is machine-parsed data, so parse the day independent of the current culture.
    var day = int.Parse(dayOfMonth, System.Globalization.CultureInfo.InvariantCulture);
    return new DateOnly(year, monthNum, day);
}
|
||||
|
||||
/// <summary>
|
||||
|
||||
Reference in New Issue
Block a user