Add Sprache package and enhance event occurrence parsing with new grammar rules

This commit is contained in:
2026-01-08 08:23:31 -05:00
parent 5fdd5fadba
commit f32ce649cd
5 changed files with 373 additions and 32 deletions
+1
View File
@@ -12,6 +12,7 @@
<PackageReference Include="Microsoft.EntityFrameworkCore" Version="9.0.8" />
<PackageReference Include="Microsoft.Extensions.Configuration" Version="9.0.0" />
<PackageReference Include="Microsoft.Extensions.Configuration.Abstractions" Version="9.0.0" />
<PackageReference Include="Sprache" Version="2.3.1" />
<PackageReference Include="Microsoft.Extensions.Configuration.Binder" Version="9.0.0" />
</ItemGroup>
</Project>
+15
View File
@@ -51,6 +51,11 @@ public class ParsingIssue
/// </summary>
public int LineNumber { get; set; }
/// <summary>
/// The column number where the issue occurred (1-based, 0 if not available).
/// </summary>
public int ColumnNumber { get; set; }
/// <summary>
/// The actual line content where the issue occurred.
/// </summary>
@@ -65,6 +70,16 @@ public class ParsingIssue
/// Human-readable description of the issue.
/// </summary>
public string Message { get; set; } = string.Empty;
/// <summary>
/// What was expected at the error location (e.g., "month name", "MS or HS", "time value").
/// </summary>
public string Expected { get; set; } = string.Empty;
/// <summary>
/// What was actually found at the error location.
/// </summary>
public string Found { get; set; } = string.Empty;
}
/// <summary>
+153
View File
@@ -0,0 +1,153 @@
using Sprache;
using Core.Entities;
using Core.Models;
namespace Core.Parsers;
/// <summary>
/// Grammar definitions for the event occurrence DSL, built from Sprache parser combinators.
/// Each public field is a composable parser for one grammar rule; the helper methods
/// classify a single line and report failure through their return value instead of throwing.
/// </summary>
public static class EventOccurrenceGrammar
{
    /// <summary>
    /// Builds a parser for an exact (case-sensitive) keyword that also consumes
    /// any whitespace surrounding it.
    /// </summary>
    /// <param name="text">The literal text to match.</param>
    /// <returns>A parser yielding the matched text.</returns>
    private static Parser<string> Keyword(string text) => Parse.String(text).Text().Token();

    /// <summary>
    /// Parser for month names (January through December, capitalised exactly as written here).
    /// </summary>
    public static readonly Parser<string> Month =
        Keyword("January")
            .Or(Keyword("February"))
            .Or(Keyword("March"))
            .Or(Keyword("April"))
            .Or(Keyword("May"))
            .Or(Keyword("June"))
            .Or(Keyword("July"))
            .Or(Keyword("August"))
            .Or(Keyword("September"))
            .Or(Keyword("October"))
            .Or(Keyword("November"))
            .Or(Keyword("December"));

    /// <summary>
    /// Parser for a day of month: one or more digits followed by an optional semicolon.
    /// The numeric range (1-31) is NOT validated here; callers that build a date must
    /// handle out-of-range values themselves.
    /// </summary>
    public static readonly Parser<int> DayOfMonth =
        from day in Parse.Number
        from semicolon in Parse.Char(';').Optional()
        select int.Parse(day, System.Globalization.CultureInfo.InvariantCulture);

    // Time parsing components
    private static readonly Parser<string> Noon = Keyword("NOON");
    private static readonly Parser<string> Tbd = Keyword("TBD");

    // Accepted meridiem spellings; dotted forms are listed before undotted ones.
    private static readonly Parser<string> AmPm =
        Parse.String("a.m.").Or(Parse.String("am")).Or(Parse.String("A.M.")).Or(Parse.String("AM"))
            .Or(Parse.String("p.m.")).Or(Parse.String("pm")).Or(Parse.String("P.M.")).Or(Parse.String("PM"))
            .Text().Token();

    // A clock time: hour, optional ":" and minutes, then a meridiem marker.
    // Produces "hour:minute marker", substituting "00" when no minutes were written.
    private static readonly Parser<string> TimeValue =
        from hour in Parse.Number
        from colon in Parse.Char(':').Optional()
        from minute in Parse.Number.Optional()
        from ws in Parse.WhiteSpace.Many()
        from ampm in AmPm
        select $"{hour}:{(minute.IsDefined ? minute.Get() : "00")} {ampm}";

    /// <summary>
    /// Parser for dash characters: en-dash (U+2013), ASCII hyphen-minus, or em-dash (U+2014).
    /// </summary>
    // The non-ASCII dashes are written as escapes so they survive re-encoding of this file.
    public static readonly Parser<char> Hyphen =
        Parse.Char('\u2013').Or(Parse.Char('-')).Or(Parse.Char('\u2014'));

    // One end of a time range: either a clock time or the keyword NOON.
    private static readonly Parser<string> TimePoint = TimeValue.Or(Noon);

    /// <summary>
    /// Parser for time values: a single time, a range ("start [dash] end"), NOON, or TBD.
    /// A range is returned as "start end" (the dash is dropped); a single time is returned as-is.
    /// </summary>
    // The range alternative is tried first: it already accepts a lone NOON, and trying
    // NOON on its own first would stop after "NOON" and never see a range that starts with it.
    public static readonly Parser<string> Time =
        (from start in TimePoint
         from dash in Hyphen.Then(_ => Parse.WhiteSpace.Many()).Optional()
         from end in TimePoint.Optional()
         select end.IsDefined
             ? $"{start} {end.Get()}"
             : start)
        .Or(Tbd);

    /// <summary>
    /// Parser for section headers of the form "EventName &lt;dash&gt; (MS|HS)".
    /// The event name is everything up to the first dash character, so names that
    /// themselves contain a dash are not matched. No end-of-input check is applied.
    /// </summary>
    public static readonly Parser<(string EventName, string SchoolLevel)> SectionHeader =
        from eventName in Parse.AnyChar.Except(Hyphen).Many().Text().Token()
        from hyphen in Hyphen.Token()
        from schoolLevel in Parse.String("MS").Or(Parse.String("HS")).Text().Token()
        select (eventName.Trim(), schoolLevel);

    /// <summary>
    /// Parser for the "General Schedule" / "General Session" header text.
    /// </summary>
    public static readonly Parser<string> GeneralSchedule =
        Parse.String("General Schedule").Or(Parse.String("General Session")).Text().Token();

    // Whole-line variant used by IsGeneralSchedule: nothing may follow the header text.
    private static readonly Parser<string> GeneralScheduleLine = GeneralSchedule.End();

    /// <summary>
    /// Parser for comment lines (starting with #). Yields the text after the '#'.
    /// </summary>
    public static readonly Parser<string> CommentLine =
        from hash in Parse.Char('#')
        from rest in Parse.AnyChar.Many().Text()
        select rest;

    /// <summary>
    /// Attempts to parse a section header from the given line.
    /// </summary>
    /// <param name="line">The line to inspect; null is treated as "not a header".</param>
    /// <returns>
    /// The trimmed event name and school level ("MS" or "HS"),
    /// or null if the line is not a section header.
    /// </returns>
    public static (string EventName, string SchoolLevel)? TryParseSectionHeader(string line)
    {
        if (line is null)
        {
            return null;
        }

        // TryParse reports failure through the result object, so no exception handling is needed.
        var result = SectionHeader.TryParse(line);
        if (!result.WasSuccessful)
        {
            return null;
        }

        return result.Value;
    }

    /// <summary>
    /// Determines whether the given line is a General Schedule/Session header.
    /// </summary>
    /// <param name="line">The line to inspect; null is treated as "not a header".</param>
    /// <returns>
    /// True when the line consists solely of "General Schedule" or "General Session"
    /// (surrounding whitespace allowed); otherwise false.
    /// </returns>
    public static bool IsGeneralSchedule(string line)
    {
        return line is not null && GeneralScheduleLine.TryParse(line).WasSuccessful;
    }

    /// <summary>
    /// Determines whether the given line is a comment.
    /// </summary>
    /// <param name="line">The line to inspect; null is treated as "not a comment".</param>
    /// <returns>True when the first non-whitespace character of the line is '#'.</returns>
    public static bool IsCommentLine(string line)
    {
        return line is not null && line.TrimStart().StartsWith("#", StringComparison.Ordinal);
    }
}
+57 -32
View File
@@ -31,7 +31,7 @@ public class EventOccurrenceParser
new (
@"" + //
@"(?<Name>^[^#].*)\s" +
@"(?<Month>February|March|April|May|June|July)\s" +
@"(?<Month>January|February|March|April|May|June|July|August|September|October|November|December)\s" +
@"(?<DayOfMonth>\d{1,2});?\s" +
@"(?<TimeAndLocation>.*)"
);
@@ -63,14 +63,50 @@ public class EventOccurrenceParser
if (string.IsNullOrWhiteSpace(trimmedLine))
continue;
// Skip comment lines (starting with "#")
if (trimmedLine.StartsWith("#", StringComparison.Ordinal))
// Skip comment lines (starting with "#") - use grammar parser
if (EventOccurrenceGrammar.IsCommentLine(trimmedLine))
continue;
var match = _re.Match(trimmedLine);
if (!match.Success)
{
if (trimmedLine.Contains("MS"))
// Try to parse section header using grammar parser
var sectionHeader = EventOccurrenceGrammar.TryParseSectionHeader(trimmedLine);
if (sectionHeader.HasValue)
{
var (eventNamePart, schoolLevel) = sectionHeader.Value;
// Use fuzzy matching to find the best matching event definition
var evt =
(from e in _events
let rat = Fuzz.Ratio(e.Name, eventNamePart)
where rat > 50
orderby rat descending
select e).FirstOrDefault();
if (evt == null)
{
issues.Add(new ParsingIssue
{
LineNumber = index,
LineContent = trimmedLine,
IssueType = ParsingIssueType.UnmatchedLine,
Message = $"Section header '{eventNamePart} {schoolLevel}' found but no matching event definition (best match ratio: {Fuzz.Ratio(eventNamePart, _events.FirstOrDefault()?.Name ?? "")})"
});
continue;
}
currentEventDefinition = evt;
continue;
}
// Check for General Schedule/Session using grammar parser
if (EventOccurrenceGrammar.IsGeneralSchedule(trimmedLine))
{
currentEventDefinition = EventDefinition.GeneralSchedule;
continue;
}
// Also check for simple "MS" or "HS" in line (backward compatibility)
if (trimmedLine.Contains("MS") || trimmedLine.Contains("HS"))
{
var evt =
(from e in _events
@@ -85,18 +121,13 @@ public class EventOccurrenceParser
LineNumber = index,
LineContent = trimmedLine,
IssueType = ParsingIssueType.UnmatchedLine,
Message = $"Section header with 'MS' found but no matching event definition (best match ratio: {Fuzz.Ratio(trimmedLine, _events.FirstOrDefault()?.Name ?? "")})"
Message = $"Section header with 'MS' or 'HS' found but no matching event definition (best match ratio: {Fuzz.Ratio(trimmedLine, _events.FirstOrDefault()?.Name ?? "")})"
});
continue;
}
currentEventDefinition = evt;
continue;
}
if (trimmedLine == "General Schedule" || trimmedLine == "General Session")
{
currentEventDefinition = EventDefinition.GeneralSchedule;
continue;
}
// Skip continuation lines (lines that look like they're continuing from previous line)
// These are typically lines that:
@@ -280,31 +311,25 @@ public class EventOccurrenceParser
/// <summary>
/// Builds a <see cref="DateOnly"/> from an English month name, a day-of-month string and a year.
/// </summary>
/// <param name="month">Full English month name; matched case-insensitively.</param>
/// <param name="dayOfMonth">Day of the month written as decimal digits.</param>
/// <param name="year">Calendar year.</param>
/// <returns>The date for the given month, day and year.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="month"/> is null or is not one of the twelve English month names.
/// </exception>
/// <exception cref="FormatException"><paramref name="dayOfMonth"/> is not a valid integer.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// The day does not exist in the given month and year (thrown by the <see cref="DateOnly"/> constructor).
/// </exception>
private DateOnly ParseDate(string month, string dayOfMonth, int year)
{
    ArgumentNullException.ThrowIfNull(month);

    // Invariant lower-casing keeps the lookup independent of the current culture.
    int monthNum = month.ToLowerInvariant() switch
    {
        "january" => 1,
        "february" => 2,
        "march" => 3,
        "april" => 4,
        "may" => 5,
        "june" => 6,
        "july" => 7,
        "august" => 8,
        "september" => 9,
        "october" => 10,
        "november" => 11,
        "december" => 12,
        _ => throw new ArgumentException($"Invalid month: {month}", nameof(month))
    };

    var day = int.Parse(dayOfMonth, System.Globalization.CultureInfo.InvariantCulture);
    return new DateOnly(year, monthNum, day);
}
/// <summary>
@@ -469,6 +469,153 @@ public class EventOccurrenceParserIssues_Tests
EventOccurrenceParserTestHelpers.CleanupTempFile(tempFile);
}
}
/// <summary>
/// Parses one "Submit Entry" line for each of the twelve month names and expects
/// exactly one occurrence and no parsing issues every time.
/// </summary>
[Test]
public void Parse_AllMonths_AreSupported()
{
    // Arrange
    var months = new[] { "January", "February", "March", "April", "May", "June",
        "July", "August", "September", "October", "November", "December" };
    var events = new[] { EventOccurrenceParserTestHelpers.CreateTestEvent("Test Event") };
    var locationConfig = EventOccurrenceParserTestHelpers.CreateLocationConfig("Room *");

    foreach (var month in months)
    {
        var testContent = $"Test Event MS\nSubmit Entry {month} 15 3:00 p.m. Room A";
        var tempFile = EventOccurrenceParserTestHelpers.CreateTempFile(testContent);
        try
        {
            // Constructed inside the try so the temp file is removed even if construction throws.
            var parser = new EventOccurrenceParser(tempFile, events, locationConfig);

            // Act
            var result = parser.Parse();

            // Assert
            // A single assertion carries the detailed issue list as its failure message.
            var issueMessages = string.Join("; ", result.Issues.Select(i => $"{i.IssueType}: {i.Message}"));
            Assert.That(result.Issues, Has.Count.EqualTo(0),
                $"Month {month} had {result.Issues.Count} issue(s): {issueMessages}");
            Assert.That(result.Occurrences.Values.Sum(list => list.Count), Is.EqualTo(1),
                $"Month {month} should produce one occurrence");
            var occurrence = result.Occurrences.Values.First().First();
            Assert.That(occurrence.Date, Does.Contain(month),
                $"Occurrence date should contain {month}");
        }
        finally
        {
            EventOccurrenceParserTestHelpers.CleanupTempFile(tempFile);
        }
    }
}
/// <summary>
/// A section header that separates the event name from the school level with an
/// en-dash (U+2013) must select the matching event for the lines that follow.
/// </summary>
[Test]
public void Parse_SectionHeader_WithEnDash_IsRecognized()
{
    // Arrange
    // The en-dash is written as an escape so it cannot be lost when the file is re-encoded.
    var testContent = "Biotechnology \u2013 MS\n" +
                      "Submit Entry March 15 8:00 a.m. Room 1";
    var tempFile = EventOccurrenceParserTestHelpers.CreateTempFile(testContent);
    try
    {
        var events = new[] { EventOccurrenceParserTestHelpers.CreateTestEvent("Biotechnology") };
        var parser = new EventOccurrenceParser(tempFile, events);

        // Act
        var result = parser.Parse();

        // Assert
        Assert.That(result.Issues, Has.Count.EqualTo(0));
        Assert.That(result.Occurrences.Values.Sum(list => list.Count), Is.EqualTo(1));
        Assert.That(result.Occurrences.ContainsKey(events[0]));
    }
    finally
    {
        EventOccurrenceParserTestHelpers.CleanupTempFile(tempFile);
    }
}
/// <summary>
/// A section header using an ASCII hyphen between event name and school level
/// must attach the following occurrence line to the matching event.
/// </summary>
[Test]
public void Parse_SectionHeader_WithHyphen_IsRecognized()
{
    // Arrange
    const string scheduleText =
        "Architectural Design - HS\nSubmit Entry March 15 8:00 a.m. Room 1";
    var schedulePath = EventOccurrenceParserTestHelpers.CreateTempFile(scheduleText);
    var definitions = new[] { EventOccurrenceParserTestHelpers.CreateTestEvent("Architectural Design") };
    var sut = new EventOccurrenceParser(schedulePath, definitions);

    try
    {
        // Act
        var parsed = sut.Parse();

        // Assert
        var occurrenceTotal = parsed.Occurrences.Values.Sum(entries => entries.Count);
        var hasExpectedEvent = parsed.Occurrences.ContainsKey(definitions[0]);

        Assert.That(parsed.Issues, Has.Count.EqualTo(0));
        Assert.That(occurrenceTotal, Is.EqualTo(1));
        Assert.That(hasExpectedEvent, Is.True);
    }
    finally
    {
        EventOccurrenceParserTestHelpers.CleanupTempFile(schedulePath);
    }
}
/// <summary>
/// A section header using an em-dash between event name and school level
/// must attach the following occurrence line to the matching event.
/// </summary>
[Test]
public void Parse_SectionHeader_WithEmDash_IsRecognized()
{
    // Arrange
    const string scheduleText =
        "Coding — MS\nSubmit Entry March 15 8:00 a.m. Room 1";
    var schedulePath = EventOccurrenceParserTestHelpers.CreateTempFile(scheduleText);
    var definitions = new[] { EventOccurrenceParserTestHelpers.CreateTestEvent("Coding") };
    var sut = new EventOccurrenceParser(schedulePath, definitions);

    try
    {
        // Act
        var parsed = sut.Parse();

        // Assert
        var occurrenceTotal = parsed.Occurrences.Values.Sum(entries => entries.Count);
        var hasExpectedEvent = parsed.Occurrences.ContainsKey(definitions[0]);

        Assert.That(parsed.Issues, Has.Count.EqualTo(0));
        Assert.That(occurrenceTotal, Is.EqualTo(1));
        Assert.That(hasExpectedEvent, Is.True);
    }
    finally
    {
        EventOccurrenceParserTestHelpers.CleanupTempFile(schedulePath);
    }
}
/// <summary>
/// Irregular whitespace around the dash in a section header must not prevent the
/// header from being recognised and matched to its event.
/// </summary>
[Test]
public void Parse_SectionHeader_WithWhitespace_IsRecognized()
{
    // Arrange
    // Several spaces on both sides of the dash exercise the grammar's whitespace tolerance;
    // without a dash the line would only reach the legacy "contains MS/HS" fallback.
    var testContent = "Event Name   -   MS\n" +
                      "Submit Entry March 15 8:00 a.m. Room 1";
    var tempFile = EventOccurrenceParserTestHelpers.CreateTempFile(testContent);
    try
    {
        var events = new[] { EventOccurrenceParserTestHelpers.CreateTestEvent("Event Name") };
        var parser = new EventOccurrenceParser(tempFile, events);

        // Act
        var result = parser.Parse();

        // Assert
        Assert.That(result.Issues, Has.Count.EqualTo(0));
        Assert.That(result.Occurrences.Values.Sum(list => list.Count), Is.EqualTo(1));
        Assert.That(result.Occurrences.ContainsKey(events[0]));
    }
    finally
    {
        EventOccurrenceParserTestHelpers.CleanupTempFile(tempFile);
    }
}
}