diff --git a/Core/Core.csproj b/Core/Core.csproj index 12f20cd..6e4c8b5 100644 --- a/Core/Core.csproj +++ b/Core/Core.csproj @@ -12,6 +12,7 @@ + \ No newline at end of file diff --git a/Core/Models/EventOccurrenceParseResult.cs b/Core/Models/EventOccurrenceParseResult.cs index 4ebbbd2..914b8e8 100644 --- a/Core/Models/EventOccurrenceParseResult.cs +++ b/Core/Models/EventOccurrenceParseResult.cs @@ -51,6 +51,11 @@ public class ParsingIssue /// public int LineNumber { get; set; } + /// + /// The column number where the issue occurred (1-based, 0 if not available). + /// + public int ColumnNumber { get; set; } + /// /// The actual line content where the issue occurred. /// @@ -65,6 +70,16 @@ public class ParsingIssue /// Human-readable description of the issue. /// public string Message { get; set; } = string.Empty; + + /// + /// What was expected at the error location (e.g., "month name", "MS or HS", "time value"). + /// + public string Expected { get; set; } = string.Empty; + + /// + /// What was actually found at the error location. + /// + public string Found { get; set; } = string.Empty; } /// diff --git a/Core/Parsers/EventOccurrenceGrammar.cs b/Core/Parsers/EventOccurrenceGrammar.cs new file mode 100644 index 0000000..96cc823 --- /dev/null +++ b/Core/Parsers/EventOccurrenceGrammar.cs @@ -0,0 +1,153 @@ +using Sprache; +using Core.Entities; +using Core.Models; + +namespace Core.Parsers; + +/// +/// Grammar definitions for parsing event occurrence DSL using parser combinators. +/// Provides composable parsers for each grammar rule. 
+///
+public static class EventOccurrenceGrammar
+{
+    /// <summary>
+    /// Builds a parser for a fixed keyword: matches <paramref name="text"/> exactly
+    /// (case-sensitive) and skips any whitespace on either side of it.
+    /// </summary>
+    /// <param name="text">The literal text to match.</param>
+    /// <returns>A parser that yields the matched text.</returns>
+    private static Parser<string> Keyword(string text) => Parse.String(text).Text().Token();
+
+    /// <summary>
+    /// Parser for month names (January through December).
+    /// </summary>
+    /// <remarks>
+    /// Names must be written in full with an initial capital. The alternatives are tried in
+    /// calendar order; a failed alternative does not consume input, so names sharing a prefix
+    /// ("March"/"May", "June"/"July") do not interfere with each other.
+    /// </remarks>
+    public static readonly Parser<string> Month =
+        Keyword("January")
+            .Or(Keyword("February"))
+            .Or(Keyword("March"))
+            .Or(Keyword("April"))
+            .Or(Keyword("May"))
+            .Or(Keyword("June"))
+            .Or(Keyword("July"))
+            .Or(Keyword("August"))
+            .Or(Keyword("September"))
+            .Or(Keyword("October"))
+            .Or(Keyword("November"))
+            .Or(Keyword("December"));
+
+    ///
+    /// Parser for day of month (1-31, optional semicolon).
+    ///
+    /// <remarks>
+    /// A number outside 1-31, or one too large for <see cref="int"/>, is reported as a
+    /// parse failure instead of escaping the parser as an exception.
+    /// </remarks>
+    public static readonly Parser<int> DayOfMonth =
+        from digits in Parse.Number
+        let day = ToDayOfMonth(digits)
+        where day.HasValue
+        from semicolon in Parse.Char(';').Optional()
+        select day.Value;
+
+    /// <summary>
+    /// Converts a digit string to a day of month.
+    /// </summary>
+    /// <param name="digits">The text matched by the number parser.</param>
+    /// <returns>The day, or null when the text is not a whole number from 1 to 31.</returns>
+    private static int? ToDayOfMonth(string digits)
+    {
+        // TryParse (rather than Parse) so an over-long digit run is a mismatch, not an OverflowException.
+        var parsed = int.TryParse(
+            digits,
+            System.Globalization.NumberStyles.None,
+            System.Globalization.CultureInfo.InvariantCulture,
+            out var day);
+
+        if (!parsed || day < 1 || day > 31)
+        {
+            return null;
+        }
+
+        return day;
+    }
+
+    // Time parsing components
+    private static readonly Parser<string> Noon = Parse.String("NOON").Text().Token();
+    private static readonly Parser<string> Tbd = Parse.String("TBD").Text().Token();
+
+    // Accepted meridiem spellings; the spelling that matched is kept as written.
+    private static readonly Parser<string> AmPm =
+        Parse.String("a.m.").Or(Parse.String("am")).Or(Parse.String("A.M.")).Or(Parse.String("AM"))
+            .Or(Parse.String("p.m.")).Or(Parse.String("pm")).Or(Parse.String("P.M.")).Or(Parse.String("PM"))
+            .Text().Token();
+
+    // A clock time such as "3:00 p.m." or "8 am", rendered as "<hour>:<minute> <meridiem>";
+    // the colon and the minutes are both optional and minutes default to "00".
+    private static readonly Parser<string> TimeValue =
+        from hour in Parse.Number
+        from colon in Parse.Char(':').Optional()
+        from minute in Parse.Number.Optional()
+        from ws in Parse.WhiteSpace.Many()
+        from ampm in AmPm
+        select $"{hour}:{(minute.IsDefined ? minute.Get() : "00")} {ampm}";
+
+    // Either end of a time range: a clock time or the literal NOON.
+    private static readonly Parser<string> TimePoint = TimeValue.Or(Noon);
+
+    /// <summary>
+    /// Parser for hyphen characters (en-dash, hyphen, em-dash).
+    /// </summary>
+    public static readonly Parser<char> Hyphen = Parse.Char('–').Or(Parse.Char('-')).Or(Parse.Char('—'));
+
+    /// <summary>
+    /// Parser for time values, including ranges and special values (NOON, TBD).
+    /// </summary>
+    /// <remarks>
+    /// The range alternative is tried first. Its dash and end time are both optional, so it
+    /// also covers a lone clock time or a lone NOON; trying a bare NOON first would stop a
+    /// range such as "NOON – 1:00 p.m." after its first word. Ranges are rendered with an
+    /// en-dash whichever dash character was typed.
+    /// </remarks>
+    public static readonly Parser<string> Time =
+        (from start in TimePoint
+         from dash in Hyphen.Then(_ => Parse.WhiteSpace.Many()).Optional()
+         from end in TimePoint.Optional()
+         select end.IsDefined
+             ? $"{start} – {end.Get()}"
+             : start)
+        .Or(Tbd);
+
+    /// <summary>
+    /// Parser for section headers: EventName [–-—] (MS|HS).
+    /// </summary>
+    /// <remarks>
+    /// The event name is everything before the first dash character, so a name that itself
+    /// contains a dash does not parse as a header. Text after the school level is left
+    /// unconsumed; this parser does not require end of input.
+    /// </remarks>
+    public static readonly Parser<(string EventName, string SchoolLevel)> SectionHeader =
+        from eventName in Parse.AnyChar.Except(Hyphen).Many().Text().Token()
+        from hyphen in Hyphen.Token()
+        from schoolLevel in Parse.String("MS").Or(Parse.String("HS")).Text().Token()
+        select (eventName.Trim(), schoolLevel);
+
+    ///
+    /// Parser for General Schedule/Session headers.
+    ///
+    public static readonly Parser<string> GeneralSchedule =
+        Parse.String("General Schedule").Or(Parse.String("General Session")).Text().Token();
+
+    // GeneralSchedule anchored to end of input: a line that merely starts with the phrase
+    // is not a header.
+    private static readonly Parser<string> GeneralScheduleLine = GeneralSchedule.End();
+
+    /// <summary>
+    /// Parser for comment lines (starting with #). Yields the text that follows the '#'.
+    /// </summary>
+    public static readonly Parser<string> CommentLine =
+        from hash in Parse.Char('#')
+        from rest in Parse.AnyChar.Many().Text()
+        select rest;
+
+    /// <summary>
+    /// Attempts to parse a section header from the given line.
+    /// </summary>
+    /// <param name="line">The line to inspect; null is treated as "not a header".</param>
+    /// <returns>
+    /// The trimmed event name and the school level ("MS" or "HS"), or null if the line is
+    /// not a section header.
+    /// </returns>
+    public static (string EventName, string SchoolLevel)? TryParseSectionHeader(string line)
+    {
+        if (line is null)
+        {
+            return null;
+        }
+
+        // TryParse reports a mismatch through the result object, so a non-header line
+        // (the common case) does not cost a thrown-and-swallowed exception.
+        var result = SectionHeader.TryParse(line);
+        if (!result.WasSuccessful)
+        {
+            return null;
+        }
+
+        return result.Value;
+    }
+
+    /// <summary>
+    /// Checks whether the given line is a General Schedule/Session header.
+    /// </summary>
+    /// <param name="line">The line to inspect; null is treated as "not a header".</param>
+    /// <returns>
+    /// True when the line consists of "General Schedule" or "General Session" and nothing
+    /// else apart from surrounding whitespace; otherwise false.
+    /// </returns>
+    public static bool IsGeneralSchedule(string line)
+    {
+        return line is not null && GeneralScheduleLine.TryParse(line).WasSuccessful;
+    }
+
+    ///
+    /// Attempts to parse a comment line from the given line.
+    /// Returns true if the line is a comment.
+    ///
+    public static bool IsCommentLine(string line)
+    {
+        return line.TrimStart().StartsWith("#", StringComparison.Ordinal);
+    }
+}
+
diff --git a/Core/Parsers/EventOccurrenceParser.cs b/Core/Parsers/EventOccurrenceParser.cs
index f1b33c9..b77ea83 100644
--- a/Core/Parsers/EventOccurrenceParser.cs
+++ b/Core/Parsers/EventOccurrenceParser.cs
@@ -31,7 +31,7 @@ public class EventOccurrenceParser
         new
         (
             @"" + // @"(?^[^#].*)\s" +
-            @"(?February|March|April|May|June|July)\s" +
+            @"(?January|February|March|April|May|June|July|August|September|October|November|December)\s" +
             @"(?\d{1,2});?\s" +
             @"(?.*)"
         );
@@ -63,14 +63,50 @@ public class EventOccurrenceParser
             if (string.IsNullOrWhiteSpace(trimmedLine))
                 continue;
 
-            // Skip comment lines (starting with "#")
-            if (trimmedLine.StartsWith("#", StringComparison.Ordinal))
+            // Skip comment lines (starting with "#") - use grammar parser
+            if (EventOccurrenceGrammar.IsCommentLine(trimmedLine))
                 continue;
 
             var match = _re.Match(trimmedLine);
             if (!match.Success)
             {
-                if (trimmedLine.Contains("MS"))
+                // Try to parse section header using grammar parser
+                var sectionHeader = EventOccurrenceGrammar.TryParseSectionHeader(trimmedLine);
+                if (sectionHeader.HasValue)
+                {
+                    var (eventNamePart, schoolLevel) = sectionHeader.Value;
+
+                    // Use fuzzy matching to find the best matching event definition
+                    var evt =
+                        (from e in _events
+                         let rat = Fuzz.Ratio(e.Name, eventNamePart)
+                         where rat > 50
+                         orderby rat descending
+                         select e).FirstOrDefault();
+                    if (evt == null)
+                    {
+                        issues.Add(new ParsingIssue
+                        {
+                            LineNumber = index,
+                            LineContent = trimmedLine,
+                            IssueType = ParsingIssueType.UnmatchedLine,
+                            Message = $"Section header '{eventNamePart} – {schoolLevel}' found but no matching event definition (best match ratio: {_events.Select(e => Fuzz.Ratio(e.Name, eventNamePart)).DefaultIfEmpty(0).Max()})"
+                        });
+                        continue;
+                    }
+                    currentEventDefinition = evt;
+                    continue;
+                }
+
+                // Check for General Schedule/Session using grammar parser
+                if (EventOccurrenceGrammar.IsGeneralSchedule(trimmedLine))
+                {
+                    currentEventDefinition = EventDefinition.GeneralSchedule;
+                    continue;
+                }
+
+                // Also check for simple "MS" or "HS" in line (backward compatibility)
+                if (trimmedLine.Contains("MS") || trimmedLine.Contains("HS"))
                 {
                     var evt =
                         (from e in _events
@@ -85,18 +121,13 @@ public class EventOccurrenceParser
                             LineNumber = index,
                             LineContent = trimmedLine,
                             IssueType = ParsingIssueType.UnmatchedLine,
-                            Message = $"Section header with 'MS' found but no matching event definition (best match ratio: {Fuzz.Ratio(trimmedLine, _events.FirstOrDefault()?.Name ?? "")})"
+                            Message = $"Section header with 'MS' or 'HS' found but no matching event definition (best match ratio: {_events.Select(e => Fuzz.Ratio(trimmedLine, e.Name)).DefaultIfEmpty(0).Max()})"
                         });
                         continue;
                     }
                     currentEventDefinition = evt;
                     continue;
                 }
-                if (trimmedLine == "General Schedule" || trimmedLine == "General Session")
-                {
-                    currentEventDefinition = EventDefinition.GeneralSchedule;
-                    continue;
-                }
 
                 // Skip continuation lines (lines that look like they're continuing from previous line)
                 // These are typically lines that:
@@ -280,31 +311,25 @@ public class EventOccurrenceParser
 
     private DateOnly ParseDate(string month, string dayOfMonth, int year)
     {
-        int monthNum = 1;
-        switch (month)
+        int monthNum = month.ToLowerInvariant() switch // invariant casing: the lookup must not vary with the current culture
         {
-            case "February":
-                monthNum = 2;
-                break;
-            case "March":
-                monthNum = 3;
-                break;
-            case "April":
-                monthNum = 4;
-                break;
-            case "May":
-                monthNum = 5;
-                break;
-            case "June":
-                monthNum = 6;
-                break;
-            case "July":
-                monthNum = 7;
-                break;
-        }
+            "january" => 1,
+            "february" => 2,
+            "march" => 3,
+            "april" => 4,
+            "may" => 5,
+            "june" => 6,
+            "july" => 7,
+            "august" => 8,
+            "september" => 9,
+            "october" => 10,
+            "november" => 11,
+            "december" => 12,
+            _ => throw new ArgumentException($"Invalid month: {month}", nameof(month))
+        };
 
         var day = int.Parse(dayOfMonth);
-        return new DateOnly(year, monthNum, day); ;
+        return new DateOnly(year, monthNum, day);
     }
 
     ///
diff --git a/Tests/Parsers/EventOccurrenceParserIssues_Tests.cs b/Tests/Parsers/EventOccurrenceParserIssues_Tests.cs
index b8c7bc3..51344d6 100644
--- a/Tests/Parsers/EventOccurrenceParserIssues_Tests.cs
+++ b/Tests/Parsers/EventOccurrenceParserIssues_Tests.cs
@@ -469,6 +469,153 @@ public class EventOccurrenceParserIssues_Tests
             EventOccurrenceParserTestHelpers.CleanupTempFile(tempFile);
         }
     }
+
+    [Test]
+    public void Parse_AllMonths_AreSupported()
+    {
+        // Arrange
+        var months = new[] { "January", "February", "March", "April", "May", "June",
+                             "July", "August", "September", "October", "November", "December" };
+        var events = new[] { EventOccurrenceParserTestHelpers.CreateTestEvent("Test Event") };
+        var locationConfig = EventOccurrenceParserTestHelpers.CreateLocationConfig("Room *");
+
+        foreach (var month in months)
+        {
+            var testContent = $"Test Event – MS\n" +
+                              $"Submit Entry {month} 15 3:00 p.m. Room A";
+            var tempFile = EventOccurrenceParserTestHelpers.CreateTempFile(testContent);
+            var parser = new EventOccurrenceParser(tempFile, events, locationConfig);
+
+            try
+            {
+                // Act
+                var result = parser.Parse();
+
+                // Assert
+                if (result.Issues.Count > 0)
+                {
+                    var issueMessages = string.Join("; ", result.Issues.Select(i => $"{i.IssueType}: {i.Message}"));
+                    Assert.Fail($"Month {month} had {result.Issues.Count} issue(s): {issueMessages}");
+                }
+                Assert.That(result.Issues, Has.Count.EqualTo(0),
+                    $"Month {month} should parse without issues");
+                Assert.That(result.Occurrences.Values.Sum(list => list.Count), Is.EqualTo(1),
+                    $"Month {month} should produce one occurrence");
+
+                var occurrence = result.Occurrences.Values.First().First();
+                Assert.That(occurrence.Date, Does.Contain(month),
+                    $"Occurrence date should contain {month}");
+            }
+            finally
+            {
+                EventOccurrenceParserTestHelpers.CleanupTempFile(tempFile);
+            }
+        }
+    }
+
+    [Test]
+    public void Parse_SectionHeader_WithEnDash_IsRecognized()
+    {
+        // Arrange
+        var testContent = "Biotechnology – MS\n" +
+                          "Submit Entry March 15 8:00 a.m. Room 1";
+        var tempFile = EventOccurrenceParserTestHelpers.CreateTempFile(testContent);
+        var events = new[] { EventOccurrenceParserTestHelpers.CreateTestEvent("Biotechnology") };
+        var parser = new EventOccurrenceParser(tempFile, events);
+
+        try
+        {
+            // Act
+            var result = parser.Parse();
+
+            // Assert
+            Assert.That(result.Issues, Has.Count.EqualTo(0));
+            Assert.That(result.Occurrences.Values.Sum(list => list.Count), Is.EqualTo(1));
+            Assert.That(result.Occurrences.ContainsKey(events[0]));
+        }
+        finally
+        {
+            EventOccurrenceParserTestHelpers.CleanupTempFile(tempFile);
+        }
+    }
+
+    [Test]
+    public void Parse_SectionHeader_WithHyphen_IsRecognized()
+    {
+        // Arrange
+        var testContent = "Architectural Design - HS\n" +
+                          "Submit Entry March 15 8:00 a.m. Room 1";
+        var tempFile = EventOccurrenceParserTestHelpers.CreateTempFile(testContent);
+        var events = new[] { EventOccurrenceParserTestHelpers.CreateTestEvent("Architectural Design") };
+        var parser = new EventOccurrenceParser(tempFile, events);
+
+        try
+        {
+            // Act
+            var result = parser.Parse();
+
+            // Assert
+            Assert.That(result.Issues, Has.Count.EqualTo(0));
+            Assert.That(result.Occurrences.Values.Sum(list => list.Count), Is.EqualTo(1));
+            Assert.That(result.Occurrences.ContainsKey(events[0]));
+        }
+        finally
+        {
+            EventOccurrenceParserTestHelpers.CleanupTempFile(tempFile);
+        }
+    }
+
+    [Test]
+    public void Parse_SectionHeader_WithEmDash_IsRecognized()
+    {
+        // Arrange
+        var testContent = "Coding — MS\n" +
+                          "Submit Entry March 15 8:00 a.m. Room 1";
+        var tempFile = EventOccurrenceParserTestHelpers.CreateTempFile(testContent);
+        var events = new[] { EventOccurrenceParserTestHelpers.CreateTestEvent("Coding") };
+        var parser = new EventOccurrenceParser(tempFile, events);
+
+        try
+        {
+            // Act
+            var result = parser.Parse();
+
+            // Assert
+            Assert.That(result.Issues, Has.Count.EqualTo(0));
+            Assert.That(result.Occurrences.Values.Sum(list => list.Count), Is.EqualTo(1));
+            Assert.That(result.Occurrences.ContainsKey(events[0]));
+        }
+        finally
+        {
+            EventOccurrenceParserTestHelpers.CleanupTempFile(tempFile);
+        }
+    }
+
+    [Test]
+    public void Parse_SectionHeader_WithWhitespace_IsRecognized()
+    {
+        // Arrange
+        var testContent = "Event Name – MS\n" +
+                          "Submit Entry March 15 8:00 a.m. Room 1";
+        var tempFile = EventOccurrenceParserTestHelpers.CreateTempFile(testContent);
+        var events = new[] { EventOccurrenceParserTestHelpers.CreateTestEvent("Event Name") };
+        var parser = new EventOccurrenceParser(tempFile, events);
+
+        try
+        {
+            // Act
+            var result = parser.Parse();
+
+            // Assert
+            Assert.That(result.Issues, Has.Count.EqualTo(0));
+            Assert.That(result.Occurrences.Values.Sum(list => list.Count), Is.EqualTo(1));
+            Assert.That(result.Occurrences.ContainsKey(events[0]));
+        }
+        finally
+        {
+            EventOccurrenceParserTestHelpers.CleanupTempFile(tempFile);
+        }
+    }
 }