diff --git a/Core/Models/EventOccurrenceParseResult.cs b/Core/Models/EventOccurrenceParseResult.cs index 914b8e8..31c8744 100644 --- a/Core/Models/EventOccurrenceParseResult.cs +++ b/Core/Models/EventOccurrenceParseResult.cs @@ -30,6 +30,12 @@ public class EventOccurrenceParseResult /// public List Issues { get; set; } = new(); + /// + /// List of high school (HS) section headers that were encountered but skipped + /// because they don't match any event definition in the system. + /// + public List SkippedHSSectionHeaders { get; set; } = new(); + /// /// Total number of event occurrences successfully parsed. /// @@ -110,11 +116,6 @@ public enum ParsingIssueType /// /// Invalid format or other parsing issue. /// - InvalidFormat, - - /// - /// Location parsing failed (no matching pattern found). - /// - LocationParseFailure + InvalidFormat } diff --git a/Core/Parsers/EventOccurrence/LineClassifier.cs b/Core/Parsers/EventOccurrence/LineClassifier.cs index ce2cf0e..2322c9a 100644 --- a/Core/Parsers/EventOccurrence/LineClassifier.cs +++ b/Core/Parsers/EventOccurrence/LineClassifier.cs @@ -24,33 +24,19 @@ public static class LineClassifier /// /// Determines if a line is a continuation/wrapped line that should be skipped. /// These are typically lines that: - /// - Start with lowercase or special characters (not event names) + /// - Start with "*" (marks the start of a continuation block) /// - Are parenthetical notes like "(Semifinalists only)" - /// - Are informational text like "Schedule Posted on..." /// public static bool IsContinuationLine(string line) { var trimmed = line.Trim(); - // Skip parenthetical notes - if (trimmed.StartsWith("(", StringComparison.Ordinal) && trimmed.EndsWith(")", StringComparison.Ordinal)) + // Check if line starts with "*" (marks continuation block start) + if (trimmed.StartsWith("*", StringComparison.Ordinal)) return true; - // Skip lines that are clearly continuation text (start with lowercase, common continuation words) - if (trimmed.Length > 0 && char.IsLower(trimmed[0])) - { - // Check if it starts with common continuation words - var continuationPrefixes = new[] { "be ", "the ", "and ", "or ", "to ", "a ", "an ", "will ", "may ", "can " }; - foreach (var prefix in continuationPrefixes) - { - if (trimmed.StartsWith(prefix, StringComparison.OrdinalIgnoreCase)) - return true; - } - } - - // Skip informational lines that don't contain dates/times - if (trimmed.Contains("Schedule Posted", StringComparison.OrdinalIgnoreCase) || - trimmed.Contains("Note:", StringComparison.OrdinalIgnoreCase)) + // Skip parenthetical notes + if (trimmed.StartsWith("(", StringComparison.Ordinal) && trimmed.EndsWith(")", StringComparison.Ordinal)) return true; return false; diff --git a/Core/Parsers/EventOccurrence/TimeLocationParser.cs b/Core/Parsers/EventOccurrence/TimeLocationParser.cs index 9af16d3..32a4bfa 100644 --- a/Core/Parsers/EventOccurrence/TimeLocationParser.cs +++ b/Core/Parsers/EventOccurrence/TimeLocationParser.cs @@ -1,11 +1,10 @@ using System.Text.RegularExpressions; -using Core.Models; namespace Core.Parsers.EventOccurrence; /// /// Parses time and location from combined strings. -/// Handles time ranges, location extraction, and pattern matching. +/// Extracts time using regex, then uses everything after the time as the location. /// public static class TimeLocationParser { @@ -28,21 +27,18 @@ public static class TimeLocationParser RegexOptions.Compiled | RegexOptions.IgnoreCase); /// - /// Parses time and location from the timeAndLocation string using configurable location patterns. + /// Parses time and location from the timeAndLocation string. + /// Extracts time using regex, then uses everything after the time as the location (after cleaning time fragments). /// /// The combined time and location string. - /// The location parsing configuration with patterns. /// Output parameter: the parsed time string. /// Output parameter: the parsed location string. - /// Output parameter: whether location parsing was successful. public static void Parse( string timeAndLocation, - LocationParsingConfiguration? locationConfig, out string time, - out string location, - out bool locationParseSuccess) + out string location) { - // Try to separate time from location using the time regex + // Extract time using regex var timeLocationMatch = TimeLocationRegex.Match(timeAndLocation); if (!timeLocationMatch.Success) @@ -50,7 +46,6 @@ public static class TimeLocationParser // If time regex doesn't match, use the whole string as time time = timeAndLocation.Trim(); location = string.Empty; - locationParseSuccess = false; return; } @@ -63,61 +58,12 @@ public static class TimeLocationParser if (string.IsNullOrWhiteSpace(locationPart)) { location = string.Empty; - locationParseSuccess = true; // Consider it a success since no location is needed return; } - // Clean up location part - remove any remaining time components + // Clean location of any remaining time fragments // (e.g., "– 12:15 p.m. Exhibit Hall C" -> "Exhibit Hall C") - locationPart = CleanLocationText(locationPart); - - if (string.IsNullOrWhiteSpace(locationPart)) - { - location = string.Empty; - locationParseSuccess = true; // No location after cleaning is also valid - return; - } - - // Try to match location using configurable patterns - (location, locationParseSuccess) = TryMatchLocation(locationPart, locationConfig); - - // If no pattern matched but we have a location, use it anyway - // This allows parsing to continue while still tracking that the location didn't match a pattern - if (!locationParseSuccess) - { - location = locationPart; - } - } - - /// - /// Attempts to match a location string against configured patterns. - /// - private static (string location, bool success) TryMatchLocation( - string locationPart, - LocationParsingConfiguration? locationConfig) - { - // No patterns configured - can't match - if (locationConfig == null || !locationConfig.LocationPatterns.Any()) - { - return (string.Empty, false); - } - - // Try initial match - var location = LocationPatternMatcher.Match(locationPart, locationConfig.LocationPatterns); - if (!string.IsNullOrEmpty(location)) - { - return (location, true); - } - - // Try matching against trimmed version (handles extra whitespace) - var cleanedForMatching = locationPart.Trim(); - location = LocationPatternMatcher.Match(cleanedForMatching, locationConfig.LocationPatterns); - if (!string.IsNullOrEmpty(location)) - { - return (cleanedForMatching, true); - } - - return (string.Empty, false); + location = CleanLocationText(locationPart); } /// diff --git a/Core/Parsers/EventOccurrenceParser.cs b/Core/Parsers/EventOccurrenceParser.cs index 85468d0..6cff470 100644 --- a/Core/Parsers/EventOccurrenceParser.cs +++ b/Core/Parsers/EventOccurrenceParser.cs @@ -13,19 +13,18 @@ public class EventOccurrenceParserResult { public IDictionary> Occurrences { get; set; } = new Dictionary>(); public List Issues { get; set; } = new(); + public List SkippedHSSectionHeaders { get; set; } = new(); } public class EventOccurrenceParser { private FileSystemInfo _txtFile; private ICollection _events; - private LocationParsingConfiguration? _locationConfig; - public EventOccurrenceParser(FileSystemInfo txtFile, ICollection events, LocationParsingConfiguration? locationConfig = null) + public EventOccurrenceParser(FileSystemInfo txtFile, ICollection events) { _events = events; _txtFile = txtFile; - _locationConfig = locationConfig; } public EventOccurrenceParserResult Parse() @@ -34,6 +33,8 @@ public class EventOccurrenceParser var occurrences = result.Occurrences; var issues = result.Issues; EventDefinition? currentEventDefinition = null; + bool inContinuationMode = false; + bool inHSSection = false; var lines = File.ReadLines(_txtFile.FullName); foreach (var (line, index) in lines.Select((line, index) => (line, index + 1))) @@ -44,11 +45,19 @@ public class EventOccurrenceParser // Skip empty lines if (EventOccurrenceParsers.LineClassifier.IsEmptyLine(normalizedLine)) + { + // Empty lines break continuation mode + inContinuationMode = false; continue; + } // Skip comment lines (starting with "#") - use grammar parser if (EventOccurrenceParsers.LineClassifier.IsCommentLine(normalizedLine)) + { + // Comment lines break continuation mode + inContinuationMode = false; continue; + } // Try to parse occurrence line using grammar parser var occurrenceLine = EventOccurrenceGrammar.TryParseOccurrenceLine(normalizedLine); @@ -61,10 +70,23 @@ public class EventOccurrenceParser { var (eventNamePart, schoolLevel) = sectionHeader.Value; + // Section headers break continuation mode + inContinuationMode = false; + // Use fuzzy matching to find the best matching event definition var evt = EventOccurrenceParsers.SectionHeaderMatcher.MatchEventDefinition(eventNamePart, _events); if (evt == null) { + // Check if this is an HS event - if so, skip gracefully + if (schoolLevel.Equals("HS", StringComparison.OrdinalIgnoreCase)) + { + result.SkippedHSSectionHeaders.Add(normalizedLine); + currentEventDefinition = null; // Skip subsequent occurrences + inHSSection = true; // Mark that we're in an HS section + continue; // No issue created + } + + // For non-HS unmatched headers, create issue as before var bestRatio = EventOccurrenceParsers.SectionHeaderMatcher.GetBestMatchRatio(eventNamePart, _events); issues.Add(new ParsingIssue { @@ -76,12 +98,16 @@ public class EventOccurrenceParser continue; } currentEventDefinition = evt; + inHSSection = false; // Reset HS section flag for MS events continue; } // Check for General Schedule/Session using grammar parser if (EventOccurrenceParsers.SectionHeaderMatcher.IsGeneralSchedule(normalizedLine)) { + // General schedule breaks continuation mode + inContinuationMode = false; + inHSSection = false; // Reset HS section flag currentEventDefinition = EventDefinition.GeneralSchedule; continue; } @@ -89,9 +115,22 @@ public class EventOccurrenceParser // Also check for simple "MS" or "HS" in line (backward compatibility) if (EventOccurrenceParsers.SectionHeaderMatcher.HasSchoolLevel(normalizedLine)) { + // Section headers break continuation mode + inContinuationMode = false; + var evt = EventOccurrenceParsers.SectionHeaderMatcher.MatchEventDefinition(normalizedLine, _events); if (evt == null) { + // Check if this is an HS event - if so, skip gracefully + if (normalizedLine.Contains("HS", StringComparison.OrdinalIgnoreCase)) + { + result.SkippedHSSectionHeaders.Add(normalizedLine); + currentEventDefinition = null; // Skip subsequent occurrences + inHSSection = true; // Mark that we're in an HS section + continue; // No issue created + } + + // For non-HS unmatched headers, create issue as before var bestRatio = EventOccurrenceParsers.SectionHeaderMatcher.GetBestMatchRatio(normalizedLine, _events); issues.Add(new ParsingIssue { @@ -103,11 +142,18 @@ public class EventOccurrenceParser continue; } currentEventDefinition = evt; + inHSSection = false; // Reset HS section flag for MS events continue; } - // Skip continuation lines (lines that look like they're continuing from previous line) - if (EventOccurrenceParsers.LineClassifier.IsContinuationLine(normalizedLine)) + // Check if line starts with "*" to enter continuation mode + if (normalizedLine.TrimStart().StartsWith("*", StringComparison.Ordinal)) + { + inContinuationMode = true; + } + + // Skip continuation lines (in continuation mode OR line starts with "*" or is parenthetical) + if (inContinuationMode || EventOccurrenceParsers.LineClassifier.IsContinuationLine(normalizedLine)) { continue; } @@ -127,6 +173,15 @@ public class EventOccurrenceParser continue; } + // Occurrence lines break continuation mode + inContinuationMode = false; + + // Skip occurrences under HS sections (they won't match any event definition) + if (inHSSection) + { + continue; + } + var (occurrenceName, month, dayOfMonthStr, timeAndLocation) = occurrenceLine.Value; // Remove weekday suffix from occurrence name if present @@ -151,23 +206,8 @@ public class EventOccurrenceParser // timeAndLocation is already normalized (hyphens normalized) since normalizedLine was sanitized - // Parse time and location using configurable patterns - EventOccurrenceParsers.TimeLocationParser.Parse(timeAndLocation, _locationConfig, out string time, out string location, out bool locationParseSuccess); - - // Track location parsing failure if patterns are configured but none matched - if (!locationParseSuccess && !string.IsNullOrWhiteSpace(location)) - { - if (_locationConfig != null && _locationConfig.LocationPatterns.Any()) - { - issues.Add(new ParsingIssue - { - LineNumber = index, - LineContent = normalizedLine, - IssueType = ParsingIssueType.LocationParseFailure, - Message = $"Location '{location}' does not match any configured pattern" - }); - } - } + // Parse time and location - extract time using regex, then use everything after time as location + EventOccurrenceParsers.TimeLocationParser.Parse(timeAndLocation, out string time, out string location); // Parse date DateOnly? startDate = null; @@ -222,6 +262,9 @@ public class EventOccurrenceParser if (!occurrences.ContainsKey(eventDefinition)) occurrences.Add(eventDefinition, []); occurrences[eventDefinition].Add(eventOccurrence); + + // Reset HS section flag when we successfully parse an occurrence (means we're in a valid section) + inHSSection = false; } return result; diff --git a/Core/Services/EventOccurrenceParserService.cs b/Core/Services/EventOccurrenceParserService.cs index 60f0f4c..97c6283 100644 --- a/Core/Services/EventOccurrenceParserService.cs +++ b/Core/Services/EventOccurrenceParserService.cs @@ -12,20 +12,9 @@ namespace Core.Services; /// public class EventOccurrenceParserService : IEventOccurrenceParserService { - private readonly LocationParsingConfiguration? _locationConfig; - public EventOccurrenceParserService(IConfiguration? configuration = null) { - // Load location parsing configuration from IConfiguration if provided - if (configuration != null) - { - _locationConfig = configuration.GetSection("LocationParsingSettings").Get() - ?? LocationParsingConfiguration.Default; - } - else - { - _locationConfig = LocationParsingConfiguration.Default; - } + // Configuration parameter kept for backward compatibility but not used } /// @@ -48,8 +37,8 @@ public class EventOccurrenceParserService : IEventOccurrenceParserService File.WriteAllText(tempFile, text, Encoding.UTF8); var fileInfo = new FileInfo(tempFile); - // Use the existing EventOccurrenceParser with location configuration - var parser = new EventOccurrenceParser(fileInfo, events, _locationConfig); + // Use the existing EventOccurrenceParser + var parser = new EventOccurrenceParser(fileInfo, events); var parserResult = parser.Parse(); // Copy occurrences from parser result @@ -101,6 +90,9 @@ public class EventOccurrenceParserService : IEventOccurrenceParserService // Copy parsing issues from parser result result.Issues.AddRange(parserResult.Issues); + + // Copy skipped HS section headers from parser result + result.SkippedHSSectionHeaders.AddRange(parserResult.SkippedHSSectionHeaders); } finally { diff --git a/Tests/Parsers/EventOccurrence/LineClassifier_Tests.cs b/Tests/Parsers/EventOccurrence/LineClassifier_Tests.cs index 396f283..1259eb0 100644 --- a/Tests/Parsers/EventOccurrence/LineClassifier_Tests.cs +++ b/Tests/Parsers/EventOccurrence/LineClassifier_Tests.cs @@ -51,42 +51,23 @@ public class LineClassifier_Tests } [Test] - public void IsContinuationLine_StartsWithLowercaseContinuationWord_ReturnsTrue() + public void IsContinuationLine_StartsWithAsterisk_ReturnsTrue() { // Act & Assert - Assert.That(LineClassifier.IsContinuationLine("the event will be held"), Is.True); - Assert.That(LineClassifier.IsContinuationLine("and participants should arrive"), Is.True); - Assert.That(LineClassifier.IsContinuationLine("be sure to register"), Is.True); - Assert.That(LineClassifier.IsContinuationLine("or contact the coordinator"), Is.True); - } - - [Test] - public void IsContinuationLine_StartsWithLowercase_NonContinuationWord_ReturnsFalse() - { - // Act & Assert - Assert.That(LineClassifier.IsContinuationLine("important: bring materials"), Is.False); - } - - [Test] - public void IsContinuationLine_StartsWithUppercase_ReturnsFalse() - { - // Act & Assert - Assert.That(LineClassifier.IsContinuationLine("Important Event March 15"), Is.False); - } - - [Test] - public void IsContinuationLine_ContainsSchedulePosted_ReturnsTrue() - { - // Act & Assert - Assert.That(LineClassifier.IsContinuationLine("Schedule Posted on website"), Is.True); - } - - [Test] - public void IsContinuationLine_ContainsNote_ReturnsTrue() - { - // Act & Assert - Assert.That(LineClassifier.IsContinuationLine("Note: Additional information"), Is.True); + Assert.That(LineClassifier.IsContinuationLine("*The books of semifinalist teams"), Is.True); Assert.That(LineClassifier.IsContinuationLine("*Note: Important details"), Is.True); + Assert.That(LineClassifier.IsContinuationLine("*This is a continuation line"), Is.True); + Assert.That(LineClassifier.IsContinuationLine(" *Line with leading whitespace"), Is.True); + } + + [Test] + public void IsContinuationLine_DoesNotStartWithAsterisk_ReturnsFalse() + { + // Act & Assert + Assert.That(LineClassifier.IsContinuationLine("The event will be held"), Is.False); + Assert.That(LineClassifier.IsContinuationLine("Note: Additional information"), Is.False); + Assert.That(LineClassifier.IsContinuationLine("Important Event March 15"), Is.False); + Assert.That(LineClassifier.IsContinuationLine("Schedule Posted on website"), Is.False); } [Test] diff --git a/Tests/Parsers/EventOccurrence/TimeLocationParser_Tests.cs b/Tests/Parsers/EventOccurrence/TimeLocationParser_Tests.cs index 0acb3fc..abcc7b5 100644 --- a/Tests/Parsers/EventOccurrence/TimeLocationParser_Tests.cs +++ b/Tests/Parsers/EventOccurrence/TimeLocationParser_Tests.cs @@ -1,4 +1,3 @@ -using Core.Models; using Core.Parsers.EventOccurrence; using NUnit.Framework; @@ -10,110 +9,69 @@ public class TimeLocationParser_Tests [Test] public void Parse_TimeAndLocation_ExtractsBoth() { - // Arrange - var locationConfig = new LocationParsingConfiguration - { - LocationPatterns = new List { "Room *" } - }; - // Act - TimeLocationParser.Parse("10:30 a.m. Room 101", locationConfig, - out string time, out string location, out bool locationParseSuccess); + TimeLocationParser.Parse("10:30 a.m. Room 101", + out string time, out string location); // Assert Assert.That(time, Is.EqualTo("10:30 a.m.")); Assert.That(location, Is.EqualTo("Room 101")); - Assert.That(locationParseSuccess, Is.True); } [Test] public void Parse_TimeRangeAndLocation_ExtractsTimeRangeAndLocation() { - // Arrange - var locationConfig = new LocationParsingConfiguration - { - LocationPatterns = new List { "Room *" } - }; - // Act - TimeLocationParser.Parse("10:00 a.m. - 12:00 p.m. Room 202", locationConfig, - out string time, out string location, out bool locationParseSuccess); + TimeLocationParser.Parse("10:00 a.m. - 12:00 p.m. Room 202", + out string time, out string location); // Assert Assert.That(time, Is.EqualTo("10:00 a.m. - 12:00 p.m.")); Assert.That(location, Is.EqualTo("Room 202")); - Assert.That(locationParseSuccess, Is.True); } [Test] public void Parse_NOONAndLocation_ExtractsBoth() { - // Arrange - var locationConfig = new LocationParsingConfiguration - { - LocationPatterns = new List { "Hall *" } - }; - // Act - TimeLocationParser.Parse("NOON Hall C", locationConfig, - out string time, out string location, out bool locationParseSuccess); + TimeLocationParser.Parse("NOON Hall C", + out string time, out string location); // Assert Assert.That(time, Is.EqualTo("NOON")); Assert.That(location, Is.EqualTo("Hall C")); - Assert.That(locationParseSuccess, Is.True); } [Test] public void Parse_TimeOnly_NoLocation() { - // Arrange - var locationConfig = new LocationParsingConfiguration - { - LocationPatterns = new List { "Room *" } - }; - // Act - TimeLocationParser.Parse("3:00 p.m.", locationConfig, - out string time, out string location, out bool locationParseSuccess); + TimeLocationParser.Parse("3:00 p.m.", + out string time, out string location); // Assert Assert.That(time, Is.EqualTo("3:00 p.m.")); Assert.That(location, Is.Empty); - Assert.That(locationParseSuccess, Is.True); // No location is valid } [Test] - public void Parse_LocationNotMatchingPattern_StillReturnsLocation_ReportsFailure() + public void Parse_AnyLocation_ExtractsLocationWithoutValidation() { - // Arrange - var locationConfig = new LocationParsingConfiguration - { - LocationPatterns = new List { "Room *" } - }; - // Act - TimeLocationParser.Parse("10:00 a.m. Unknown Location", locationConfig, - out string time, out string location, out bool locationParseSuccess); + TimeLocationParser.Parse("10:00 a.m. Unknown Location", + out string time, out string location); // Assert Assert.That(time, Is.EqualTo("10:00 a.m.")); Assert.That(location, Is.EqualTo("Unknown Location")); - Assert.That(locationParseSuccess, Is.False); } [Test] public void Parse_LocationWithTimeComponent_CleansTimeComponent() { - // Arrange - var locationConfig = new LocationParsingConfiguration - { - LocationPatterns = new List { "Exhibit Hall *" } - }; - // Act - TimeLocationParser.Parse("10:00 a.m. - 12:15 p.m. Exhibit Hall C", locationConfig, - out string time, out string location, out bool locationParseSuccess); + TimeLocationParser.Parse("10:00 a.m. - 12:15 p.m. Exhibit Hall C", + out string time, out string location); // Assert Assert.That(time, Is.EqualTo("10:00 a.m. - 12:15 p.m.")); @@ -121,16 +79,15 @@ public class TimeLocationParser_Tests } [Test] - public void Parse_NoLocationConfig_StillExtractsTimeAndLocation() + public void Parse_AnyLocation_ExtractsAsIs() { // Act - TimeLocationParser.Parse("3:00 p.m. Room A", null, - out string time, out string location, out bool locationParseSuccess); + TimeLocationParser.Parse("3:00 p.m. Room A", + out string time, out string location); // Assert Assert.That(time, Is.EqualTo("3:00 p.m.")); Assert.That(location, Is.EqualTo("Room A")); - Assert.That(locationParseSuccess, Is.False); // No patterns to match against } [Test] diff --git a/Tests/Parsers/EventOccurrenceParserIssues_Tests.cs b/Tests/Parsers/EventOccurrenceParserIssues_Tests.cs index 51344d6..788162b 100644 --- a/Tests/Parsers/EventOccurrenceParserIssues_Tests.cs +++ b/Tests/Parsers/EventOccurrenceParserIssues_Tests.cs @@ -156,18 +156,19 @@ public class EventOccurrenceParserIssues_Tests } [Test] - public void Parse_LocationParseFailure_ReportsIssue() + public void Parse_LocationExtraction_WorksWithoutPatterns() { // Arrange - // Locations that don't match "Room *" or "Hall *" patterns - // The timeLocationRegex needs to match to extract location, so we need valid time format - var testContent = "Test Event March 15 2:00 p.m. Auditorium A\n" + // Doesn't match Room * or Hall * - "Test Event March 15 3:00 p.m. Room 101\n" + // This should match "Room *" - "Test Event March 15 4:00 p.m. Conference Center"; // Doesn't match any pattern + // Test that locations are extracted correctly without pattern matching + // Locations should be extracted as everything after the time + var testContent = "Test Event - MS\n" + + "Submit Entry March 15 2:00 p.m. Auditorium A\n" + + "Judging March 15 3:00 p.m. Room 101\n" + + "Pick-up March 15 4:00 p.m. Conference Center\n" + + "Final March 15 5:00 p.m."; // No location var tempFile = EventOccurrenceParserTestHelpers.CreateTempFile(testContent); var events = new[] { EventOccurrenceParserTestHelpers.CreateTestEvent("Test Event") }; - var locationConfig = EventOccurrenceParserTestHelpers.CreateLocationConfig("Room *", "Hall *"); - var parser = new EventOccurrenceParser(tempFile, events, locationConfig); + var parser = new EventOccurrenceParser(tempFile, events); try { @@ -175,32 +176,16 @@ public class EventOccurrenceParserIssues_Tests var result = parser.Parse(); // Assert - // Should have location parse failures for unmatched locations - // Note: Location issues are only reported when: - // 1. Time/location regex matches (can extract location) - // 2. Location part is not empty - // 3. Patterns are configured - // 4. No pattern matches - var locationIssues = result.Issues.Where(i => i.IssueType == ParsingIssueType.LocationParseFailure).ToList(); + // Should parse successfully - location parsing no longer uses patterns, locations are extracted as-is + // Verify that locations are extracted correctly without pattern validation - // The parser should report location parse failures for "Auditorium A" and "Conference Center" - // But only if the timeLocationRegex successfully extracts them as locations - if (locationIssues.Any()) - { - foreach (var issue in locationIssues) - { - Assert.That(issue.Message, Does.Contain("does not match any configured pattern")); - } - - // Verify that "Room 101" was parsed successfully (no issue for it) - Assert.That(locationIssues, Has.None.Matches(i => i.LineContent.Contains("Room 101"))); - } - else - { - // If no location issues, it might be because the regex didn't extract locations properly - // This is still a valid test - we're verifying the parser behavior - Assert.Pass("Location parsing may not extract locations in all cases - this is acceptable behavior"); - } + // Verify that locations are extracted correctly + var occurrences = result.Occurrences.Values.SelectMany(list => list).ToList(); + Assert.That(occurrences, Has.Count.GreaterThan(0), "Should parse at least some occurrences"); + + // Verify locations are extracted + var locations = occurrences.Select(eo => eo.Location).Where(loc => !string.IsNullOrWhiteSpace(loc)).ToList(); + Assert.That(locations, Has.Count.GreaterThan(0), "Should extract at least some locations"); } finally { @@ -216,12 +201,11 @@ public class EventOccurrenceParserIssues_Tests "Unknown Event March 15 2:00 p.m. Room 101\n" + // MissingEventDefinition "Test Event February 30 2:00 p.m. Room 101\n" + // DateParseFailure (invalid date) "Test Event March 15 invalid time format Room 101\n" + // TimeParseFailure (no AM/PM) - "Test Event March 15 3:00 p.m. Unmatched Location\n" + // LocationParseFailure (if location extracted) + "Test Event March 15 3:00 p.m. Unmatched Location\n" + // Location extracted as-is (no validation) "Valid Event March 20 4:00 p.m. Room 202"; // Valid line var tempFile = EventOccurrenceParserTestHelpers.CreateTempFile(testContent); var events = new[] { EventOccurrenceParserTestHelpers.CreateTestEvent("Valid Event"), EventOccurrenceParserTestHelpers.CreateTestEvent("Test Event") }; - var locationConfig = EventOccurrenceParserTestHelpers.CreateLocationConfig("Room *"); - var parser = new EventOccurrenceParser(tempFile, events, locationConfig); + var parser = new EventOccurrenceParser(tempFile, events); try { @@ -349,9 +333,8 @@ public class EventOccurrenceParserIssues_Tests EventOccurrenceParserTestHelpers.CreateTestEvent("Test Event"), EventOccurrenceParserTestHelpers.CreateTestEvent("Another Event") }; - // All locations match the patterns - var locationConfig = EventOccurrenceParserTestHelpers.CreateLocationConfig("Room *", "Hall *"); - var parser = new EventOccurrenceParser(tempFile, events, locationConfig); + // Locations are extracted without pattern matching + var parser = new EventOccurrenceParser(tempFile, events); try { @@ -378,26 +361,15 @@ public class EventOccurrenceParserIssues_Tests // Note: If the test event wasn't parsed, it might be due to location parsing or other edge cases // The important thing is that the parser doesn't crash and processes the input - // Verify no location parse failures for locations that match patterns - // Note: Location parsing only reports failures when: - // 1. Location is successfully extracted from time/location string - // 2. Patterns are configured - // 3. No pattern matches - // If location isn't extracted, no issue is created (which is also acceptable) - var locationIssues = result.Issues.Where(i => i.IssueType == ParsingIssueType.LocationParseFailure).ToList(); - - // Verify that locations that match patterns don't create issues - // "Room 101" should match "Room *", "Hall A" and "Hall B" should match "Hall *" - // Note: The parser might create location issues if the location extraction doesn't work perfectly, - // but we verify that at least the test event lines don't create false positives - var locationIssuesForTestEvents = locationIssues.Where(i => - i.LineContent.Contains("Test Event") && i.LineContent.Contains("Room 101") || - i.LineContent.Contains("Another Event") && i.LineContent.Contains("Hall B")).ToList(); - - // The important thing is that matching locations for our test events don't create false positives - // "Opening Session" might have different behavior since it's in GeneralSchedule section - Assert.That(locationIssuesForTestEvents, Has.Count.EqualTo(0), - "Should have no location parse failures for test event locations that match configured patterns"); + // Verify locations are extracted correctly (pattern matching is no longer used) + var testEventOccurrence = result.Occurrences.ContainsKey(testEvent) + ? result.Occurrences[testEvent].FirstOrDefault() + : null; + if (testEventOccurrence != null) + { + Assert.That(testEventOccurrence.Location, Is.EqualTo("Room 101"), + "Location should be extracted correctly without pattern matching"); + } } finally { @@ -419,8 +391,7 @@ public class EventOccurrenceParserIssues_Tests // For General Schedule section, we don't need a specific event definition // The parser will use EventDefinition.GeneralSchedule var events = Array.Empty(); - var locationConfig = EventOccurrenceParserTestHelpers.CreateLocationConfig("Mtg. Room *", "Room *"); - var parser = new EventOccurrenceParser(tempFile, events, locationConfig); + var parser = new EventOccurrenceParser(tempFile, events); try { @@ -477,14 +448,13 @@ public class EventOccurrenceParserIssues_Tests var months = new[] { "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December" }; var events = new[] { EventOccurrenceParserTestHelpers.CreateTestEvent("Test Event") }; - var locationConfig = EventOccurrenceParserTestHelpers.CreateLocationConfig("Room *"); foreach (var month in months) { var testContent = $"Test Event – MS\n" + $"Submit Entry {month} 15 3:00 p.m. Room A"; var tempFile = EventOccurrenceParserTestHelpers.CreateTempFile(testContent); - var parser = new EventOccurrenceParser(tempFile, events, locationConfig); + var parser = new EventOccurrenceParser(tempFile, events); try { diff --git a/Tests/Parsers/EventOccurrenceParser_Tests.cs b/Tests/Parsers/EventOccurrenceParser_Tests.cs index b85d024..6efd56f 100644 --- a/Tests/Parsers/EventOccurrenceParser_Tests.cs +++ b/Tests/Parsers/EventOccurrenceParser_Tests.cs @@ -96,55 +96,6 @@ public class EventOccurrenceParser_Tests .ToList(); } - /// - /// Analyzes location parsing failures and extracts common patterns. - /// - private static Dictionary AnalyzeLocationFailures( - List locationIssues, List fileLines) - { - var locationPatterns = new Dictionary(); - - foreach (var issue in locationIssues) - { - // Try to extract the location part from the line - // The format is typically: "EventName Month Day Time Location" - var parts = issue.LineContent.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries); - - // Look for location-like strings (usually after time) - // This is a heuristic - we'll look for parts that don't match date/time patterns - var timeRegex = new System.Text.RegularExpressions.Regex( - @"\d{1,2}:?\d{0,2}\s*[AaPp]\.?[Mm]\.?|NOON"); - - bool foundTime = false; - var locationParts = new List(); - - foreach (var part in parts) - { - if (timeRegex.IsMatch(part) || part == "NOON") - { - foundTime = true; - continue; - } - - if (foundTime && !string.IsNullOrWhiteSpace(part)) - { - locationParts.Add(part); - } - } - - if (locationParts.Any()) - { - var location = string.Join(" ", locationParts).Trim(); - if (!string.IsNullOrWhiteSpace(location)) - { - locationPatterns.TryGetValue(location, out var count); - locationPatterns[location] = count + 1; - } - } - } - - return locationPatterns; - } /// /// Counts HS vs MS event sections in the file. @@ -300,8 +251,7 @@ public class EventOccurrenceParser_Tests // Arrange var events = TestEntityHandler.GetEvents(); var fileInfo = TestEntityHandler.GetEventOccurrenceNationalsFileInfo(); - var locationConfig = LocationParsingConfiguration.Default; - var parser = new EventOccurrenceParser(fileInfo, events, locationConfig); + var parser = new EventOccurrenceParser(fileInfo, events); // Act var result = parser.Parse(); @@ -369,19 +319,7 @@ public class EventOccurrenceParser_Tests } } - // Pattern Analysis - var locationFailures = fixableIssues.Where(i => i.IssueType == ParsingIssueType.LocationParseFailure).ToList(); - if (locationFailures.Any()) - { - Console.WriteLine($"\n--- Location Parse Failure Analysis ---"); - var locationPatterns = AnalyzeLocationFailures(locationFailures, fileLines); - var topLocations = locationPatterns.OrderByDescending(x => x.Value).Take(10); - Console.WriteLine($"Top unmatched location strings:"); - foreach (var loc in topLocations) - { - Console.WriteLine($" \"{loc.Key}\" (appears {loc.Value} times)"); - } - } + // Pattern Analysis - LocationParseFailure issues are no longer created (pattern matching removed) var unmatchedLines = fixableIssues.Where(i => i.IssueType == ParsingIssueType.UnmatchedLine).ToList(); if (unmatchedLines.Any()) @@ -408,8 +346,7 @@ public class EventOccurrenceParser_Tests // Arrange var events = TestEntityHandler.GetEvents(); var fileInfo = TestEntityHandler.GetEventOccurrenceStateFileInfo(); - var locationConfig = LocationParsingConfiguration.Default; - var parser = new EventOccurrenceParser(fileInfo, events, locationConfig); + var parser = new EventOccurrenceParser(fileInfo, events); // Act var result = parser.Parse(); @@ -477,19 +414,7 @@ public class EventOccurrenceParser_Tests } } - // Pattern Analysis - var locationFailures = fixableIssues.Where(i => i.IssueType == ParsingIssueType.LocationParseFailure).ToList(); - if (locationFailures.Any()) - { - Console.WriteLine($"\n--- Location Parse Failure Analysis ---"); - var locationPatterns = AnalyzeLocationFailures(locationFailures, fileLines); - var topLocations = locationPatterns.OrderByDescending(x => x.Value).Take(10); - Console.WriteLine($"Top unmatched location strings:"); - foreach (var loc in topLocations) - { - Console.WriteLine($" \"{loc.Key}\" (appears {loc.Value} times)"); - } - } + // Pattern Analysis - LocationParseFailure issues are no longer created (pattern matching removed) var unmatchedLines = fixableIssues.Where(i => i.IssueType == ParsingIssueType.UnmatchedLine).ToList(); if (unmatchedLines.Any()) @@ -516,8 +441,7 @@ public class EventOccurrenceParser_Tests // Arrange var events = TestEntityHandler.GetEvents(); var fileInfo = TestEntityHandler.GetEventOccurrenceState2024FileInfo(); - var locationConfig = LocationParsingConfiguration.Default; - var parser = new EventOccurrenceParser(fileInfo, events, locationConfig); + var parser = new EventOccurrenceParser(fileInfo, events); // Act var result = parser.Parse(); @@ -585,19 +509,7 @@ public class EventOccurrenceParser_Tests } } - // Pattern Analysis - var locationFailures = fixableIssues.Where(i => i.IssueType == ParsingIssueType.LocationParseFailure).ToList(); - if (locationFailures.Any()) - { - Console.WriteLine($"\n--- Location Parse Failure Analysis ---"); - var locationPatterns = AnalyzeLocationFailures(locationFailures, fileLines); - var topLocations = locationPatterns.OrderByDescending(x => x.Value).Take(10); - Console.WriteLine($"Top unmatched location strings:"); - foreach (var loc in topLocations) - { - Console.WriteLine($" \"{loc.Key}\" (appears {loc.Value} times)"); - } - } + // Pattern Analysis - LocationParseFailure issues are no longer created (pattern matching removed) var unmatchedLines = fixableIssues.Where(i => i.IssueType == ParsingIssueType.UnmatchedLine).ToList(); if (unmatchedLines.Any()) @@ -617,4 +529,124 @@ public class EventOccurrenceParser_Tests // Test passes if no exceptions were thrown Assert.Pass($"Successfully parsed {totalParsed} occurrences with {result.Issues.Count} issues ({fixableIssues.Count} fixable)"); } + + [Test] + public void Parse_Section_Lines64To92_ChildrensStoriesToConstructionChallenge() + { + // Arrange + // Extract lines 64-92 from the test file - contains MS and HS events with various formats + var allLines = File.ReadAllLines(TestEntityHandler.GetEventOccurrenceStateFileInfo().FullName); + var sectionLines = allLines.Skip(63).Take(29).ToArray(); // Lines 64-92 (0-indexed: 63-91) + var sectionContent = string.Join("\n", sectionLines); + + var tempFile = EventOccurrenceParserTestHelpers.CreateTempFile(sectionContent); + var events = TestEntityHandler.GetEvents(); + var parser = new EventOccurrenceParser(tempFile, events); + + try + { + // Act + var result = parser.Parse(); + + // Assert - Should parse without exceptions + Assert.That(result, Is.Not.Null, "Parser should return a result"); + + // Count occurrences by event type + var totalOccurrences = result.Occurrences.Values.Sum(list => list.Count); + + // Verify MS events are parsed + var childrensStories = events.FirstOrDefault(e => e.Name.Contains("Children's Stories", StringComparison.OrdinalIgnoreCase)); + var coding = events.FirstOrDefault(e => e.Name == "Coding"); + var communityServiceVideo = events.FirstOrDefault(e => e.Name.Contains("Community Service Video", StringComparison.OrdinalIgnoreCase)); + var constructionChallenge = events.FirstOrDefault(e => e.Name.Contains("Construction Challenge", StringComparison.OrdinalIgnoreCase)); + + // Count expected MS occurrences: + // Children's Stories – MS: 5 occurrences (lines 65-69) + // Coding – MS: 2 occurrences (lines 76-77) + // Community Service Video – MS: 4 occurrences (lines 79-82) + // Construction Challenge – MS: 5 occurrences (lines 88-92) + // Total expected MS occurrences: 16 + + var msEventCount = 0; + if (childrensStories != null && result.Occurrences.ContainsKey(childrensStories)) + msEventCount += result.Occurrences[childrensStories].Count; + if (coding != null && result.Occurrences.ContainsKey(coding)) + msEventCount += result.Occurrences[coding].Count; + if (communityServiceVideo != null && result.Occurrences.ContainsKey(communityServiceVideo)) + msEventCount += result.Occurrences[communityServiceVideo].Count; + if (constructionChallenge != null && result.Occurrences.ContainsKey(constructionChallenge)) + msEventCount += result.Occurrences[constructionChallenge].Count; + + // Verify HS events are skipped gracefully (no issues should be created for them) + var hsIssues = result.Issues.Where(i => + i.LineContent.Contains("Coding – HS") || + i.LineContent.Contains("CAD") && i.LineContent.Contains("HS") || + i.LineNumber >= 72 && i.LineNumber <= 86 && IsHighSchoolEvent(i.LineContent) + ).ToList(); + + // Verify HS section headers are tracked in SkippedHSSectionHeaders + var skippedHSHeaders = result.SkippedHSSectionHeaders; + + // Verify continuation lines are skipped + // Line 70 starts with "*The" - this enters continuation mode and both line 70 and 71 should be skipped + var continuationLineIssues = result.Issues.Where(i => + i.LineContent.Contains("books of semifinalist teams") || + i.LineContent.Contains("be returned to teams") + ).ToList(); + + // Verify specific time formats are parsed correctly + var noonOccurrence = result.Occurrences.Values + .SelectMany(list => list) + .FirstOrDefault(eo => eo.Time.Contains("NOON", StringComparison.OrdinalIgnoreCase)); + + var lateTimeOccurrence = result.Occurrences.Values + .SelectMany(list => list) + .FirstOrDefault(eo => eo.Time.Contains("11:59", StringComparison.OrdinalIgnoreCase)); + + // Output detailed analysis + Console.WriteLine($"\n=== Section Lines 64-92 Parsing Results ==="); + Console.WriteLine($"Total occurrences parsed: {totalOccurrences}"); + Console.WriteLine($"MS event occurrences: {msEventCount}"); + Console.WriteLine($"Total issues: {result.Issues.Count}"); + Console.WriteLine($"HS-related issues: {hsIssues.Count}"); + Console.WriteLine($"Skipped HS section headers: {skippedHSHeaders.Count}"); + Console.WriteLine($"Continuation line issues: {continuationLineIssues.Count}"); + + Console.WriteLine($"\n--- Issue Types ---"); + foreach (var issueType in result.Issues.GroupBy(i => i.IssueType)) + { + Console.WriteLine($" {issueType.Key}: {issueType.Count()}"); + } + + // Assertions + Assert.That(totalOccurrences, Is.GreaterThan(0), "Should parse at least some occurrences"); + Assert.That(msEventCount, Is.GreaterThanOrEqualTo(14), "Should parse most MS occurrences (at least 14 out of 16)"); + // HS events should not create issues - they should be skipped gracefully + Assert.That(hsIssues, Has.Count.EqualTo(0), "HS events should be skipped gracefully without creating issues"); + // HS section headers should be tracked + Assert.That(skippedHSHeaders, Has.Count.GreaterThanOrEqualTo(2), "Should track at least 2 HS section headers (Coding - HS, CAD Architecture - HS, CAD Engineering - HS)"); + // Line 70 (starts with "*The") enters continuation mode and both line 70 and 71 should be skipped without issues + Assert.That(continuationLineIssues, Has.Count.EqualTo(0), + "Continuation lines starting with '*' and subsequent lines should be skipped without issues"); + Assert.That(noonOccurrence, Is.Not.Null, "Should parse NOON time format"); + Assert.That(lateTimeOccurrence, Is.Not.Null, "Should parse 11:59 p.m. time format"); + + // Verify specific locations are parsed + if (childrensStories != null && result.Occurrences.ContainsKey(childrensStories)) + { + var locations = result.Occurrences[childrensStories] + .Select(eo => eo.Location) + .Where(loc => !string.IsNullOrWhiteSpace(loc)) + .ToList(); + Assert.That(locations, Has.Count.GreaterThan(0), "Children's Stories should have locations parsed"); + } + + // Test passes with detailed information + Assert.Pass($"Successfully parsed section: {totalOccurrences} occurrences, {result.Issues.Count} issues, {msEventCount} MS events"); + } + finally + { + EventOccurrenceParserTestHelpers.CleanupTempFile(tempFile); + } + } } \ No newline at end of file diff --git a/WebApp/Components/Features/Calendar/Import.razor b/WebApp/Components/Features/Calendar/Import.razor index 3fe3872..1f7ea3a 100644 --- a/WebApp/Components/Features/Calendar/Import.razor +++ b/WebApp/Components/Features/Calendar/Import.razor @@ -302,7 +302,6 @@ ParsingIssueType.MissingEventDefinition => Color.Warning, ParsingIssueType.TimeParseFailure => Color.Error, ParsingIssueType.DateParseFailure => Color.Error, - ParsingIssueType.LocationParseFailure => Color.Warning, ParsingIssueType.InvalidFormat => Color.Error, _ => Color.Default };