Enhance event occurrence parsing to correctly skip high school events

This commit updates the EventOccurrenceParser so that high school (HS) events are never associated with middle school (MS) events during parsing. HS section headers are now detected and skipped before fuzzy matching runs, instead of only when no match was found, so a header such as "Biotechnology Design HS" can no longer fuzzy-match the MS event "Biotechnology". A new unit test verifies that HS occurrences are excluded from the MS event's occurrences, and the existing hyphenated-header test now uses an MS header because HS headers are always skipped.
This commit is contained in:
2026-01-09 08:32:41 -05:00
parent 19e5ef0675
commit 2eae3f205c
3 changed files with 96 additions and 22 deletions
+25 -21
View File
@@ -73,20 +73,22 @@ public class EventOccurrenceParser
// Section headers break continuation mode
inContinuationMode = false;
// Use fuzzy matching to find the best matching event definition
// Check if this is an HS event - if so, skip gracefully regardless of whether it matches
// This prevents HS events from being incorrectly associated with MS events (e.g.,
// "Biotechnology Design HS" matching "Biotechnology" MS event)
if (schoolLevel.Equals("HS", StringComparison.OrdinalIgnoreCase))
{
result.SkippedHSSectionHeaders.Add(normalizedLine);
currentEventDefinition = null; // Skip subsequent occurrences
inHSSection = true; // Mark that we're in an HS section
continue; // No issue created
}
// For MS events, use fuzzy matching to find the best matching event definition
var evt = EventOccurrenceParsers.SectionHeaderMatcher.MatchEventDefinition(eventNamePart, _events);
if (evt == null)
{
// Check if this is an HS event - if so, skip gracefully
if (schoolLevel.Equals("HS", StringComparison.OrdinalIgnoreCase))
{
result.SkippedHSSectionHeaders.Add(normalizedLine);
currentEventDefinition = null; // Skip subsequent occurrences
inHSSection = true; // Mark that we're in an HS section
continue; // No issue created
}
// For non-HS unmatched headers, create issue as before
// For unmatched MS headers, create issue
var bestRatio = EventOccurrenceParsers.SectionHeaderMatcher.GetBestMatchRatio(eventNamePart, _events);
issues.Add(new ParsingIssue
{
@@ -118,19 +120,21 @@ public class EventOccurrenceParser
// Section headers break continuation mode
inContinuationMode = false;
// Check if this is an HS event - if so, skip gracefully regardless of whether it matches
// This prevents HS events from being incorrectly associated with MS events
if (normalizedLine.Contains("HS", StringComparison.OrdinalIgnoreCase))
{
result.SkippedHSSectionHeaders.Add(normalizedLine);
currentEventDefinition = null; // Skip subsequent occurrences
inHSSection = true; // Mark that we're in an HS section
continue; // No issue created
}
// For MS events, use fuzzy matching to find the best matching event definition
var evt = EventOccurrenceParsers.SectionHeaderMatcher.MatchEventDefinition(normalizedLine, _events);
if (evt == null)
{
// Check if this is an HS event - if so, skip gracefully
if (normalizedLine.Contains("HS", StringComparison.OrdinalIgnoreCase))
{
result.SkippedHSSectionHeaders.Add(normalizedLine);
currentEventDefinition = null; // Skip subsequent occurrences
inHSSection = true; // Mark that we're in an HS section
continue; // No issue created
}
// For non-HS unmatched headers, create issue as before
// For unmatched MS headers, create issue
var bestRatio = EventOccurrenceParsers.SectionHeaderMatcher.GetBestMatchRatio(normalizedLine, _events);
issues.Add(new ParsingIssue
{
@@ -513,7 +513,8 @@ public class EventOccurrenceParserIssues_Tests
public void Parse_SectionHeader_WithHyphen_IsRecognized()
{
// Arrange
var testContent = "Architectural Design - HS\n" +
// Test that section headers with hyphens are recognized (using MS event since HS events are skipped)
var testContent = "Architectural Design - MS\n" +
"Submit Entry March 15 8:00 a.m. Room 1";
var tempFile = EventOccurrenceParserTestHelpers.CreateTempFile(testContent);
var events = new[] { EventOccurrenceParserTestHelpers.CreateTestEvent("Architectural Design") };
@@ -649,4 +649,73 @@ public class EventOccurrenceParser_Tests
EventOccurrenceParserTestHelpers.CleanupTempFile(tempFile);
}
}
[Test]
public void Parse_BiotechnologyMSAndHS_HSOccurrencesNotAssociatedWithMS()
{
    // Arrange
    // Verifies that an HS section (like "Biotechnology Design HS") is skipped and that its
    // occurrences are not attached to the similarly named MS event ("Biotechnology"), even
    // if fuzzy matching on the header text would find a match.
    var testContent = "Biotechnology MS\n" +
                      "Submit Entry April 3 8 a.m. 9 a.m. Exhibit Hall C\n" +
                      "Judging April 3 9 a.m. 5 p.m. Exhibit Hall C\n" +
                      "Biotechnology Design HS\n" +
                      "Submit Entry April 3 8 a.m. 9:00 a.m. Exhibit Hall C\n" +
                      "Judging April 3 9 a.m. 5 p.m. Exhibit Hall C\n" +
                      "Pick-up April 4 5 p.m. 5:30 p.m. Exhibit Hall C";
    var tempFile = EventOccurrenceParserTestHelpers.CreateTempFile(testContent);
    var events = new[] { EventOccurrenceParserTestHelpers.CreateTestEvent("Biotechnology") };
    var parser = new EventOccurrenceParser(tempFile, events);

    try
    {
        // Act
        var result = parser.Parse();

        // Assert
        var biotechnology = events.FirstOrDefault(e => e.Name == "Biotechnology");
        Assert.That(biotechnology, Is.Not.Null, "Biotechnology event should exist");

        // HS section header should be skipped (note: normalized to regular hyphen)
        // NOTE(review): the expected text contains " - " while the header literal above shows
        // no dash character - confirm the input header really carries a dash to normalize.
        Assert.That(result.SkippedHSSectionHeaders, Does.Contain("Biotechnology Design - HS"),
            "HS section header should be in SkippedHSSectionHeaders");

        // NOTE(review): the checks below only run when the parser recorded occurrences for the
        // MS event. If the MS section is expected to always parse, make this unconditional so
        // the test cannot pass vacuously.
        if (result.Occurrences.TryGetValue(biotechnology, out var msOccurrences))
        {
            // Exactly the 2 MS rows, not 5 (2 MS + 3 HS): this single count check covers both
            // "MS rows were parsed" and "HS rows were not attached to the MS event".
            Assert.That(msOccurrences, Has.Count.EqualTo(2),
                "Should have 2 MS occurrences (Submit Entry and Judging)");

            // Verify MS occurrences have correct names
            var msNames = msOccurrences.Select(o => o.Name).ToList();
            Assert.That(msNames, Does.Contain("Submit Entry"), "MS should have Submit Entry");
            Assert.That(msNames, Does.Contain("Judging"), "MS should have Judging");
        }
    }
    finally
    {
        // Always remove the temp file, including when an assertion fails.
        EventOccurrenceParserTestHelpers.CleanupTempFile(tempFile);
    }
}
}