Enhance event occurrence parsing to correctly skip high school events
This commit updates EventOccurrenceParser so that high school (HS) section headers are never associated with middle school (MS) events during parsing. HS section headers are now detected and skipped before fuzzy matching runs, so a header such as "Biotechnology Design – HS" can no longer be fuzzy-matched to the MS "Biotechnology" event. Additionally, a new unit test has been added to verify that HS occurrences are excluded from the MS event's occurrences, and the existing hyphenated-header test now uses an MS header because HS headers are skipped.
This commit is contained in:
@@ -73,20 +73,22 @@ public class EventOccurrenceParser
|
||||
// Section headers break continuation mode
|
||||
inContinuationMode = false;
|
||||
|
||||
// Use fuzzy matching to find the best matching event definition
|
||||
// Check if this is an HS event - if so, skip gracefully regardless of whether it matches
|
||||
// This prevents HS events from being incorrectly associated with MS events (e.g.,
|
||||
// "Biotechnology Design – HS" matching "Biotechnology" MS event)
|
||||
if (schoolLevel.Equals("HS", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
result.SkippedHSSectionHeaders.Add(normalizedLine);
|
||||
currentEventDefinition = null; // Skip subsequent occurrences
|
||||
inHSSection = true; // Mark that we're in an HS section
|
||||
continue; // No issue created
|
||||
}
|
||||
|
||||
// For MS events, use fuzzy matching to find the best matching event definition
|
||||
var evt = EventOccurrenceParsers.SectionHeaderMatcher.MatchEventDefinition(eventNamePart, _events);
|
||||
if (evt == null)
|
||||
{
|
||||
// Check if this is an HS event - if so, skip gracefully
|
||||
if (schoolLevel.Equals("HS", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
result.SkippedHSSectionHeaders.Add(normalizedLine);
|
||||
currentEventDefinition = null; // Skip subsequent occurrences
|
||||
inHSSection = true; // Mark that we're in an HS section
|
||||
continue; // No issue created
|
||||
}
|
||||
|
||||
// For non-HS unmatched headers, create issue as before
|
||||
// For unmatched MS headers, create issue
|
||||
var bestRatio = EventOccurrenceParsers.SectionHeaderMatcher.GetBestMatchRatio(eventNamePart, _events);
|
||||
issues.Add(new ParsingIssue
|
||||
{
|
||||
@@ -118,19 +120,21 @@ public class EventOccurrenceParser
|
||||
// Section headers break continuation mode
|
||||
inContinuationMode = false;
|
||||
|
||||
// Check if this is an HS event - if so, skip gracefully regardless of whether it matches
|
||||
// This prevents HS events from being incorrectly associated with MS events
|
||||
if (normalizedLine.Contains("HS", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
result.SkippedHSSectionHeaders.Add(normalizedLine);
|
||||
currentEventDefinition = null; // Skip subsequent occurrences
|
||||
inHSSection = true; // Mark that we're in an HS section
|
||||
continue; // No issue created
|
||||
}
|
||||
|
||||
// For MS events, use fuzzy matching to find the best matching event definition
|
||||
var evt = EventOccurrenceParsers.SectionHeaderMatcher.MatchEventDefinition(normalizedLine, _events);
|
||||
if (evt == null)
|
||||
{
|
||||
// Check if this is an HS event - if so, skip gracefully
|
||||
if (normalizedLine.Contains("HS", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
result.SkippedHSSectionHeaders.Add(normalizedLine);
|
||||
currentEventDefinition = null; // Skip subsequent occurrences
|
||||
inHSSection = true; // Mark that we're in an HS section
|
||||
continue; // No issue created
|
||||
}
|
||||
|
||||
// For non-HS unmatched headers, create issue as before
|
||||
// For unmatched MS headers, create issue
|
||||
var bestRatio = EventOccurrenceParsers.SectionHeaderMatcher.GetBestMatchRatio(normalizedLine, _events);
|
||||
issues.Add(new ParsingIssue
|
||||
{
|
||||
|
||||
@@ -513,7 +513,8 @@ public class EventOccurrenceParserIssues_Tests
|
||||
public void Parse_SectionHeader_WithHyphen_IsRecognized()
|
||||
{
|
||||
// Arrange
|
||||
var testContent = "Architectural Design - HS\n" +
|
||||
// Test that section headers with hyphens are recognized (using MS event since HS events are skipped)
|
||||
var testContent = "Architectural Design - MS\n" +
|
||||
"Submit Entry March 15 8:00 a.m. Room 1";
|
||||
var tempFile = EventOccurrenceParserTestHelpers.CreateTempFile(testContent);
|
||||
var events = new[] { EventOccurrenceParserTestHelpers.CreateTestEvent("Architectural Design") };
|
||||
|
||||
@@ -649,4 +649,73 @@ public class EventOccurrenceParser_Tests
|
||||
EventOccurrenceParserTestHelpers.CleanupTempFile(tempFile);
|
||||
}
|
||||
}
|
||||
|
||||
[Test]
public void Parse_BiotechnologyMSAndHS_HSOccurrencesNotAssociatedWithMS()
{
    // Arrange
    // Verifies that occurrences listed under an HS section header (here
    // "Biotechnology Design – HS") are not attached to the MS event ("Biotechnology"),
    // even though the two names are similar enough that fuzzy matching could pair them.
    var testContent = "Biotechnology – MS\n" +
                      "Submit Entry April 3 8 a.m. – 9 a.m. Exhibit Hall C\n" +
                      "Judging April 3 9 a.m. – 5 p.m. Exhibit Hall C\n" +
                      "Biotechnology Design – HS\n" +
                      "Submit Entry April 3 8 a.m. – 9:00 a.m. Exhibit Hall C\n" +
                      "Judging April 3 9 a.m. – 5 p.m. Exhibit Hall C\n" +
                      "Pick-up April 4 5 p.m. – 5:30 p.m. Exhibit Hall C";

    var tempFile = EventOccurrenceParserTestHelpers.CreateTempFile(testContent);
    var events = new[] { EventOccurrenceParserTestHelpers.CreateTestEvent("Biotechnology") };
    var parser = new EventOccurrenceParser(tempFile, events);

    try
    {
        // Act
        var result = parser.Parse();

        // Assert
        var biotechnology = events.FirstOrDefault(e => e.Name == "Biotechnology");
        Assert.That(biotechnology, Is.Not.Null, "Biotechnology event should exist");

        // The HS section header should be recorded as skipped. The expected text uses a
        // regular hyphen because the test expects the parser to normalize the en dash.
        Assert.That(result.SkippedHSSectionHeaders, Does.Contain("Biotechnology Design - HS"),
            "HS section header should be in SkippedHSSectionHeaders");

        // Require the MS event to be present instead of guarding with an `if`:
        // a guarded assertion would let the test pass without checking anything
        // when no MS occurrences were parsed at all.
        Assert.That(result.Occurrences.TryGetValue(biotechnology, out var msOccurrences), Is.True,
            "MS occurrences should be parsed for the Biotechnology event");

        // Exactly the two MS rows are expected. A count of 5 (2 MS + 3 HS) would mean
        // the HS rows were incorrectly associated with the MS event.
        Assert.That(msOccurrences, Has.Count.EqualTo(2),
            "HS occurrences should not be associated with MS event. " +
            $"Found {msOccurrences.Count} occurrences, expected 2 MS only (Submit Entry and Judging).");

        // "Pick-up" appears only under the HS header, so it must not show up here.
        var msNames = msOccurrences.Select(o => o.Name).ToList();
        Assert.That(msNames, Does.Contain("Submit Entry"), "MS should have Submit Entry");
        Assert.That(msNames, Does.Contain("Judging"), "MS should have Judging");
        Assert.That(msNames, Does.Not.Contain("Pick-up"), "HS-only Pick-up must not be attached to MS");
    }
    finally
    {
        // Always remove the temp file, even when an assertion fails.
        EventOccurrenceParserTestHelpers.CleanupTempFile(tempFile);
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user