Enhance event occurrence parsing with new location patterns and improved issue handling

This commit updates the LocationParsingConfiguration to include additional location patterns such as "Exhibit Hall *", "Mtg. Room *", and "Online". The EventOccurrenceParser has been enhanced to better handle parsing issues, including skipping comment and continuation lines, and cleaning up location text. New methods for analyzing location parsing failures and categorizing issues have been added to improve reporting. Additionally, the UI has been updated to support larger input sizes for event occurrence text, ensuring a smoother user experience during data import.
This commit is contained in:
2026-01-08 08:08:36 -05:00
parent c937192496
commit 5fdd5fadba
5 changed files with 572 additions and 67 deletions
+9 -1
View File
@@ -21,9 +21,17 @@ public class LocationParsingConfiguration
{
"Room *",
"Hall *",
"Exhibit Hall *",
"Conference Room *",
"Building *",
"Auditorium *"
"Auditorium *",
"Mtg. Room *",
"Meeting Room *",
"Banquet Room *",
"Banquet Hall *",
"Online",
"Virtual",
"TBD"
}
};
}
+164 -28
View File
@@ -42,10 +42,10 @@ public class EventOccurrenceParser
// Matches: time1 (optional dash time2/NOON), then location
// The time group captures the full time range (including " - NOON" if present)
// Pattern breakdown:
// - First time: (?:NOON|\d{1,2}:?\d{0,2}\s?(?:[AaPp]\.?[Mm]\.?)) - matches NOON or time with AM/PM
// - Optional range: (?:\s*[-]\s*(?:NOON|\d{1,2}:?\d{0,2}\s?(?:[AaPp]\.?[Mm]\.?))) - matches dash followed by NOON or time
// - Location: \s+.+ - whitespace followed by rest of string
private readonly Regex _timeLocationRegex = new(@"(?<Time>(?:NOON|\d{1,2}:?\d{0,2}\s?(?:[AaPp]\.?[Mm]\.?))(?:\s*[-]\s*(?:NOON|\d{1,2}:?\d{0,2}\s?(?:[AaPp]\.?[Mm]\.?)))?)(?<Location>\s+.+)?");
// - First time: (?:NOON|\d{1,2}:?\d{0,2}\s*(?:[AaPp]\.?[Mm]\.?)) - matches NOON or time with AM/PM (more flexible whitespace)
// - Optional range: (?:\s*[-]\s*(?:NOON|\d{1,2}:?\d{0,2}\s*(?:[AaPp]\.?[Mm]\.?))) - matches dash followed by NOON or time
// - Location: (?:\s+(?<Location>.+))? - optional whitespace followed by location (capture group with explicit name)
private readonly Regex _timeLocationRegex = new(@"(?<Time>(?:NOON|\d{1,2}:?\d{0,2}\s*(?:[AaPp]\.?[Mm]\.?))(?:\s*[-]\s*(?:NOON|\d{1,2}:?\d{0,2}\s*(?:[AaPp]\.?[Mm]\.?)))?)(?:\s+(?<Location>.+))?");
public EventOccurrenceParserResult Parse()
{
@@ -61,6 +61,10 @@ public class EventOccurrenceParser
// Skip empty lines
if (string.IsNullOrWhiteSpace(trimmedLine))
continue;
// Skip comment lines (starting with "#")
if (trimmedLine.StartsWith("#", StringComparison.Ordinal))
continue;
var match = _re.Match(trimmedLine);
@@ -94,6 +98,16 @@ public class EventOccurrenceParser
continue;
}
// Skip continuation lines (lines that look like they're continuing from previous line)
// These are typically lines that:
// - Start with lowercase or special characters (not event names)
// - Are parenthetical notes like "(Semifinalists only)"
// - Are informational text like "Schedule Posted on..."
if (IsContinuationLine(trimmedLine))
{
continue;
}
// "Voting Delegates" section header is no longer used - occurrences are categorized by name pattern
// Track as unmatched line if it's not empty
if (!string.IsNullOrWhiteSpace(trimmedLine))
@@ -223,6 +237,38 @@ public class EventOccurrenceParser
return null;
}
/// <summary>
/// Reports whether a line is wrapped/continuation text or an informational note
/// rather than an event entry, so that the caller can skip it.
/// </summary>
/// <param name="line">The line to classify; surrounding whitespace is ignored.</param>
/// <returns>
/// <c>true</c> when the line is fully parenthesised, begins (in lowercase) with one of the
/// known connecting words, or contains one of the informational markers; otherwise <c>false</c>.
/// </returns>
private bool IsContinuationLine(string line)
{
    var text = line.Trim();

    // A note wrapped entirely in parentheses, e.g. "(Semifinalists only)".
    var isParenthetical = text.StartsWith("(", StringComparison.Ordinal)
        && text.EndsWith(")", StringComparison.Ordinal);
    if (isParenthetical)
    {
        return true;
    }

    // Wrapped text: the first character is lowercase and the line opens with a connecting word.
    if (text.Length > 0 && char.IsLower(text[0]))
    {
        string[] leadingWords = { "be ", "the ", "and ", "or ", "to ", "a ", "an ", "will ", "may ", "can " };
        if (Array.Exists(leadingWords, word => text.StartsWith(word, StringComparison.OrdinalIgnoreCase)))
        {
            return true;
        }
    }

    // Informational lines are recognised by a marker appearing anywhere in the text.
    string[] infoMarkers = { "Schedule Posted", "Note:", "*Note:" };
    return Array.Exists(infoMarkers, marker => text.Contains(marker, StringComparison.OrdinalIgnoreCase));
}
private string SanitizeInput(string input)
{
@@ -286,30 +332,57 @@ public class EventOccurrenceParser
if (!string.IsNullOrWhiteSpace(locationPart))
{
// Try to match location using configurable patterns
if (_locationConfig != null && _locationConfig.LocationPatterns.Any())
{
location = MatchLocationPattern(locationPart, _locationConfig.LocationPatterns);
locationParseSuccess = !string.IsNullOrEmpty(location);
}
// Clean up location part - remove any remaining time components (e.g., " 12:15 p.m. Exhibit Hall C" -> "Exhibit Hall C")
locationPart = CleanLocationText(locationPart);
// If no pattern matched, fall back to using the location part as-is
if (!locationParseSuccess)
if (!string.IsNullOrWhiteSpace(locationPart))
{
location = locationPart;
// Only add issue if we have patterns configured but none matched
// Try to match location using configurable patterns
if (_locationConfig != null && _locationConfig.LocationPatterns.Any())
{
issues.Add(new ParsingIssue
location = MatchLocationPattern(locationPart, _locationConfig.LocationPatterns);
locationParseSuccess = !string.IsNullOrEmpty(location);
// If pattern matching failed but location part looks valid, try matching against cleaned version
if (!locationParseSuccess && !string.IsNullOrWhiteSpace(locationPart))
{
LineNumber = lineNumber,
LineContent = lineContent,
IssueType = ParsingIssueType.LocationParseFailure,
Message = $"Location '{locationPart}' does not match any configured pattern"
});
// Some locations might not match because of extra whitespace or formatting
// Try matching the location even if initial match failed
var cleanedForMatching = locationPart.Trim();
location = MatchLocationPattern(cleanedForMatching, _locationConfig.LocationPatterns);
locationParseSuccess = !string.IsNullOrEmpty(location);
if (locationParseSuccess)
{
location = cleanedForMatching; // Use the cleaned version
}
}
}
// If no pattern matched but we have a location, use it anyway but mark as not matching pattern
// This allows parsing to continue while still tracking that the location didn't match a pattern
if (!locationParseSuccess && !string.IsNullOrWhiteSpace(locationPart))
{
location = locationPart;
// Only add issue if we have patterns configured but none matched
// This helps identify locations that might need new patterns added
if (_locationConfig != null && _locationConfig.LocationPatterns.Any())
{
issues.Add(new ParsingIssue
{
LineNumber = lineNumber,
LineContent = lineContent,
IssueType = ParsingIssueType.LocationParseFailure,
Message = $"Location '{locationPart}' does not match any configured pattern"
});
}
}
}
}
else
{
// No location part found, which is valid (some events might not have locations)
locationParseSuccess = true; // Consider it a success since no location is needed
}
}
else
{
@@ -320,24 +393,79 @@ public class EventOccurrenceParser
return (time, location, locationParseSuccess || string.IsNullOrWhiteSpace(location));
}
// One time token as it appears in schedule text: "NOON" or digits followed by an am/pm marker.
private const string LocationTimeToken = @"(?:NOON|\d{1,2}:?\d{0,2}\s*[AaPp]\.?[Mm]\.?)";

// A time at the very start of the text, followed by whitespace (e.g. "12:15 p.m. ").
private static readonly Regex s_leadingTimeRegex =
    new("^" + LocationTimeToken + @"\s+", RegexOptions.IgnoreCase);

// A dash (hyphen, en dash U+2013 or em dash U+2014) followed by a time, e.g. "- 1:00 p.m. ".
private static readonly Regex s_leadingDashTimeRegex =
    new(@"^[-\u2013\u2014]\s*" + LocationTimeToken + @"\s*", RegexOptions.IgnoreCase);

// The whole text is nothing but a time.
private static readonly Regex s_timeOnlyRegex =
    new("^" + LocationTimeToken + "$", RegexOptions.IgnoreCase);

/// <summary>
/// Cleans location text by removing any time components left at its start.
/// Handles cases like "- 12:15 p.m. Exhibit Hall C" -> "Exhibit Hall C".
/// </summary>
/// <param name="locationText">The raw text that followed the time on a schedule line.</param>
/// <returns>
/// The location with leading dashes, whitespace and time fragments removed, or
/// <see cref="string.Empty"/> when the input is blank or consists only of a time.
/// </returns>
private string CleanLocationText(string locationText)
{
    if (string.IsNullOrWhiteSpace(locationText))
    {
        return string.Empty;
    }

    // Remove leading dashes and whitespace. The dash characters are written as escapes
    // so they survive encoding round-trips.
    // NOTE(review): the original first argument was an empty char literal (does not compile);
    // en dash and em dash are assumed to be what was intended - confirm.
    locationText = locationText.TrimStart('\u2013', '\u2014', '-', ' ', '\t');

    // No "dash + whitespace + time" pass is needed here: the TrimStart above has already
    // removed any leading dash, so such a pattern could never match at this point.

    // Leading time without a dash (e.g. "12:15 p.m. " or "NOON ").
    locationText = s_leadingTimeRegex.Replace(locationText, "").Trim();

    // Removing a leading time can expose the end of a range (e.g. "- 1:00 p.m. Hall").
    locationText = s_leadingDashTimeRegex.Replace(locationText, "").Trim();

    // If only a time remains, there is no location.
    if (s_timeOnlyRegex.IsMatch(locationText))
    {
        return string.Empty;
    }

    return locationText.Trim();
}
/// <summary>
/// Matches location text against configured patterns and returns the matched location.
/// </summary>
private string MatchLocationPattern(string locationText, List<string> patterns)
{
// Normalize location text for matching (trim and handle variations)
var normalizedLocation = locationText.Trim();
// If location is empty after normalization, return empty
if (string.IsNullOrWhiteSpace(normalizedLocation))
return string.Empty;
foreach (var pattern in patterns)
{
if (!pattern.Contains('*'))
var normalizedPattern = pattern.Trim();
// Skip empty patterns
if (string.IsNullOrWhiteSpace(normalizedPattern))
continue;
// Convert pattern to regex: escape special chars, replace * with .*
var escapedPattern = Regex.Escape(pattern);
escapedPattern = escapedPattern.Replace(@"\*", ".*");
var regex = new Regex($"^{escapedPattern}$", RegexOptions.IgnoreCase);
if (regex.IsMatch(locationText))
// Handle exact matches (patterns without wildcards like "Online", "Virtual", "TBD")
if (!normalizedPattern.Contains('*'))
{
return locationText; // Return the full matched location
if (string.Equals(normalizedPattern, normalizedLocation, StringComparison.OrdinalIgnoreCase))
{
return normalizedLocation;
}
continue;
}
// Convert pattern to regex: escape special chars, replace * with .*
// This handles patterns like "Exhibit Hall *", "Room *", "Mtg. Room *", etc.
var escapedPattern = Regex.Escape(normalizedPattern);
escapedPattern = escapedPattern.Replace(@"\*", ".*?");
// Use case-insensitive matching
var regex = new Regex($"^{escapedPattern}$", RegexOptions.IgnoreCase);
if (regex.IsMatch(normalizedLocation))
{
return normalizedLocation; // Return the full matched location
}
}
@@ -349,6 +477,14 @@ public class EventOccurrenceParser
int hour = 0;
int minute = 0;
// Handle TBD (To Be Determined) times gracefully
if (string.Equals(time.Trim(), "TBD", StringComparison.OrdinalIgnoreCase))
{
// Use a placeholder time (midnight) for TBD - the occurrence will still be created
// but with a time that indicates it's TBD
return new TimeOnly(0, 0, 0);
}
// get the part of the time before a timespan
if (time.Contains(" - "))
{
+380 -30
View File
@@ -9,6 +9,164 @@ namespace Tests.Parsers;
/// </summary>
public class EventOccurrenceParser_Tests
{
#region Helper Methods
/// <summary>
/// Tells whether the given line carries a High School ("HS") marker.
/// </summary>
/// <param name="line">The schedule line to inspect.</param>
/// <returns><c>true</c> when any HS marker spelling occurs in the line, ignoring case.</returns>
private static bool IsHighSchoolEvent(string line)
{
    string[] markers = { " HS", " - HS", "- HS" };
    foreach (var marker in markers)
    {
        if (line.Contains(marker, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Tells whether the given line carries a Middle School ("MS") marker.
/// </summary>
/// <param name="line">The schedule line to inspect.</param>
/// <returns><c>true</c> when any MS marker spelling occurs in the line, ignoring case.</returns>
private static bool IsMiddleSchoolEvent(string line)
{
    string[] markers = { " MS", " - MS", "- MS" };
    return Array.Exists(markers, marker => line.Contains(marker, StringComparison.OrdinalIgnoreCase));
}
/// <summary>
/// Decides whether an issue is "expected" (attributable to a High School entry) as opposed to fixable.
/// </summary>
/// <param name="issue">The parsing issue being classified.</param>
/// <param name="fileLines">All lines of the parsed file, used to look back for a section marker.</param>
/// <param name="currentLineIndex">Zero-based index of the issue's line within <paramref name="fileLines"/>.</param>
/// <returns>
/// <c>true</c> when the issue line itself has an HS marker, or when the issue is a
/// <c>MissingEventDefinition</c> and the nearest HS/MS marker within the preceding 20 lines is HS.
/// </returns>
private static bool IsExpectedIssue(ParsingIssue issue, List<string> fileLines, int currentLineIndex)
{
    // The offending line is itself marked as High School.
    if (IsHighSchoolEvent(issue.LineContent))
    {
        return true;
    }

    // Only missing-definition issues are classified by the surrounding section.
    if (issue.IssueType != ParsingIssueType.MissingEventDefinition)
    {
        return false;
    }

    // Walk backwards over at most this many preceding lines looking for a section marker.
    const int lookBack = 20;
    var oldest = Math.Max(0, currentLineIndex - lookBack);
    for (var idx = currentLineIndex - 1; idx >= oldest; idx--)
    {
        if (idx >= fileLines.Count)
        {
            continue; // index past the end of the file; nothing to inspect
        }

        var candidate = fileLines[idx].Trim();
        if (IsHighSchoolEvent(candidate))
        {
            return true;
        }

        if (IsMiddleSchoolEvent(candidate))
        {
            return false; // the nearest marker is MS, so the issue is fixable
        }
    }

    return false;
}
/// <summary>
/// Splits issues into those that are expected (HS-related) and those that are fixable.
/// </summary>
/// <param name="issues">The issues reported by the parser.</param>
/// <param name="fileLines">All lines of the parsed file, passed through to the classifier.</param>
/// <returns>A tuple holding the expected issues and the fixable issues, each in input order.</returns>
private static (List<ParsingIssue> Expected, List<ParsingIssue> Fixable) CategorizeIssues(
List<ParsingIssue> issues, List<string> fileLines)
{
    var expectedBucket = new List<ParsingIssue>();
    var fixableBucket = new List<ParsingIssue>();

    foreach (var current in issues)
    {
        // Issue line numbers are 1-based; the classifier takes a 0-based index.
        var zeroBased = current.LineNumber - 1;
        var target = IsExpectedIssue(current, fileLines, zeroBased) ? expectedBucket : fixableBucket;
        target.Add(current);
    }

    return (expectedBucket, fixableBucket);
}
/// <summary>
/// Formats the first few issues as printable sample lines.
/// </summary>
/// <param name="issues">The issues to sample from.</param>
/// <param name="count">Maximum number of samples to produce (defaults to 5).</param>
/// <returns>One formatted string per sampled issue, showing its line number and content.</returns>
private static List<string> GetSampleLines(List<ParsingIssue> issues, int count = 5)
{
    var samples = new List<string>();
    foreach (var issue in issues.Take(count))
    {
        samples.Add($" Line {issue.LineNumber}: {issue.LineContent}");
    }

    return samples;
}
/// <summary>
/// Analyzes location parsing failures and counts how often each unmatched location string occurs.
/// </summary>
/// <remarks>
/// Heuristic: a line is expected to look like "EventName Month Day Time Location", so the
/// location is taken to be whatever follows the last time token on the line. The time pattern
/// is matched against the whole line rather than space-separated tokens, because a time such as
/// "12:15 p.m." contains a space and would never match once the line has been split.
/// </remarks>
/// <param name="locationIssues">Issues whose line content should be inspected.</param>
/// <param name="fileLines">Not used by this method; kept so existing callers continue to compile.</param>
/// <returns>
/// A dictionary mapping each extracted location string (whitespace-normalised) to the number
/// of issues it appeared in. Lines with no recognisable time, or nothing after it, are ignored.
/// </returns>
private static Dictionary<string, int> AnalyzeLocationFailures(
List<ParsingIssue> locationIssues, List<string> fileLines)
{
    // Built once, not per issue. The leading \b and trailing negative lookahead stop the pattern
    // from matching inside ordinary words (e.g. "AFTERNOON", or "27 Am" in "June 27 Amphitheater").
    var timeRegex = new System.Text.RegularExpressions.Regex(
        @"\b(?:NOON|\d{1,2}:?\d{0,2}\s*[AaPp]\.?[Mm]\.?)(?![A-Za-z])");

    var locationCounts = new Dictionary<string, int>();

    foreach (var issue in locationIssues)
    {
        var content = issue.LineContent;
        if (string.IsNullOrWhiteSpace(content))
        {
            continue;
        }

        var timeMatches = timeRegex.Matches(content);
        if (timeMatches.Count == 0)
        {
            continue; // no time on the line, so there is no anchor for the location
        }

        // Use the LAST time so the end of a range ("8:00 a.m. - NOON") is not counted as location text.
        var lastTime = timeMatches[timeMatches.Count - 1];
        var tail = content.Substring(lastTime.Index + lastTime.Length);

        var words = tail.Split(new[] { ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries);
        if (words.Length == 0)
        {
            continue;
        }

        var location = string.Join(" ", words);
        locationCounts.TryGetValue(location, out var seen);
        locationCounts[location] = seen + 1;
    }

    return locationCounts;
}
/// <summary>
/// Tallies how many lines of the file carry a High School marker versus a Middle School marker.
/// </summary>
/// <param name="fileLines">All lines of the file being analysed.</param>
/// <returns>
/// The number of HS lines and the number of MS lines. A line that carries both markers
/// is counted as HS only.
/// </returns>
private static (int HighSchool, int MiddleSchool) CountEventSections(List<string> fileLines)
{
    var highSchool = fileLines.Count(raw => IsHighSchoolEvent(raw.Trim()));

    // MS is only counted when the line was not already counted as HS.
    var middleSchool = fileLines.Count(raw =>
    {
        var text = raw.Trim();
        return !IsHighSchoolEvent(text) && IsMiddleSchoolEvent(text);
    });

    return (highSchool, middleSchool);
}
#endregion
[Test]
public void ParseNationalsTest()
{
@@ -151,22 +309,34 @@ public class EventOccurrenceParser_Tests
// Assert - Should parse without exceptions
Assert.That(result, Is.Not.Null, "Parser should return a result");
// Analyze and report results
var totalLines = File.ReadAllLines(fileInfo.FullName).Length;
var issuesByType = result.Issues.GroupBy(i => i.IssueType).ToDictionary(g => g.Key, g => g.Count());
// Load file lines for analysis
var fileLines = File.ReadAllLines(fileInfo.FullName).ToList();
var totalLines = fileLines.Count;
var totalParsed = result.Occurrences.Values.Sum(list => list.Count);
// Categorize issues
var (expectedIssues, fixableIssues) = CategorizeIssues(result.Issues, fileLines);
// Count event sections
var (hsSections, msSections) = CountEventSections(fileLines);
// Group issues by type
var issuesByType = result.Issues.GroupBy(i => i.IssueType).ToDictionary(g => g.Key, g => g.ToList());
var expectedByType = expectedIssues.GroupBy(i => i.IssueType).ToDictionary(g => g.Key, g => g.Count());
var fixableByType = fixableIssues.GroupBy(i => i.IssueType).ToDictionary(g => g.Key, g => g.Count());
// Output analysis
Console.WriteLine($"\n=== 2025 TSA Nationals Competition Event Times Analysis ===");
Console.WriteLine($"\n--- Summary Statistics ---");
Console.WriteLine($"Total lines in file: {totalLines}");
Console.WriteLine($"Total occurrences parsed: {totalParsed}");
Console.WriteLine($"Total issues found: {result.Issues.Count}");
Console.WriteLine($"Issues by type:");
foreach (var kvp in issuesByType.OrderByDescending(x => x.Value))
{
Console.WriteLine($" {kvp.Key}: {kvp.Value}");
}
Console.WriteLine($" Expected issues (HS-related): {expectedIssues.Count} ({100.0 * expectedIssues.Count / Math.Max(1, result.Issues.Count):F1}%)");
Console.WriteLine($" Fixable issues: {fixableIssues.Count} ({100.0 * fixableIssues.Count / Math.Max(1, result.Issues.Count):F1}%)");
Console.WriteLine($"Event sections: HS={hsSections}, MS={msSections}");
Console.WriteLine($"Events with occurrences: {result.Occurrences.Count}");
Console.WriteLine($"Special events found:");
Console.WriteLine($"\n--- Special Events Found ---");
if (result.Occurrences.ContainsKey(EventDefinition.GeneralSchedule))
Console.WriteLine($" GeneralSchedule: {result.Occurrences[EventDefinition.GeneralSchedule].Count} occurrences");
if (result.Occurrences.ContainsKey(EventDefinition.MeetTheCandidates))
@@ -176,8 +346,60 @@ public class EventOccurrenceParser_Tests
if (result.Occurrences.ContainsKey(EventDefinition.VotingDelegateMeeting))
Console.WriteLine($" VotingDelegateMeeting: {result.Occurrences[EventDefinition.VotingDelegateMeeting].Count} occurrences");
Console.WriteLine($"\n--- Issue Breakdown by Type ---");
foreach (var kvp in issuesByType.OrderByDescending(x => x.Value.Count))
{
var issueType = kvp.Key;
var allIssues = kvp.Value;
expectedByType.TryGetValue(issueType, out var expectedCount);
fixableByType.TryGetValue(issueType, out var fixableCount);
Console.WriteLine($"\n{issueType}:");
Console.WriteLine($" Total: {allIssues.Count} (Expected: {expectedCount}, Fixable: {fixableCount})");
if (fixableCount > 0)
{
var fixableOfType = fixableIssues.Where(i => i.IssueType == issueType).ToList();
var samples = GetSampleLines(fixableOfType, 5);
Console.WriteLine($" Sample fixable issues:");
foreach (var sample in samples)
{
Console.WriteLine(sample);
}
}
}
// Pattern Analysis
var locationFailures = fixableIssues.Where(i => i.IssueType == ParsingIssueType.LocationParseFailure).ToList();
if (locationFailures.Any())
{
Console.WriteLine($"\n--- Location Parse Failure Analysis ---");
var locationPatterns = AnalyzeLocationFailures(locationFailures, fileLines);
var topLocations = locationPatterns.OrderByDescending(x => x.Value).Take(10);
Console.WriteLine($"Top unmatched location strings:");
foreach (var loc in topLocations)
{
Console.WriteLine($" \"{loc.Key}\" (appears {loc.Value} times)");
}
}
var unmatchedLines = fixableIssues.Where(i => i.IssueType == ParsingIssueType.UnmatchedLine).ToList();
if (unmatchedLines.Any())
{
Console.WriteLine($"\n--- Unmatched Line Analysis ---");
var unmatchedPatterns = unmatchedLines
.GroupBy(i => i.LineContent.Trim())
.OrderByDescending(g => g.Count())
.Take(10);
Console.WriteLine($"Top unmatched line formats:");
foreach (var pattern in unmatchedPatterns)
{
Console.WriteLine($" \"{pattern.Key}\" (appears {pattern.Count()} times)");
}
}
// Test passes if no exceptions were thrown
Assert.Pass($"Successfully parsed {totalParsed} occurrences with {result.Issues.Count} issues");
Assert.Pass($"Successfully parsed {totalParsed} occurrences with {result.Issues.Count} issues ({fixableIssues.Count} fixable)");
}
[Test]
@@ -195,22 +417,34 @@ public class EventOccurrenceParser_Tests
// Assert - Should parse without exceptions
Assert.That(result, Is.Not.Null, "Parser should return a result");
// Analyze and report results
var totalLines = File.ReadAllLines(fileInfo.FullName).Length;
var issuesByType = result.Issues.GroupBy(i => i.IssueType).ToDictionary(g => g.Key, g => g.Count());
// Load file lines for analysis
var fileLines = File.ReadAllLines(fileInfo.FullName).ToList();
var totalLines = fileLines.Count;
var totalParsed = result.Occurrences.Values.Sum(list => list.Count);
// Categorize issues
var (expectedIssues, fixableIssues) = CategorizeIssues(result.Issues, fileLines);
// Count event sections
var (hsSections, msSections) = CountEventSections(fileLines);
// Group issues by type
var issuesByType = result.Issues.GroupBy(i => i.IssueType).ToDictionary(g => g.Key, g => g.ToList());
var expectedByType = expectedIssues.GroupBy(i => i.IssueType).ToDictionary(g => g.Key, g => g.Count());
var fixableByType = fixableIssues.GroupBy(i => i.IssueType).ToDictionary(g => g.Key, g => g.Count());
// Output analysis
Console.WriteLine($"\n=== 2025 TN TSA State Competition Event Times Analysis ===");
Console.WriteLine($"\n--- Summary Statistics ---");
Console.WriteLine($"Total lines in file: {totalLines}");
Console.WriteLine($"Total occurrences parsed: {totalParsed}");
Console.WriteLine($"Total issues found: {result.Issues.Count}");
Console.WriteLine($"Issues by type:");
foreach (var kvp in issuesByType.OrderByDescending(x => x.Value))
{
Console.WriteLine($" {kvp.Key}: {kvp.Value}");
}
Console.WriteLine($" Expected issues (HS-related): {expectedIssues.Count} ({100.0 * expectedIssues.Count / Math.Max(1, result.Issues.Count):F1}%)");
Console.WriteLine($" Fixable issues: {fixableIssues.Count} ({100.0 * fixableIssues.Count / Math.Max(1, result.Issues.Count):F1}%)");
Console.WriteLine($"Event sections: HS={hsSections}, MS={msSections}");
Console.WriteLine($"Events with occurrences: {result.Occurrences.Count}");
Console.WriteLine($"Special events found:");
Console.WriteLine($"\n--- Special Events Found ---");
if (result.Occurrences.ContainsKey(EventDefinition.GeneralSchedule))
Console.WriteLine($" GeneralSchedule: {result.Occurrences[EventDefinition.GeneralSchedule].Count} occurrences");
if (result.Occurrences.ContainsKey(EventDefinition.MeetTheCandidates))
@@ -220,8 +454,60 @@ public class EventOccurrenceParser_Tests
if (result.Occurrences.ContainsKey(EventDefinition.VotingDelegateMeeting))
Console.WriteLine($" VotingDelegateMeeting: {result.Occurrences[EventDefinition.VotingDelegateMeeting].Count} occurrences");
Console.WriteLine($"\n--- Issue Breakdown by Type ---");
foreach (var kvp in issuesByType.OrderByDescending(x => x.Value.Count))
{
var issueType = kvp.Key;
var allIssues = kvp.Value;
expectedByType.TryGetValue(issueType, out var expectedCount);
fixableByType.TryGetValue(issueType, out var fixableCount);
Console.WriteLine($"\n{issueType}:");
Console.WriteLine($" Total: {allIssues.Count} (Expected: {expectedCount}, Fixable: {fixableCount})");
if (fixableCount > 0)
{
var fixableOfType = fixableIssues.Where(i => i.IssueType == issueType).ToList();
var samples = GetSampleLines(fixableOfType, 5);
Console.WriteLine($" Sample fixable issues:");
foreach (var sample in samples)
{
Console.WriteLine(sample);
}
}
}
// Pattern Analysis
var locationFailures = fixableIssues.Where(i => i.IssueType == ParsingIssueType.LocationParseFailure).ToList();
if (locationFailures.Any())
{
Console.WriteLine($"\n--- Location Parse Failure Analysis ---");
var locationPatterns = AnalyzeLocationFailures(locationFailures, fileLines);
var topLocations = locationPatterns.OrderByDescending(x => x.Value).Take(10);
Console.WriteLine($"Top unmatched location strings:");
foreach (var loc in topLocations)
{
Console.WriteLine($" \"{loc.Key}\" (appears {loc.Value} times)");
}
}
var unmatchedLines = fixableIssues.Where(i => i.IssueType == ParsingIssueType.UnmatchedLine).ToList();
if (unmatchedLines.Any())
{
Console.WriteLine($"\n--- Unmatched Line Analysis ---");
var unmatchedPatterns = unmatchedLines
.GroupBy(i => i.LineContent.Trim())
.OrderByDescending(g => g.Count())
.Take(10);
Console.WriteLine($"Top unmatched line formats:");
foreach (var pattern in unmatchedPatterns)
{
Console.WriteLine($" \"{pattern.Key}\" (appears {pattern.Count()} times)");
}
}
// Test passes if no exceptions were thrown
Assert.Pass($"Successfully parsed {totalParsed} occurrences with {result.Issues.Count} issues");
Assert.Pass($"Successfully parsed {totalParsed} occurrences with {result.Issues.Count} issues ({fixableIssues.Count} fixable)");
}
[Test]
@@ -239,22 +525,34 @@ public class EventOccurrenceParser_Tests
// Assert - Should parse without exceptions
Assert.That(result, Is.Not.Null, "Parser should return a result");
// Analyze and report results
var totalLines = File.ReadAllLines(fileInfo.FullName).Length;
var issuesByType = result.Issues.GroupBy(i => i.IssueType).ToDictionary(g => g.Key, g => g.Count());
// Load file lines for analysis
var fileLines = File.ReadAllLines(fileInfo.FullName).ToList();
var totalLines = fileLines.Count;
var totalParsed = result.Occurrences.Values.Sum(list => list.Count);
// Categorize issues
var (expectedIssues, fixableIssues) = CategorizeIssues(result.Issues, fileLines);
// Count event sections
var (hsSections, msSections) = CountEventSections(fileLines);
// Group issues by type
var issuesByType = result.Issues.GroupBy(i => i.IssueType).ToDictionary(g => g.Key, g => g.ToList());
var expectedByType = expectedIssues.GroupBy(i => i.IssueType).ToDictionary(g => g.Key, g => g.Count());
var fixableByType = fixableIssues.GroupBy(i => i.IssueType).ToDictionary(g => g.Key, g => g.Count());
// Output analysis
Console.WriteLine($"\n=== 2024 TN TSA State Competition Event Times Analysis ===");
Console.WriteLine($"\n--- Summary Statistics ---");
Console.WriteLine($"Total lines in file: {totalLines}");
Console.WriteLine($"Total occurrences parsed: {totalParsed}");
Console.WriteLine($"Total issues found: {result.Issues.Count}");
Console.WriteLine($"Issues by type:");
foreach (var kvp in issuesByType.OrderByDescending(x => x.Value))
{
Console.WriteLine($" {kvp.Key}: {kvp.Value}");
}
Console.WriteLine($" Expected issues (HS-related): {expectedIssues.Count} ({100.0 * expectedIssues.Count / Math.Max(1, result.Issues.Count):F1}%)");
Console.WriteLine($" Fixable issues: {fixableIssues.Count} ({100.0 * fixableIssues.Count / Math.Max(1, result.Issues.Count):F1}%)");
Console.WriteLine($"Event sections: HS={hsSections}, MS={msSections}");
Console.WriteLine($"Events with occurrences: {result.Occurrences.Count}");
Console.WriteLine($"Special events found:");
Console.WriteLine($"\n--- Special Events Found ---");
if (result.Occurrences.ContainsKey(EventDefinition.GeneralSchedule))
Console.WriteLine($" GeneralSchedule: {result.Occurrences[EventDefinition.GeneralSchedule].Count} occurrences");
if (result.Occurrences.ContainsKey(EventDefinition.MeetTheCandidates))
@@ -264,7 +562,59 @@ public class EventOccurrenceParser_Tests
if (result.Occurrences.ContainsKey(EventDefinition.VotingDelegateMeeting))
Console.WriteLine($" VotingDelegateMeeting: {result.Occurrences[EventDefinition.VotingDelegateMeeting].Count} occurrences");
Console.WriteLine($"\n--- Issue Breakdown by Type ---");
foreach (var kvp in issuesByType.OrderByDescending(x => x.Value.Count))
{
var issueType = kvp.Key;
var allIssues = kvp.Value;
expectedByType.TryGetValue(issueType, out var expectedCount);
fixableByType.TryGetValue(issueType, out var fixableCount);
Console.WriteLine($"\n{issueType}:");
Console.WriteLine($" Total: {allIssues.Count} (Expected: {expectedCount}, Fixable: {fixableCount})");
if (fixableCount > 0)
{
var fixableOfType = fixableIssues.Where(i => i.IssueType == issueType).ToList();
var samples = GetSampleLines(fixableOfType, 5);
Console.WriteLine($" Sample fixable issues:");
foreach (var sample in samples)
{
Console.WriteLine(sample);
}
}
}
// Pattern Analysis
var locationFailures = fixableIssues.Where(i => i.IssueType == ParsingIssueType.LocationParseFailure).ToList();
if (locationFailures.Any())
{
Console.WriteLine($"\n--- Location Parse Failure Analysis ---");
var locationPatterns = AnalyzeLocationFailures(locationFailures, fileLines);
var topLocations = locationPatterns.OrderByDescending(x => x.Value).Take(10);
Console.WriteLine($"Top unmatched location strings:");
foreach (var loc in topLocations)
{
Console.WriteLine($" \"{loc.Key}\" (appears {loc.Value} times)");
}
}
var unmatchedLines = fixableIssues.Where(i => i.IssueType == ParsingIssueType.UnmatchedLine).ToList();
if (unmatchedLines.Any())
{
Console.WriteLine($"\n--- Unmatched Line Analysis ---");
var unmatchedPatterns = unmatchedLines
.GroupBy(i => i.LineContent.Trim())
.OrderByDescending(g => g.Count())
.Take(10);
Console.WriteLine($"Top unmatched line formats:");
foreach (var pattern in unmatchedPatterns)
{
Console.WriteLine($" \"{pattern.Key}\" (appears {pattern.Count()} times)");
}
}
// Test passes if no exceptions were thrown
Assert.Pass($"Successfully parsed {totalParsed} occurrences with {result.Issues.Count} issues");
Assert.Pass($"Successfully parsed {totalParsed} occurrences with {result.Issues.Count} issues ({fixableIssues.Count} fixable)");
}
}
@@ -33,15 +33,6 @@
</MudButton>
</MudStack>
<MudStack Spacing="3">
<MudTextField
T="string"
Label="Event Occurrence Text"
@bind-Value="_inputText"
Variant="Variant.Outlined"
Lines="15"
multiline="true"
Placeholder="Paste event occurrence text here..."
HelperText="Paste the event schedule text in the format expected by the parser" />
<MudStack Row="true" Spacing="2">
<MudButton
@@ -59,6 +50,17 @@
Clear
</MudButton>
</MudStack>
<MudTextField
T="string"
Label="Event Occurrence Text"
@bind-Value="_inputText"
Variant="Variant.Outlined"
Lines="15"
AutoGrow="true"
Placeholder="Paste event occurrence text here..."
HelperText="Paste the event schedule text in the format expected by the parser" />
<!-- @bind-Value:event="oninput" -->
</MudStack>
</MudPaper>
</MudItem>
+10 -1
View File
@@ -1,5 +1,6 @@
using Data;
using Microsoft.AspNetCore.DataProtection;
using Microsoft.AspNetCore.SignalR;
using Microsoft.AspNetCore.StaticFiles;
using Microsoft.EntityFrameworkCore;
using MudBlazor.Services;
@@ -121,8 +122,16 @@ if (builder.Environment.IsProduction())
// Add services to the container.
builder.Services.AddControllersWithViews();
// Configure SignalR HubOptions to support large text inputs (e.g., pasted event occurrence files).
// The default MaximumReceiveMessageSize is 32 KB; raise it to 1 MB so large pasted text is not rejected.
builder.Services.AddRazorComponents()
.AddInteractiveServerComponents();
.AddInteractiveServerComponents()
.AddHubOptions(options =>
{
// Increase maximum message size to 1MB to support large pasted text files
// The test file is ~430 lines, which is well within this limit
options.MaximumReceiveMessageSize = 1024 * 1024; // 1MB
});
builder.Services.AddMudServices();
builder.Services.AddVisNetwork();