using System.Text.RegularExpressions;
using Core.Entities;
using Core.Models;
using Core.Utility;
using EventOccurrenceParsers = Core.Parsers.EventOccurrence;
using SchoolLevel = Core.Models.SchoolLevel;

namespace Core.Parsers;

/// <summary>
/// Result of parsing an event occurrence file, containing both the parsed occurrences
/// and any issues recorded while parsing.
/// </summary>
public class EventOccurrenceParserResult
{
    // NOTE(review): the generic type arguments in this class were lost in the copy under review and
    // have been reconstructed from how the parser uses each member - confirm against version control.

    /// <summary>Parsed occurrences, grouped by the event definition they were attributed to.</summary>
    public IDictionary<EventDefinition, List<Core.Entities.EventOccurrence>> Occurrences { get; set; } =
        new Dictionary<EventDefinition, List<Core.Entities.EventOccurrence>>();

    /// <summary>Issues recorded for lines that could not be matched or parsed.</summary>
    public List<ParsingIssue> Issues { get; set; } = new();

    /// <summary>Normalized text of high-school section headers that were filtered out.</summary>
    public List<string> SkippedHSSectionHeaders { get; set; } = new();

    /// <summary>Normalized text of middle-school section headers that were filtered out.</summary>
    public List<string> SkippedMSSectionHeaders { get; set; } = new();

    /// <summary>Number of occurrence lines skipped because they were under a middle-school section.</summary>
    public int SkippedMSEventCount { get; set; }

    /// <summary>Number of occurrence lines skipped because they were under a high-school section.</summary>
    public int SkippedHSEventCount { get; set; }
}

/// <summary>
/// Line-oriented parser for an event occurrence text file. Section headers select the current
/// event definition; occurrence lines below a header are attached to that definition.
/// </summary>
public class EventOccurrenceParser
{
    /// <summary>
    /// Matches a trailing weekday name followed by a comma (and at most one whitespace character)
    /// at the end of an occurrence name. Built once instead of being re-parsed for every line.
    /// </summary>
    private static readonly Regex WeekdaySuffixRegex = new(
        @"(?:Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday),\s?$",
        RegexOptions.Compiled);

    private readonly FileSystemInfo _txtFile;
    private readonly ICollection<EventDefinition> _events;
    private readonly SchoolLevel? _schoolLevel;

    /// <summary>
    /// Creates a parser for the given file.
    /// </summary>
    /// <param name="txtFile">The text file to read; only its <c>FullName</c> is used.</param>
    /// <param name="events">Known event definitions that section headers are matched against.</param>
    /// <param name="schoolLevel">
    /// School level used to filter sections. When <c>null</c>, high-school sections are skipped
    /// (backward-compatible behaviour).
    /// </param>
    public EventOccurrenceParser(FileSystemInfo txtFile, ICollection<EventDefinition> events, SchoolLevel? schoolLevel = null)
    {
        _events = events;
        _txtFile = txtFile;
        _schoolLevel = schoolLevel;
    }

    /// <summary>
    /// Reads the file line by line and builds the parse result.
    /// </summary>
    /// <returns>
    /// The occurrences grouped by event definition, together with the issues and skip statistics
    /// collected on the way. Date and time parse failures are recorded as issues, not thrown.
    /// </returns>
    public EventOccurrenceParserResult Parse()
    {
        var result = new EventOccurrenceParserResult();
        var occurrences = result.Occurrences;
        var issues = result.Issues;
        var state = new ParseState();

        // Captured once so that every occurrence parsed in this run is resolved against the same year.
        var currentYear = DateTime.Now.Year;
        var lineNumber = 0;

        foreach (var rawLine in File.ReadLines(_txtFile.FullName))
        {
            lineNumber++; // 1-based, reported in ParsingIssue.LineNumber

            // Trim and sanitize up front so that all later checks work on the same normalized text.
            var normalizedLine = TextUtil.SanitizeInput(rawLine.Trim());

            // Empty lines and comment lines are ignored, but they end continuation mode.
            if (EventOccurrenceParsers.LineClassifier.IsEmptyLine(normalizedLine)
                || EventOccurrenceParsers.LineClassifier.IsCommentLine(normalizedLine))
            {
                state.InContinuationMode = false;
                continue;
            }

            var occurrenceLine = EventOccurrenceGrammar.TryParseOccurrenceLine(normalizedLine);
            if (!occurrenceLine.HasValue)
            {
                ProcessNonOccurrenceLine(normalizedLine, lineNumber, result, state);
                continue;
            }

            // Occurrence lines end continuation mode.
            state.InContinuationMode = false;

            // Occurrences under a filtered-out section are counted but not parsed.
            if (ShouldSkipOccurrence(state.CurrentSectionLevel, result))
            {
                continue;
            }

            var (occurrenceName, month, dayOfMonthStr, timeAndLocation) = occurrenceLine.Value;

            // Drop a trailing "<Weekday>," from the occurrence name if present.
            occurrenceName = WeekdaySuffixRegex.Replace(occurrenceName, "").Trim();

            // The name pattern or the current section decides which event definition applies.
            EventDefinition? eventDefinition =
                EventOccurrenceParsers.EventDefinitionResolver.Resolve(occurrenceName, state.CurrentEventDefinition);
            if (eventDefinition == null)
            {
                issues.Add(new ParsingIssue
                {
                    LineNumber = lineNumber,
                    LineContent = normalizedLine,
                    IssueType = ParsingIssueType.MissingEventDefinition,
                    Message = $"Cannot determine event definition for occurrence: {occurrenceName}"
                });
                continue;
            }

            // timeAndLocation comes from the already sanitized line, so it needs no further normalization.
            EventOccurrenceParsers.TimeLocationParser.Parse(timeAndLocation, out string time, out string location);

            DateOnly? startDate = null;
            try
            {
                startDate = TextUtil.ParseDate(month, dayOfMonthStr.ToString(), currentYear);
            }
            catch (Exception ex)
            {
                // Deliberate best effort: a bad date becomes an issue instead of aborting the parse.
                issues.Add(new ParsingIssue
                {
                    LineNumber = lineNumber,
                    LineContent = normalizedLine,
                    IssueType = ParsingIssueType.DateParseFailure,
                    Message = $"Failed to parse date: {ex.Message}"
                });
                continue;
            }

            TimeOnly? startTime = null;
            try
            {
                startTime = EventOccurrenceParsers.TimeParser.Parse(time);
            }
            catch (Exception ex)
            {
                // Deliberate best effort: a bad time becomes an issue instead of aborting the parse.
                issues.Add(new ParsingIssue
                {
                    LineNumber = lineNumber,
                    LineContent = normalizedLine,
                    IssueType = ParsingIssueType.TimeParseFailure,
                    Message = $"Failed to parse time '{time}': {ex.Message}"
                });
                continue;
            }

            if (startDate == null || startTime == null)
            {
                continue;
            }

            var eventOccurrence = new Core.Entities.EventOccurrence
            {
                Name = occurrenceName,
                StartTime = new DateTime(startDate.Value, startTime.Value),
                Time = $"{time}",
                Date = $"{month} {dayOfMonthStr}",
                Location = location
            };

            // Single lookup instead of ContainsKey + Add + indexer.
            if (!occurrences.TryGetValue(eventDefinition, out var bucket))
            {
                bucket = [];
                occurrences.Add(eventDefinition, bucket);
            }

            bucket.Add(eventOccurrence);

            // A successfully parsed occurrence means we are in a section that is not filtered out.
            state.CurrentSectionLevel = null;
        }

        return result;
    }

    /// <summary>
    /// Handles a line that is not an occurrence: section header, general schedule header,
    /// legacy "MS"/"HS" header, continuation text, or an unmatched line.
    /// </summary>
    /// <param name="normalizedLine">The sanitized line content.</param>
    /// <param name="lineNumber">1-based line number, used when recording issues.</param>
    /// <param name="result">The parse result that receives issues and skipped headers.</param>
    /// <param name="state">Mutable parser state that is updated in place.</param>
    private void ProcessNonOccurrenceLine(
        string normalizedLine,
        int lineNumber,
        EventOccurrenceParserResult result,
        ParseState state)
    {
        // 1. Section header recognised by the grammar parser.
        var sectionHeader = EventOccurrenceGrammar.TryParseSectionHeader(normalizedLine);
        if (sectionHeader.HasValue)
        {
            var (eventNamePart, schoolLevel) = sectionHeader.Value;
            state.InContinuationMode = false;
            ApplySectionHeader(
                SetCurrentSectionLevel(schoolLevel),
                eventNamePart,
                $"Section header '{eventNamePart} - {schoolLevel}'",
                normalizedLine,
                lineNumber,
                result,
                state);
            return;
        }

        // 2. General schedule / session header: no school level applies.
        if (EventOccurrenceParsers.SectionHeaderMatcher.IsGeneralSchedule(normalizedLine))
        {
            state.InContinuationMode = false;
            state.CurrentSectionLevel = null;
            state.CurrentEventDefinition = EventDefinition.GeneralSchedule;
            return;
        }

        // 3. Legacy header form: a line that merely mentions "MS" or "HS" (backward compatibility).
        if (EventOccurrenceParsers.SectionHeaderMatcher.HasSchoolLevel(normalizedLine))
        {
            state.InContinuationMode = false;

            // "MS" is checked first, so a line containing both is treated as middle school.
            SchoolLevel? legacyLevel = null;
            if (normalizedLine.Contains("MS", StringComparison.OrdinalIgnoreCase))
            {
                legacyLevel = SchoolLevel.MiddleSchool;
            }
            else if (normalizedLine.Contains("HS", StringComparison.OrdinalIgnoreCase))
            {
                legacyLevel = SchoolLevel.HighSchool;
            }

            ApplySectionHeader(
                legacyLevel,
                normalizedLine,
                "Section header with 'MS' or 'HS'",
                normalizedLine,
                lineNumber,
                result,
                state);
            return;
        }

        // 4. A line starting with "*" switches continuation mode on; it stays on until a line
        //    type that resets it (empty, comment, header, occurrence) is seen.
        if (normalizedLine.TrimStart().StartsWith("*", StringComparison.Ordinal))
        {
            state.InContinuationMode = true;
        }

        if (state.InContinuationMode || EventOccurrenceParsers.LineClassifier.IsContinuationLine(normalizedLine))
        {
            return;
        }

        // 5. Anything else is reported as an unmatched line.
        if (!string.IsNullOrWhiteSpace(normalizedLine))
        {
            result.Issues.Add(new ParsingIssue
            {
                LineNumber = lineNumber,
                LineContent = normalizedLine,
                IssueType = ParsingIssueType.UnmatchedLine,
                Message = "Line does not match expected format (Name Month Day Time/Location)"
            });
        }
    }

    /// <summary>
    /// Applies a section header to the parser state: records the section level, drops the current
    /// event definition when the section is filtered out, and otherwise matches the header text
    /// against the known event definitions.
    /// </summary>
    /// <param name="sectionLevel">School level of the section, or <c>null</c> if it has none.</param>
    /// <param name="matchText">Text that is matched against the event definitions.</param>
    /// <param name="headerDescription">Leading part of the issue message used when nothing matches.</param>
    /// <param name="normalizedLine">The sanitized line content.</param>
    /// <param name="lineNumber">1-based line number, used when recording issues.</param>
    /// <param name="result">The parse result that receives issues and skipped headers.</param>
    /// <param name="state">Mutable parser state that is updated in place.</param>
    private void ApplySectionHeader(
        SchoolLevel? sectionLevel,
        string matchText,
        string headerDescription,
        string normalizedLine,
        int lineNumber,
        EventOccurrenceParserResult result,
        ParseState state)
    {
        // Tracked in both cases: for filtered-out sections it is what makes the following
        // occurrence lines get skipped and counted.
        state.CurrentSectionLevel = sectionLevel;

        if (ShouldSkipSection(sectionLevel, normalizedLine, result))
        {
            state.CurrentEventDefinition = null; // no issue is created for filtered-out sections
            return;
        }

        var matched = EventOccurrenceParsers.SectionHeaderMatcher.MatchEventDefinition(matchText, _events);
        if (matched != null)
        {
            state.CurrentEventDefinition = matched;
            return;
        }

        // NOTE(review): the previous event definition stays active after an unmatched header, so
        // later occurrences may be attributed to the prior section. Kept as-is; confirm intent.
        var bestRatio = EventOccurrenceParsers.SectionHeaderMatcher.GetBestMatchRatio(matchText, _events);
        result.Issues.Add(new ParsingIssue
        {
            LineNumber = lineNumber,
            LineContent = normalizedLine,
            IssueType = ParsingIssueType.UnmatchedLine,
            Message = $"{headerDescription} found but no matching event definition (best match ratio: {bestRatio})"
        });
    }

    /// <summary>
    /// Shared filter decision used for both section headers and occurrence lines.
    /// </summary>
    /// <param name="sectionLevel">School level of the section, or <c>null</c> if it has none.</param>
    /// <returns>
    /// <c>true</c> when the section level is the opposite of the configured school level. An unset
    /// school level behaves like middle school, i.e. only high-school sections are filtered out.
    /// </returns>
    private bool IsFilteredOut(SchoolLevel? sectionLevel)
    {
        if (!sectionLevel.HasValue)
        {
            return false; // sections without a school level are never skipped
        }

        var effectiveLevel = _schoolLevel ?? SchoolLevel.MiddleSchool;
        return (effectiveLevel, sectionLevel.Value) switch
        {
            (SchoolLevel.MiddleSchool, SchoolLevel.HighSchool) => true,
            (SchoolLevel.HighSchool, SchoolLevel.MiddleSchool) => true,
            _ => false,
        };
    }

    /// <summary>
    /// Determines whether a section should be skipped based on the configured school level and,
    /// if so, records its header in the result. With no school level configured, high-school
    /// sections are skipped (backward compatibility).
    /// </summary>
    /// <param name="sectionSchoolLevel">The school level of the section (null if it has none).</param>
    /// <param name="normalizedLine">The normalized header line, stored when the section is skipped.</param>
    /// <param name="result">The parse result to update with skipped headers.</param>
    /// <returns>True if the section should be skipped, false otherwise.</returns>
    private bool ShouldSkipSection(SchoolLevel? sectionSchoolLevel, string normalizedLine, EventOccurrenceParserResult result)
    {
        if (!IsFilteredOut(sectionSchoolLevel))
        {
            return false;
        }

        // A filtered-out section is always either high school or middle school.
        var skippedHeaders = sectionSchoolLevel == SchoolLevel.HighSchool
            ? result.SkippedHSSectionHeaders
            : result.SkippedMSSectionHeaders;
        skippedHeaders.Add(normalizedLine);
        return true;
    }

    /// <summary>
    /// Converts a school level string ("MS", "HS", or empty) to a <see cref="SchoolLevel"/> value.
    /// Despite its name this method is a pure conversion and does not modify any state.
    /// </summary>
    /// <param name="schoolLevelStr">The school level string from the section header.</param>
    /// <returns>The matching school level, or null for empty or unrecognised input.</returns>
    private static SchoolLevel? SetCurrentSectionLevel(string schoolLevelStr)
    {
        if (string.IsNullOrWhiteSpace(schoolLevelStr))
        {
            return null;
        }

        if (schoolLevelStr.Equals("MS", StringComparison.OrdinalIgnoreCase))
        {
            return SchoolLevel.MiddleSchool;
        }

        if (schoolLevelStr.Equals("HS", StringComparison.OrdinalIgnoreCase))
        {
            return SchoolLevel.HighSchool;
        }

        return null;
    }

    /// <summary>
    /// Checks whether the current occurrence should be skipped based on the section it is in and,
    /// if so, increments the matching skip counter in the result.
    /// </summary>
    /// <param name="currentSectionLevel">The school level of the current section.</param>
    /// <param name="result">The parse result to update with skip counts.</param>
    /// <returns>True if the occurrence should be skipped, false otherwise.</returns>
    private bool ShouldSkipOccurrence(SchoolLevel? currentSectionLevel, EventOccurrenceParserResult result)
    {
        if (!IsFilteredOut(currentSectionLevel))
        {
            return false;
        }

        // A filtered-out section is always either high school or middle school.
        if (currentSectionLevel == SchoolLevel.HighSchool)
        {
            result.SkippedHSEventCount++;
        }
        else
        {
            result.SkippedMSEventCount++;
        }

        return true;
    }

    /// <summary>Mutable state carried from one line to the next while parsing.</summary>
    private sealed class ParseState
    {
        /// <summary>Event definition selected by the most recent matching header, if any.</summary>
        public EventDefinition? CurrentEventDefinition { get; set; }

        /// <summary>School level of the section currently being read, if any.</summary>
        public SchoolLevel? CurrentSectionLevel { get; set; }

        /// <summary>True while lines are treated as continuation text and ignored.</summary>
        public bool InContinuationMode { get; set; }
    }
}