Files
chapter-organizer/Core/Services/EventOccurrenceParserService.cs
T
poprhythm ecd6173a44 Refactor event occurrence parsing to unify section header handling and event count tracking
This commit updates the EventOccurrenceParseResult and EventOccurrenceParserResult classes to consolidate the handling of skipped section headers and event counts into a single set of properties. The previous separate lists and counts for middle school and high school sections have been replaced with a unified approach, improving clarity and maintainability. Additionally, the EventOccurrenceParserService has been modified to reflect these changes, ensuring consistent behavior across the application. This refactor enhances the overall structure of the event parsing logic.
2026-01-10 18:19:16 -05:00

223 lines
9.3 KiB
C#

using System.Text;
using System.Text.RegularExpressions;
using Core.Entities;
using Core.Models;
using Core.Parsers;
using Microsoft.Extensions.Configuration;
using SchoolLevel = Core.Models.SchoolLevel;
namespace Core.Services;
/// <summary>
/// Service implementation for parsing event occurrence text data.
/// Wraps <see cref="EventOccurrenceParser"/> to support text input and error collection:
/// failures are reported through the returned result instead of being thrown.
/// </summary>
public class EventOccurrenceParserService : IEventOccurrenceParserService
{
    /// <summary>Configuration key that holds the school level setting.</summary>
    private const string SchoolLevelConfigKey = "ChapterSettings:SchoolLevel";

    /// <summary>Locations longer than this many characters are reported as suspicious.</summary>
    private const int LongLocationThreshold = 50;

    /// <summary>
    /// Matches month names followed by a day number, time patterns (HH:MM AM/PM), and NOON.
    /// Built once and shared, because constructing a compiled regex on every call is expensive.
    /// </summary>
    private static readonly Regex DateTimePattern = new(
        @"\b(January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2}\b|\b\d{1,2}:\d{2}\s*(a|p)\.?m\.?\b|\bNOON\b",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>
    /// Matches text ending with " - MS" or " - HS", which indicates that the next section header
    /// was concatenated onto the location (missing line break). Only regular hyphens are matched;
    /// the original author's note states the input has en/em dashes normalised before this point.
    /// </summary>
    private static readonly Regex SectionHeaderPattern = new(
        @"-\s*(MS|HS)\s*$",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    private readonly IConfiguration? _configuration;

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="configuration">
    /// Optional configuration; when supplied, <c>ChapterSettings:SchoolLevel</c> is read from it
    /// and handed to the parser.
    /// </param>
    public EventOccurrenceParserService(IConfiguration? configuration = null)
    {
        _configuration = configuration;
    }

    /// <inheritdoc/>
    public EventOccurrenceParseResult ParseFromText(string text, ICollection<EventDefinition> events)
    {
        var result = new EventOccurrenceParseResult();
        if (string.IsNullOrWhiteSpace(text))
        {
            result.Errors.Add("Input text is empty or whitespace.");
            return result;
        }

        try
        {
            // EventOccurrenceParser is constructed from a FileInfo, so the text is staged in a temp file.
            var tempFile = Path.GetTempFileName();
            try
            {
                File.WriteAllText(tempFile, text, Encoding.UTF8);

                var schoolLevel = ReadConfiguredSchoolLevel(result);
                var parser = new EventOccurrenceParser(new FileInfo(tempFile), events, schoolLevel);
                var parserResult = parser.Parse();

                // Convert parsed occurrences to result format, handling special event types.
                foreach (var kvp in parserResult.Occurrences)
                {
                    var eventDefinition = kvp.Key;
                    var occurrences = kvp.Value;

                    // Resolved once per event definition rather than once per occurrence.
                    var specialEventType = GetSpecialEventType(eventDefinition);
                    if (specialEventType is not null)
                    {
                        // Special events are not stored in the database: no definition id, only a type tag.
                        foreach (var occurrence in occurrences)
                        {
                            occurrence.EventDefinitionId = null;
                            occurrence.SpecialEventType = specialEventType;
                        }
                    }
                    else
                    {
                        // Regular events reference their definition and carry no special type.
                        foreach (var occurrence in occurrences)
                        {
                            occurrence.EventDefinitionId = eventDefinition.Id;
                            occurrence.SpecialEventType = null;
                        }
                    }

                    result.Occurrences[eventDefinition] = occurrences;
                }

                // Carry over the parser's diagnostics and bookkeeping.
                result.Issues.AddRange(parserResult.Issues);
                result.SkippedSectionHeaders.AddRange(parserResult.SkippedSectionHeaders);
                result.SkippedEventCount = parserResult.SkippedEventCount;
                foreach (var footnote in parserResult.Footnotes)
                {
                    result.Footnotes[footnote.Key] = footnote.Value;
                }

                // Add informational message about skipped events.
                if (parserResult.SkippedEventCount > 0)
                {
                    result.Warnings.Add($"Skipped {parserResult.SkippedEventCount} event occurrence(s) from other school level based on school level setting");
                }

                // Validate locations and add warnings for problematic ones.
                ValidateLocations(result);
            }
            finally
            {
                TryDeleteFile(tempFile);
            }
        }
        catch (Exception ex)
        {
            // Parsing problems are reported through the result rather than propagated to the caller.
            result.Errors.Add($"Error parsing text: {ex.Message}");
            if (ex.InnerException != null)
            {
                result.Errors.Add($"Inner exception: {ex.InnerException.Message}");
            }
        }

        return result;
    }

    /// <summary>
    /// Reads the school level from configuration, if a configuration was supplied.
    /// </summary>
    /// <param name="result">Receives a warning when the configured value cannot be parsed.</param>
    /// <returns>The parsed school level, or <c>null</c> when it is not configured or not recognised.</returns>
    private SchoolLevel? ReadConfiguredSchoolLevel(EventOccurrenceParseResult result)
    {
        var schoolLevelStr = _configuration?.GetSection(SchoolLevelConfigKey).Get<string>();
        if (string.IsNullOrWhiteSpace(schoolLevelStr))
        {
            return null;
        }

        if (Enum.TryParse<SchoolLevel>(schoolLevelStr, ignoreCase: true, out var parsed))
        {
            return parsed;
        }

        // Surface the misconfiguration instead of silently dropping it.
        result.Warnings.Add($"Configured school level '{schoolLevelStr}' is not a valid SchoolLevel value and was ignored.");
        return null;
    }

    /// <summary>
    /// Maps one of the well-known special event definitions to its type tag.
    /// </summary>
    /// <param name="eventDefinition">The event definition to classify.</param>
    /// <returns>The special event type name, or <c>null</c> for a regular event.</returns>
    private static string? GetSpecialEventType(EventDefinition eventDefinition)
    {
        if (eventDefinition == EventDefinition.GeneralSchedule)
        {
            return "GeneralSchedule";
        }

        if (eventDefinition == EventDefinition.MeetTheCandidates)
        {
            return "MeetTheCandidates";
        }

        if (eventDefinition == EventDefinition.ChapterOfficerMeeting)
        {
            return "ChapterOfficerMeeting";
        }

        if (eventDefinition == EventDefinition.VotingDelegateMeeting)
        {
            return "VotingDelegateMeeting";
        }

        if (eventDefinition == EventDefinition.SocialGathering)
        {
            return "SocialGathering";
        }

        return null;
    }

    /// <summary>
    /// Best-effort removal of the temporary file; cleanup failures are deliberately ignored
    /// so they never mask the parse outcome.
    /// </summary>
    /// <param name="path">Path of the file to delete.</param>
    private static void TryDeleteFile(string path)
    {
        try
        {
            if (File.Exists(path))
            {
                File.Delete(path);
            }
        }
        catch
        {
            // Ignore cleanup errors.
        }
    }

    /// <summary>
    /// Validates locations from parsed occurrences and adds warnings for problematic locations:
    /// unusually long text, embedded date/time information, and trailing section headers.
    /// </summary>
    /// <param name="result">The parse result whose occurrences are inspected and whose warnings are extended.</param>
    private static void ValidateLocations(EventOccurrenceParseResult result)
    {
        // Collect all unique, non-blank locations (narrowed to non-null strings once).
        var locations = result.Occurrences.Values
            .SelectMany(list => list)
            .Select(eo => eo.Location)
            .OfType<string>()
            .Where(loc => !string.IsNullOrWhiteSpace(loc))
            .Distinct()
            .ToList();

        if (locations.Count == 0)
        {
            return;
        }

        // The three checks run as separate passes so warnings stay grouped by kind.

        // Check for long locations.
        foreach (var loc in locations)
        {
            if (loc.Length > LongLocationThreshold)
            {
                result.Warnings.Add($"Location '{loc}' is unusually long ({loc.Length} characters) and may contain multiple lines or extra text");
            }
        }

        // Check for date/time patterns.
        foreach (var loc in locations)
        {
            var match = DateTimePattern.Match(loc);
            if (match.Success)
            {
                result.Warnings.Add($"Location '{loc}' may contain date/time information: '{match.Value}'");
            }
        }

        // Check for section header patterns (missing line break detection).
        foreach (var loc in locations)
        {
            var match = SectionHeaderPattern.Match(loc);
            if (match.Success)
            {
                // Extract the section header part for a better warning message.
                var sectionHeaderPart = match.Value.Trim();
                result.Warnings.Add($"Location '{loc}' appears to contain a section header (ends with '{sectionHeaderPart}') - likely missing line break. The location may be corrupted.");
            }
        }
    }
}