Files
chapter-organizer/Core/Services/EventOccurrenceParserService.cs
T
poprhythm cf2c0d8068 Add footnotes support to event occurrence parsing
This commit introduces a new property for capturing footnotes in both the EventOccurrenceParseResult and EventOccurrenceParserResult classes. The EventOccurrenceParser has been updated to handle footnotes, which are identified by lines starting with "*" or as parenthetical notes. The logic for processing these footnotes has been integrated into the parsing flow, ensuring that they are correctly associated with their respective event definitions. Additionally, the EventOccurrenceParserService has been modified to copy footnotes from the parser result, enhancing the overall event parsing functionality.
2026-01-09 10:30:53 -05:00

229 lines
9.7 KiB
C#

using System.Text;
using System.Text.RegularExpressions;
using Core.Entities;
using Core.Models;
using Core.Parsers;
using Microsoft.Extensions.Configuration;
using SchoolLevel = Core.Models.SchoolLevel;
namespace Core.Services;
/// <summary>
/// Service implementation for parsing event occurrence text data.
/// Wraps <see cref="EventOccurrenceParser"/> to support text input and error collection.
/// </summary>
public class EventOccurrenceParserService : IEventOccurrenceParserService
{
    /// <summary>
    /// Locations longer than this many characters are reported as suspicious.
    /// </summary>
    private const int MaxExpectedLocationLength = 50;

    // The patterns below are built once and shared across calls: constructing a
    // RegexOptions.Compiled regex is expensive, and Regex instances are safe to
    // reuse for matching.

    /// <summary>
    /// Matches month names followed by a day number, clock times such as "9:30 a.m.",
    /// and the word NOON (case-insensitive).
    /// </summary>
    private static readonly Regex DateTimePattern = new Regex(
        @"\b(January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2}\b|\b\d{1,2}:\d{2}\s*(a|p)\.?m\.?\b|\bNOON\b",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>
    /// Matches text ending with "- MS" or "- HS" (case-insensitive, optional whitespace).
    /// Only the regular hyphen is matched; the original note states that en/em dashes
    /// have already been normalized to hyphens before this point (not verifiable here).
    /// </summary>
    private static readonly Regex SectionHeaderPattern = new Regex(
        @"-\s*(MS|HS)\s*$",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    private readonly IConfiguration? _configuration;

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="configuration">
    /// Optional configuration used to look up <c>ChapterSettings:SchoolLevel</c>.
    /// When <c>null</c>, no school level is passed to the parser.
    /// </param>
    public EventOccurrenceParserService(IConfiguration? configuration = null)
    {
        _configuration = configuration;
    }

    /// <inheritdoc/>
    /// <remarks>
    /// The text is written to a temporary file because <see cref="EventOccurrenceParser"/>
    /// is constructed from a <see cref="FileInfo"/>. Exceptions raised while parsing are
    /// not propagated; their messages are appended to the result's <c>Errors</c> list.
    /// </remarks>
    public EventOccurrenceParseResult ParseFromText(string text, ICollection<EventDefinition> events)
    {
        var result = new EventOccurrenceParseResult();

        if (string.IsNullOrWhiteSpace(text))
        {
            result.Errors.Add("Input text is empty or whitespace.");
            return result;
        }

        try
        {
            // Create a temporary file from the text content
            var tempFile = Path.GetTempFileName();
            try
            {
                File.WriteAllText(tempFile, text, Encoding.UTF8);

                // Use the existing EventOccurrenceParser with the configured school level
                var parser = new EventOccurrenceParser(new FileInfo(tempFile), events, ReadConfiguredSchoolLevel());
                var parserResult = parser.Parse();

                // Tag every occurrence as either a regular event (EventDefinitionId set,
                // SpecialEventType null) or a special event (the reverse), then copy it over.
                foreach (var kvp in parserResult.Occurrences)
                {
                    var eventDefinition = kvp.Key;
                    var occurrences = kvp.Value;
                    var specialEventType = GetSpecialEventTypeName(eventDefinition);

                    foreach (var occurrence in occurrences)
                    {
                        if (specialEventType is null)
                        {
                            occurrence.EventDefinitionId = eventDefinition.Id;
                        }
                        else
                        {
                            occurrence.EventDefinitionId = null;
                        }

                        occurrence.SpecialEventType = specialEventType;
                    }

                    // Special events keep their special EventDefinition as the key.
                    result.Occurrences[eventDefinition] = occurrences;
                }

                // Copy parsing issues from parser result
                result.Issues.AddRange(parserResult.Issues);

                // Copy skipped section headers and counts from parser result
                result.SkippedHSSectionHeaders.AddRange(parserResult.SkippedHSSectionHeaders);
                result.SkippedMSSectionHeaders.AddRange(parserResult.SkippedMSSectionHeaders);
                result.SkippedMSEventCount = parserResult.SkippedMSEventCount;
                result.SkippedHSEventCount = parserResult.SkippedHSEventCount;

                // Copy footnotes from parser result
                foreach (var footnote in parserResult.Footnotes)
                {
                    result.Footnotes[footnote.Key] = footnote.Value;
                }

                // Add informational messages about skipped events
                if (parserResult.SkippedMSEventCount > 0)
                {
                    result.Warnings.Add($"Skipped {parserResult.SkippedMSEventCount} Middle School (MS) event occurrence(s) based on school level setting");
                }

                if (parserResult.SkippedHSEventCount > 0)
                {
                    result.Warnings.Add($"Skipped {parserResult.SkippedHSEventCount} High School (HS) event occurrence(s) based on school level setting");
                }

                // Validate locations and add warnings for problematic ones
                ValidateLocations(result);
            }
            finally
            {
                TryDeleteTempFile(tempFile);
            }
        }
        catch (Exception ex)
        {
            result.Errors.Add($"Error parsing text: {ex.Message}");
            if (ex.InnerException != null)
            {
                result.Errors.Add($"Inner exception: {ex.InnerException.Message}");
            }
        }

        return result;
    }

    /// <summary>
    /// Reads <c>ChapterSettings:SchoolLevel</c> from configuration.
    /// </summary>
    /// <returns>
    /// The parsed <see cref="SchoolLevel"/> (case-insensitive), or <c>null</c> when no
    /// configuration was supplied, the value is blank, or it does not parse.
    /// </returns>
    private SchoolLevel? ReadConfiguredSchoolLevel()
    {
        if (_configuration is null)
        {
            return null;
        }

        var configuredValue = _configuration.GetSection("ChapterSettings:SchoolLevel").Get<string>();
        if (!string.IsNullOrWhiteSpace(configuredValue) &&
            Enum.TryParse<SchoolLevel>(configuredValue, ignoreCase: true, out var parsed))
        {
            return parsed;
        }

        return null;
    }

    /// <summary>
    /// Maps one of the special event definitions to the name stored in
    /// <c>SpecialEventType</c>.
    /// </summary>
    /// <returns>The special event type name, or <c>null</c> for a regular event.</returns>
    private static string? GetSpecialEventTypeName(EventDefinition eventDefinition)
    {
        // Uses == (not pattern matching) so any equality operator defined on
        // EventDefinition keeps applying, exactly as in the original comparisons.
        if (eventDefinition == EventDefinition.GeneralSchedule)
        {
            return "GeneralSchedule";
        }

        if (eventDefinition == EventDefinition.MeetTheCandidates)
        {
            return "MeetTheCandidates";
        }

        if (eventDefinition == EventDefinition.ChapterOfficerMeeting)
        {
            return "ChapterOfficerMeeting";
        }

        if (eventDefinition == EventDefinition.VotingDelegateMeeting)
        {
            return "VotingDelegateMeeting";
        }

        if (eventDefinition == EventDefinition.SocialGathering)
        {
            return "SocialGathering";
        }

        return null;
    }

    /// <summary>
    /// Best-effort removal of the temporary file; failures are deliberately ignored
    /// so that cleanup problems never mask or replace the parse outcome.
    /// </summary>
    private static void TryDeleteTempFile(string path)
    {
        try
        {
            if (File.Exists(path))
            {
                File.Delete(path);
            }
        }
        catch
        {
            // Ignore cleanup errors
        }
    }

    /// <summary>
    /// Validates locations from parsed occurrences and adds warnings for problematic locations.
    /// </summary>
    /// <remarks>
    /// Three checks run over the distinct, non-blank locations, each as its own pass so
    /// warnings stay grouped by kind: overly long text, embedded date/time text, and a
    /// trailing "- MS"/"- HS" section header (a sign of a missing line break).
    /// </remarks>
    private static void ValidateLocations(EventOccurrenceParseResult result)
    {
        // Collect all unique, non-blank locations
        var locations = result.Occurrences.Values
            .SelectMany(occurrences => occurrences)
            .Select(occurrence => occurrence.Location)
            .Where(location => !string.IsNullOrWhiteSpace(location))
            .Select(location => location!)
            .Distinct()
            .ToList();

        // Check for long locations
        foreach (var location in locations)
        {
            if (location.Length > MaxExpectedLocationLength)
            {
                result.Warnings.Add($"Location '{location}' is unusually long ({location.Length} characters) and may contain multiple lines or extra text");
            }
        }

        // Check for date/time patterns; the first match is quoted in the warning
        foreach (var location in locations)
        {
            var match = DateTimePattern.Match(location);
            if (match.Success)
            {
                result.Warnings.Add($"Location '{location}' may contain date/time information: '{match.Value}'");
            }
        }

        // Check for section header patterns (missing line break detection):
        // the next section header was concatenated onto the location text
        foreach (var location in locations)
        {
            var match = SectionHeaderPattern.Match(location);
            if (match.Success)
            {
                // Extract the section header part for a better warning message
                var sectionHeaderPart = match.Value.Trim();
                result.Warnings.Add($"Location '{location}' appears to contain a section header (ends with '{sectionHeaderPart}') - likely missing line break. The location may be corrupted.");
            }
        }
    }
}