cf2c0d8068
This commit introduces a new property for capturing footnotes in both the EventOccurrenceParseResult and EventOccurrenceParserResult classes. The EventOccurrenceParser has been updated to handle footnotes, which are identified by lines starting with "*" or as parenthetical notes. The logic for processing these footnotes has been integrated into the parsing flow, ensuring that they are correctly associated with their respective event definitions. Additionally, the EventOccurrenceParserService has been modified to copy footnotes from the parser result, enhancing the overall event parsing functionality.
229 lines
9.7 KiB
C#
229 lines
9.7 KiB
C#
using System.Text;
|
|
using System.Text.RegularExpressions;
|
|
using Core.Entities;
|
|
using Core.Models;
|
|
using Core.Parsers;
|
|
using Microsoft.Extensions.Configuration;
|
|
using SchoolLevel = Core.Models.SchoolLevel;
|
|
|
|
namespace Core.Services;
|
|
|
|
/// <summary>
/// Service implementation for parsing event occurrence text data.
/// Wraps <see cref="EventOccurrenceParser"/> so callers can supply raw text, and reports
/// failures through the returned result's error list instead of throwing.
/// </summary>
public class EventOccurrenceParserService : IEventOccurrenceParserService
{
    /// <summary>Configuration key read to determine the school level passed to the parser.</summary>
    private const string SchoolLevelConfigKey = "ChapterSettings:SchoolLevel";

    /// <summary>Locations with more characters than this produce an "unusually long" warning.</summary>
    private const int MaxExpectedLocationLength = 50;

    /// <summary>
    /// Matches date/time fragments inside a location: a month name followed by a day number,
    /// a clock time such as "3:30 pm" / "3:30 p.m.", or the word NOON.
    /// Built once because a compiled regex is expensive to construct and safe to share.
    /// </summary>
    private static readonly Regex DateTimePattern = new Regex(
        @"\b(January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2}\b|\b\d{1,2}:\d{2}\s*(a|p)\.?m\.?\b|\bNOON\b",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>
    /// Matches a location ending in "- MS" or "- HS", which indicates that the next section
    /// header was concatenated onto the location (missing line break).
    /// Only the regular hyphen is matched; per the original author's note the input has
    /// en-dash/em-dash already normalised to hyphens before it reaches this check.
    /// </summary>
    private static readonly Regex SectionHeaderPattern = new Regex(
        @"-\s*(MS|HS)\s*$",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    private readonly IConfiguration? _configuration;

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="configuration">
    /// Optional configuration; when supplied, the <c>ChapterSettings:SchoolLevel</c> value is
    /// read on each parse and forwarded to the parser.
    /// </param>
    public EventOccurrenceParserService(IConfiguration? configuration = null)
    {
        _configuration = configuration;
    }

    /// <inheritdoc/>
    public EventOccurrenceParseResult ParseFromText(string text, ICollection<EventDefinition> events)
    {
        var result = new EventOccurrenceParseResult();

        if (string.IsNullOrWhiteSpace(text))
        {
            result.Errors.Add("Input text is empty or whitespace.");
            return result;
        }

        try
        {
            // EventOccurrenceParser is constructed from a FileInfo, so the text is staged in a temp file.
            var tempFile = Path.GetTempFileName();
            try
            {
                File.WriteAllText(tempFile, text, Encoding.UTF8);

                var parser = new EventOccurrenceParser(new FileInfo(tempFile), events, ReadSchoolLevel());
                var parserResult = parser.Parse();

                // Tag every occurrence as either a regular event (id set) or a special event (name set),
                // then expose it in the result under the same EventDefinition key.
                foreach (var entry in parserResult.Occurrences)
                {
                    var eventDefinition = entry.Key;
                    var occurrences = entry.Value;
                    var specialEventType = GetSpecialEventTypeName(eventDefinition);

                    foreach (var occurrence in occurrences)
                    {
                        if (specialEventType is null)
                        {
                            occurrence.EventDefinitionId = eventDefinition.Id;
                        }
                        else
                        {
                            // Special events carry no definition id; they are identified by name only.
                            occurrence.EventDefinitionId = null;
                        }

                        occurrence.SpecialEventType = specialEventType;
                    }

                    result.Occurrences[eventDefinition] = occurrences;
                }

                // Copy parsing issues from parser result
                result.Issues.AddRange(parserResult.Issues);

                // Copy skipped section headers and counts from parser result
                result.SkippedHSSectionHeaders.AddRange(parserResult.SkippedHSSectionHeaders);
                result.SkippedMSSectionHeaders.AddRange(parserResult.SkippedMSSectionHeaders);
                result.SkippedMSEventCount = parserResult.SkippedMSEventCount;
                result.SkippedHSEventCount = parserResult.SkippedHSEventCount;

                // Copy footnotes from parser result
                foreach (var footnote in parserResult.Footnotes)
                {
                    result.Footnotes[footnote.Key] = footnote.Value;
                }

                // Add informational messages about skipped events
                if (parserResult.SkippedMSEventCount > 0)
                {
                    result.Warnings.Add($"Skipped {parserResult.SkippedMSEventCount} Middle School (MS) event occurrence(s) based on school level setting");
                }

                if (parserResult.SkippedHSEventCount > 0)
                {
                    result.Warnings.Add($"Skipped {parserResult.SkippedHSEventCount} High School (HS) event occurrence(s) based on school level setting");
                }

                // Validate locations and add warnings for problematic ones
                ValidateLocations(result);
            }
            finally
            {
                TryDeleteTempFile(tempFile);
            }
        }
        catch (Exception ex)
        {
            // Any failure (temp file, configuration, parser) is reported through the result.
            result.Errors.Add($"Error parsing text: {ex.Message}");
            if (ex.InnerException != null)
            {
                result.Errors.Add($"Inner exception: {ex.InnerException.Message}");
            }
        }

        return result;
    }

    /// <summary>
    /// Reads the school level from configuration.
    /// </summary>
    /// <returns>
    /// The parsed value, or <c>null</c> when no configuration was supplied, the setting is
    /// missing/blank, or its text does not parse as a <see cref="SchoolLevel"/> (case-insensitive).
    /// </returns>
    private SchoolLevel? ReadSchoolLevel()
    {
        if (_configuration is null)
        {
            return null;
        }

        var schoolLevelText = _configuration.GetSection(SchoolLevelConfigKey).Get<string>();
        if (string.IsNullOrWhiteSpace(schoolLevelText))
        {
            return null;
        }

        if (Enum.TryParse<SchoolLevel>(schoolLevelText, ignoreCase: true, out var parsed))
        {
            return parsed;
        }

        return null;
    }

    /// <summary>
    /// Maps the built-in special event definitions to the name stored in
    /// <c>SpecialEventType</c>.
    /// </summary>
    /// <returns>The special event name, or <c>null</c> when the definition is a regular event.</returns>
    private static string? GetSpecialEventTypeName(EventDefinition eventDefinition)
    {
        if (eventDefinition == EventDefinition.GeneralSchedule)
        {
            return "GeneralSchedule";
        }

        if (eventDefinition == EventDefinition.MeetTheCandidates)
        {
            return "MeetTheCandidates";
        }

        if (eventDefinition == EventDefinition.ChapterOfficerMeeting)
        {
            return "ChapterOfficerMeeting";
        }

        if (eventDefinition == EventDefinition.VotingDelegateMeeting)
        {
            return "VotingDelegateMeeting";
        }

        if (eventDefinition == EventDefinition.SocialGathering)
        {
            return "SocialGathering";
        }

        return null;
    }

    /// <summary>
    /// Deletes the staged temp file if it still exists. Cleanup is best-effort: a failure here
    /// is deliberately ignored so it cannot turn a successful parse into an error.
    /// </summary>
    private static void TryDeleteTempFile(string path)
    {
        try
        {
            if (File.Exists(path))
            {
                File.Delete(path);
            }
        }
        catch
        {
            // Ignore cleanup errors
        }
    }

    /// <summary>
    /// Validates locations from parsed occurrences and adds warnings for problematic locations.
    /// Each distinct non-blank location is checked for excessive length, embedded date/time
    /// text, and a trailing section header; warnings are appended in that order.
    /// </summary>
    private static void ValidateLocations(EventOccurrenceParseResult result)
    {
        // Collect all unique, non-blank locations (OfType also narrows away nulls for the compiler).
        var locations = result.Occurrences.Values
            .SelectMany(list => list)
            .Select(eo => eo.Location)
            .OfType<string>()
            .Where(loc => !string.IsNullOrWhiteSpace(loc))
            .Distinct()
            .ToList();

        // Pass 1: unusually long locations.
        foreach (var loc in locations)
        {
            if (loc.Length > MaxExpectedLocationLength)
            {
                result.Warnings.Add($"Location '{loc}' is unusually long ({loc.Length} characters) and may contain multiple lines or extra text");
            }
        }

        // Pass 2: locations that appear to contain date/time text.
        foreach (var loc in locations)
        {
            var match = DateTimePattern.Match(loc);
            if (match.Success)
            {
                result.Warnings.Add($"Location '{loc}' may contain date/time information: '{match.Value}'");
            }
        }

        // Pass 3: locations ending with a section header marker (missing line break detection).
        foreach (var loc in locations)
        {
            var match = SectionHeaderPattern.Match(loc);
            if (match.Success)
            {
                // Extract the section header part for better warning message
                var sectionHeaderPart = match.Value.Trim();
                result.Warnings.Add($"Location '{loc}' appears to contain a section header (ends with '{sectionHeaderPart}') - likely missing line break. The location may be corrupted.");
            }
        }
    }
}
|
|
|