/// Loads a stop-word list and a book catalogue from text files on disk.
module data

open System.IO
open System.Text.RegularExpressions

/// The words that appear between single quotes in `stopwords.py`
/// (path resolved against the current working directory).
///
/// This is a value, not a function: the file is read once, when the
/// binding is initialised.
let getStopwords =
    let text = File.ReadAllText(@"stopwords.py")
    // The capture group has to be named `stopword` because it is looked up
    // by that name below. The previous pattern `'(?\w*)'` was not a valid
    // .NET regex (unrecognized grouping construct).
    Regex.Matches(text, @"'(?<stopword>\w*)'")
    // Seq.cast needs the element type spelled out; without it the type of
    // `m` in the lambda is indeterminate and `m.Groups` does not resolve.
    |> Seq.cast<Match>
    |> Seq.map (fun m -> m.Groups.["stopword"].Value)

/// One catalogue entry: an identifier, its authors and a title.
type Book =
    { id : string
      authors : string List
      title : string }

/// Parses every line of every file in `directory` into a `Book`.
///
/// Each line is split on `:::` into id, authors and title; the authors
/// field is split again on `::`. Fields after the third are ignored.
/// A line with fewer than three `:::`-separated fields raises
/// `IndexOutOfRangeException` when the resulting sequence is enumerated.
///
/// The directory listing is taken when `getData` is called; the individual
/// files are read as the returned sequence is enumerated.
let getData directory =
    let parseFile filename =
        File.ReadAllLines(filename)
        |> Seq.map (fun line ->
            let fields = Regex.Split(line, ":::")
            { id = fields.[0]
              authors = Regex.Split(fields.[1], "::") |> List.ofArray
              title = fields.[2] })

    Directory.GetFiles(directory)
    |> Seq.collect parseFile