Files

32 lines
807 B
FSharp

/// Helpers that load stop words and book records from files on disk.
module data
open System.IO
open System.Text.RegularExpressions
/// Stop words pulled out of the file "stopwords.py" (path is relative, so it is
/// resolved against the current working directory).
///
/// A stop word is whatever run of word characters (possibly empty) sits between
/// a pair of single quotes in that file. This is a value, not a function: the
/// file is read once, when the value is initialised. The result is a lazy
/// sequence over the regex matches.
let getStopwords =
    let source = File.ReadAllText(@"stopwords.py")

    // MatchCollection is exposed as a typed sequence of Match objects.
    let quoted =
        Regex.Matches(source, @"'(?<stopword>\w*)'")
        |> Seq.cast<Match>

    // Keep only the text captured by the named group, without the quotes.
    seq { for m in quoted -> m.Groups.["stopword"].Value }
/// One book record.
type Book =
    { /// Identifier of the book, kept as text.
      id: string
      /// Authors of the book.
      authors: string list
      /// Title of the book.
      title: string }
/// Loads the book records stored in the files of `directory`.
///
/// Every non-blank line of every file is split on ":::"; the first field becomes
/// the id, the second is split again on "::" to give the author list, and the
/// third becomes the title. Any further fields on the line are ignored.
///
/// Blank lines (empty or whitespace-only) are skipped. Previously such a line
/// raised IndexOutOfRangeException and aborted the whole load.
///
/// The directory listing is taken when `getData` is called; the returned sequence
/// is lazy, so each file is read only when enumeration reaches it. A non-blank
/// line with fewer than three ":::"-separated fields still raises
/// IndexOutOfRangeException at that point.
let getData directory =
    // Turns one file into a sequence of Book records, one per non-blank line.
    let parseFile filename =
        File.ReadAllLines(filename)
        // A blank line contains no ":::" separator, so it can never yield the
        // three fields a record needs; drop it instead of failing on arr.[1].
        |> Seq.filter (fun l -> not (System.String.IsNullOrWhiteSpace l))
        |> Seq.map (fun l ->
            let arr = Regex.Split(l, ":::")
            {
                id = arr.[0]
                authors = Regex.Split(arr.[1], "::") |> List.ofArray
                title = arr.[2]
            })

    Directory.GetFiles(directory)
    |> Seq.collect parseFile