Files

48 lines
1.7 KiB
FSharp

// Learn more about F# at http://fsharp.net
// See the 'F# Tutorial' project for more help.
open data
open MapReduce
open System.Text.RegularExpressions
/// Program entry point.
///
/// Loads book records via `getData`, runs them through `map_reduce` to obtain,
/// for every (lower-cased) author, the words used in that author's titles
/// together with their occurrence counts (most frequent first), and prints the
/// entries whose author name contains the first command-line argument
/// (compared case-insensitively via lower-casing).
///
/// argv    : command-line arguments; argv.[0] is the author-name substring.
/// returns : 0 on success, 1 when the required argument is missing.
[<EntryPoint>]
let main (argv: string[]) =
    if Array.isEmpty argv then
        // Validate before doing any work: indexing argv.[0] on an empty array
        // would throw IndexOutOfRangeException after all data was processed.
        eprintfn "Usage: <program> <author-name-substring>"
        1
    else
        // Lower-case the filter once instead of once per result entry.
        let authorFilter = argv.[0].ToLower()

        // Materialise the stopwords into a set once, so membership tests do not
        // re-scan (or re-enumerate) the source sequence for every word.
        let stopwords = Set.ofSeq getStopwords

        // Flatten the books into one (author, title) pair per author of each book.
        let booksMap =
            let bookToTuple book =
                book.authors |> List.map (fun a -> a, book.title)
            getData @"..\..\data" |> Seq.collect bookToTuple

        // A "word" is any run of two or more word characters.
        let wordsRegex = new Regex("(?<word>\w{2,})", RegexOptions.Compiled)

        // Map step: emit a single (lower-cased author, non-stopword title words) pair.
        let mapfunc (author: string, title) =
            let words =
                wordsRegex.Matches(title)
                |> Seq.cast<Match>
                |> Seq.map (fun m -> m.Groups.["word"].Value.ToLower())
                |> Seq.filter (fun w -> not (Set.contains w stopwords))
            Seq.singleton (author.ToLower(), words)

        // Reduce step: merge all word sequences received for one author and
        // count each distinct word, ordered by descending count.
        // NOTE(review): presumably map_reduce groups the mapped pairs by key
        // before calling this -- confirm against the MapReduce module.
        let reducefunc (author, words: seq<seq<string>>) =
            let countedWords =
                words
                |> Seq.concat
                |> Seq.countBy id
                |> Seq.sortBy (fun (_, count) -> -count) // negated key => descending
            author, countedWords

        let r = map_reduce mapfunc reducefunc booksMap

        let result = r |> Seq.filter (fun (k: string, _) -> k.Contains(authorFilter))
        printfn "%A\n" result
        0 // return an integer exit code