48 lines
1.7 KiB
FSharp
48 lines
1.7 KiB
FSharp
// Learn more about F# at http://fsharp.net
|
|
// See the 'F# Tutorial' project for more help.
|
|
|
|
open data
|
|
open MapReduce
|
|
open System.Text.RegularExpressions
|
|
|
|
/// Program entry point.
///
/// Turns the books returned by `getData @"..\..\data"` into (author, title)
/// pairs, feeds them through `map_reduce` to count the non-stopword words
/// (two or more word characters) appearing in each author's titles, and
/// prints the entries whose lower-cased author name contains the search term.
///
/// argv.[0] is the author-name fragment to look for; it is lower-cased before
/// matching. Any further arguments are ignored.
///
/// Returns 0 on success, or 1 when no search term was supplied.
[<EntryPoint>]
let main argv =
    if Array.isEmpty argv then
        // Without this guard the missing argument was only noticed when the
        // lazily filtered result was enumerated for printing, where argv.[0]
        // raised an IndexOutOfRangeException.
        eprintfn "Expected one argument: the author name (or part of it) to search for."
        1
    else
        // Lower-case the search term once instead of once per candidate author.
        let needle = argv.[0].ToLower()

        // Build the stopword set once so that each word costs a set lookup
        // rather than a linear scan over the whole stopword collection.
        let stopwords = Set.ofSeq getStopwords

        // One (author, title) pair per author listed on each book.
        let booksMap =
            getData @"..\..\data"
            |> Seq.collect (fun book ->
                book.authors |> List.map (fun a -> a, book.title))

        // Verbatim string: same pattern text, without relying on "\w" being
        // passed through unchanged by a regular string literal.
        let wordsRegex = new Regex(@"(?<word>\w{2,})", RegexOptions.Compiled)

        // Mapper: emits a single (lower-cased author, words) pair, where words
        // are the lower-cased regex matches in the title minus the stopwords.
        let mapfunc (author: string, title) =
            let words =
                wordsRegex.Matches(title)
                |> Seq.cast<Match>
                |> Seq.map (fun m -> m.Groups.["word"].Value.ToLower())
                |> Seq.filter (fun w -> not (Set.contains w stopwords))
            Seq.singleton (author.ToLower(), words)

        // Reducer: flattens the word sequences received for an author and
        // returns (author, (word, count) pairs ordered by descending count).
        // NOTE(review): presumably map_reduce passes every mapper value that
        // shares the same key - confirm against the MapReduce module.
        let reducefunc (author, words: seq<seq<string>>) =
            let countedWords =
                words
                |> Seq.concat
                |> Seq.countBy id
                // Negating the count turns the ascending sort into a descending one.
                |> Seq.sortBy (fun (_, count) -> -count)
            author, countedWords

        let r = map_reduce mapfunc reducefunc booksMap

        let result = r |> Seq.filter (fun (k: string, _) -> k.Contains(needle))

        // NOTE(review): "%A" may abbreviate long sequences - confirm that a
        // truncated listing is acceptable here.
        printfn "%A\n" result
        0 // return an integer exit code
|