// Learn more about F# at http://fsharp.net
// See the 'F# Tutorial' project for more help.

open data
open MapReduce
open System.Text.RegularExpressions

/// Program entry point.
///
/// Builds (author, title) pairs from the book data under `..\..\data`, runs
/// them through `map_reduce` to obtain per-author word counts of the title
/// words, and prints the entries whose (lower-cased) author key contains the
/// lower-cased first command-line argument.
///
/// Returns 0 on success, or 1 when no author filter was supplied.
///
/// NOTE(review): the `[<EntryPoint>]` attribute, the `word` group name in the
/// regex, `Seq.cast<Match>` and the `seq<seq<string>>` annotation were
/// reconstructed from a copy whose angle-bracket contents had been stripped;
/// confirm against the original project if available.
[<EntryPoint>]
let main (argv: string[]) =
    if Array.isEmpty argv then
        // Without this guard `argv.[0]` below raises IndexOutOfRangeException.
        eprintfn "usage: <program> <author-substring>"
        1
    else
        // Lower-case the filter once instead of once per filtered element.
        let query = argv.[0].ToLower()

        // A set gives cheap membership tests compared to scanning the
        // stopword sequence for every single word.
        let stopwords = Set.ofSeq getStopwords

        // One (author, title) pair per author of every book.
        let booksMap =
            let authorTitlePairs book =
                book.authors |> List.map (fun a -> a, book.title)
            getData @"..\..\data" |> Seq.collect authorTitlePairs

        // Words are runs of two or more word characters, captured as "word".
        let wordsRegex = new Regex(@"(?<word>\w{2,})", RegexOptions.Compiled)

        // Map step: emits a single pair of the lower-cased author and the
        // lower-cased, stopword-free words of the title.
        let mapfunc (author: string, title) =
            let words =
                wordsRegex.Matches(title)
                |> Seq.cast<Match>
                |> Seq.map (fun m -> m.Groups.["word"].Value.ToLower())
                |> Seq.filter (fun w -> not (Set.contains w stopwords))
            Seq.singleton (author.ToLower(), words)

        // Reduce step: flattens all word sequences collected for an author,
        // counts each distinct word and orders the counts descending.
        let reducefunc (author, words: seq<seq<string>>) =
            //let bw = words |> Seq.filter (fun wl -> wl |> Seq.length > 1) |> Array.ofSeq
            //printfn "%A" bw
            let countedWords =
                words
                |> Seq.concat
                |> Seq.countBy id
                // Negated key => most frequent first; Seq.sortBy is stable,
                // so ties keep first-occurrence order.
                |> Seq.sortBy (fun (_, c) -> -c)
            author, countedWords

        let r = map_reduce mapfunc reducefunc booksMap
        //printfn "Map Length %A\n" (r |> Map.toSeq |> Seq.length)
        //printfn "%A" (r |> Map.toArray)
        //let result = (r |> Seq.filter (fun k v -> v |> Seq.exists (fun (w,c) -> c > 3)))

        // Keep only the authors whose key contains the requested substring.
        let result = r |> Seq.filter (fun (k: string, _) -> k.Contains(query))
        printfn "%A\n" result
        0 // return an integer exit code