Files
bigdata-coursera/hw3/AuthorTerms/MapReduce.fs
T

39 lines
1.8 KiB
FSharp

module MapReduce
//let map_reduce
// // Map function takes a key/value pair and creates a sequence of key/value pairs
// (m:'k1 -> 'v1 -> seq<'k2 * 'v2>)
// // Reduce function takes key and sequence to produce optional value
// (r:'k2 -> seq<'v2> -> 'v3)
// // Takes input key/value pairs and produces output key/value pairs
// : Map<'k1, 'v1> -> Map<'k2, 'v3> =
//
// let map_per_key : Map<'k1, 'v1> -> seq<('k2 * 'v2)> =
// Map.toSeq >> // 1. Map into a sequence
// Seq.map (fun (k, v) -> m k v) >> // 2. Map m over a list of pairs
// Seq.concat // 3. Concat per-key lists
//
// let group_by_key (l:seq<('k2 * 'v2)>) : Map<'k2,seq<'v2>> =
// l
// |> Seq.groupBy fst
// |> Seq.map (fun(k,vs) -> k, Seq.map snd vs)
// |> Map.ofSeq
//
// let reduce_per_key : Map<'k2, seq<'v2>> -> Map<'k2,'v3> =
// let un_some k (Some v) = v // Remove optional type
// let is_some k = function
// | Some _ -> true // Keep entries
// | None -> false // Remove entries
// Map.map r //>> // 1. Apply reduce per key
// //Map.filter is_some >> // 2. Remove None entries
// //Map.map un_some // 3. Transform to remove option
//
// map_per_key >> // 1. Apply map function to each key/value pair
// group_by_key >> // 2. Group intermediate data per key
// reduce_per_key // 3. Apply reduce to each group
/// <summary>
/// Runs a sequential, in-memory map/reduce pass over a sequence of pairs.
/// </summary>
/// <param name="map">
/// Called once per input pair; returns a collection of intermediate
/// (key, value) pairs for that input.
/// </param>
/// <param name="reduce">
/// Called once per distinct intermediate key with a single tuple argument
/// (key, values), where values holds every intermediate value emitted
/// under that key.
/// </param>
/// <param name="inputs">The input pairs to process.</param>
/// <returns>
/// A sequence with one <c>reduce</c> result per distinct intermediate key.
/// It is built from lazy <c>Seq</c> combinators, so nothing runs until the
/// result is enumerated, and enumerating it again repeats the work.
/// </returns>
let map_reduce map reduce (inputs:seq<_*_>) =
    inputs
    // Map phase: flatten the per-input outputs into one stream of pairs.
    |> Seq.collect map
    // Shuffle phase: bucket the intermediate pairs by their key.
    |> Seq.groupBy fst
    // Reduce phase: strip the key off each grouped pair so reduce sees only
    // the values, then hand it the (key, values) tuple.
    |> Seq.map (fun (key, pairs) -> reduce (key, Seq.map snd pairs))