Initial commit — Coursera Big Data coursework
This commit is contained in:
@@ -0,0 +1,39 @@
|
||||
module MapReduce
|
||||
|
||||
//let map_reduce
|
||||
// // Map function takes a pair and creates a sequence of key/value pairs
|
||||
// (m:'k1 -> 'v1 -> seq<'k2 * 'v2>)
|
||||
// // Reduce function takes key and sequence to produce optional value
|
||||
// (r:'k2 -> seq<'v2> -> 'v3)
|
||||
// // Takes an input of key/value pairs to produce an output key/value pairs
|
||||
// : Map<'k1, 'v1> -> Map<'k2, 'v3> =
|
||||
//
|
||||
// let map_per_key : Map<'k1, 'v1> -> seq<('k2 * 'v2)> =
|
||||
// Map.toSeq >> // 1. Map into a sequence
|
||||
// Seq.map (fun (k, v) -> m k v) >> // 2. Map m over a list of pairs
|
||||
// Seq.concat // 3. Concat per-key lists
|
||||
//
|
||||
// let group_by_key (l:seq<('k2 * 'v2)>) : Map<'k2,seq<'v2>> =
|
||||
// l
|
||||
// |> Seq.groupBy fst
|
||||
// |> Seq.map (fun(k,vs) -> k, Seq.map snd vs)
|
||||
// |> Map.ofSeq
|
||||
//
|
||||
// let reduce_per_key : Map<'k2, seq<'v2>> -> Map<'k2,'v3> =
|
||||
// let un_some k (Some v) = v // Remove optional type
|
||||
// let is_some k = function
|
||||
// | Some _ -> true // Keep entries
|
||||
// | None -> false // Remove entries
|
||||
// Map.map r //>> // 1. Apply reduce per key
|
||||
// //Map.filter is_some >> // 2. Remove None entries
|
||||
// //Map.map un_some // 3. Transform to remove option
|
||||
//
|
||||
// map_per_key >> // 1. Apply map function to each key/value pair
|
||||
// group_by_key >> // 2. Group intermediate data per key
|
||||
// reduce_per_key // 3. Apply reduce to each group
|
||||
|
||||
/// Runs a map/reduce pass over `inputs`.
///
/// `map` is applied to every input pair and must yield a sequence of
/// (key, value) pairs. The yielded pairs are flattened into one sequence,
/// grouped by key, and `reduce` is then applied to each (key, values) pair.
/// Returns the sequence of `reduce` results, one per distinct key.
let map_reduce map reduce (inputs:seq<_*_>) =
    // Strip the key from each grouped pair so reduce only sees the values.
    let valuesOnly (key, pairs) = key, pairs |> Seq.map snd
    inputs
    |> Seq.collect map
    |> Seq.groupBy fst
    |> Seq.map valuesOnly
    |> Seq.map reduce
|
||||
Reference in New Issue
Block a user