## Friday, July 18, 2014

### Merge sort

Here's another divide and conquer algorithm. This one is for sorting a sequence.

Conceptually, a merge sort works like this (see http://en.wikipedia.org/wiki/Merge_sort):

• Divide the unsorted list into n sub-lists, each containing one element (a list of one element is trivially sorted);
• Repeatedly merge sublists to produce new sorted sub-lists until there is only one sub-list remaining : this will be the sorted list.

In the following OCaml, we are drawing on inspiration from the the Haskell standard prelude for the part of the algorithm concerned with dividing the unsorted list : functions take, drop and split.

(**{b Merge sort}, an {i O(n log n)} comparison based sorting
algorithm *)
module type MERGESORT = sig

(**[take n] applied to a list [xs], returns the prefix of [xs] of
length [n] or [xs] itself if [n > List.length xs] e.g. [take 2
[1; 2; 3]] {i = } [[1; 2]]*)
val take : int -> 'a list -> 'a list

(**[drop n] applied to a list [xs], returns the suffix of [xs]
after the first [n] elements or, [[]] if [n > List.length xs]
e.g. [drop 2 [1; 2; 3]] {i = } [[3]]*)
val drop : int -> 'a list -> 'a list

(**[split n xs] is equivalent to [take n xs, drop n xs] e.g.
[split 2 [1; 2; 3]] {i = } [([1; 2], [3])]*)
val split : int -> 'a list -> 'a list * 'a list

(**[merge] given two {b sorted} sequences [xs] and [ys] returns a
single sorted sequence of the elements of [xs] and [ys]
e.g. [merge [1; 3] [2; 4]] {i = } [[1; 2; 3; 4]]*)
val merge : 'a list -> 'a list -> 'a list

(**[merge_sort] works by splitting a sequence into two parts,
recursively sorting the two parts and merging the results into
a single sorted sequence e.g. [merge_sort [1; 2; -1; 0; 3]]
{i = } [[-1; 0; 1; 2; 3]]*)
val merge_sort : 'a list -> 'a list
end

module Merge_sort : MERGESORT = struct

let rec take k l =
match (k, l) with
| n, _ when n <= 0 -> []
| _, [] -> []
| n, (x :: xs) -> x :: take (n - 1) xs

let rec drop k l =
match (k, l) with
| n, xs when n <= 0 -> xs
| _, [] -> []
| n, (_ :: xs) -> drop (n - 1) xs

let rec split k l = take k l, drop k l

let rec merge l m =
match (l, m) with
| [], ys -> ys
| xs, [] -> xs
| ((x :: xs) as t), ((y :: ys) as s) ->
if x <= y then x :: (merge xs s) else y :: (merge t ys)

let rec merge_sort l =
let i = (List.length l) / 2 in
if i = 0 then l
else
let u, v = split i l in
let xs, ys = merge_sort u, merge_sort v in
merge xs ys

end


We can test our little program in the top-level like this:

# let l = Merge_sort.merge_sort [-1; 2; 0; 4; 3; 5];;
val l : int list = [-1; 0; 2; 3; 4; 5]


Here are the functions in C++ phrased as range algorithms.

//"c:/users/fletch/desktop/setenv.cmd"
//cl /EHsc /Femerge.exe /I c:/project/boost_1_55_0 merge.cpp

#include <boost/next_prior.hpp>
#include <boost/range.hpp>
#include <boost/range/algorithm/copy.hpp>

#include <vector>
#include <cstdlib>

namespace algo
{
//take

template <class S, class D>
D take (std::size_t n, S src, D dst)
{
typedef boost::range_iterator<S>::type it_t;
it_t curr = boost::begin (src), last = boost::end (src);
if (n <= 0) return dst;
if (boost::empty (src))  return dst;
take (n-1, S (boost::next (curr), last), *dst++ = *curr);
return dst;
}

//drop

template <class S, class D>
D drop (std::size_t n, S src, D dst)
{
typedef boost::range_iterator<S>::type it_t;
it_t curr = boost::begin (src), last = boost::end (src);
if (n <= 0) return boost::range::copy (src, dst);
if (boost::empty (src))return dst;
drop (n-1, S (boost::next (curr), last), dst);
return dst;
}

//split

template <class S, class D1, class D2>
void split (int n, S src, D1 dst1, D2 dst2)
{ take (n, src, dst1); drop (n, src, dst2); }

//merge

template <class S1, class S2, class D>
D merge (S1 src1, S2 src2, D dst)
{
typedef boost::range_iterator<S1>::type it1_t;
typedef boost::range_iterator<S2>::type it2_t;
typedef std::iterator_traits<it1_t>::value_type value1_t;
typedef std::iterator_traits<it2_t>::value_type value2_t;
if (boost::empty (src1)) return boost::copy (src2, dst);
if (boost::empty (src2)) return boost::copy (src1, dst);
it1_t curr1 = boost::begin (src1), last1 = boost::end (src1);
it2_t curr2 = boost::begin (src2), last2 = boost::end (src2);
value1_t x = *curr1;
value2_t y = *curr2;
if (x <= y)
merge (S1 (boost::next (curr1), last1), src2, *dst++ = x);
else
merge (src1, S2 (boost::next (curr2), last2), *dst++ = y);
return dst;
}

//merge_sort

template <class S, class D>
D merge_sort (S src, D dst)
{
typedef boost::range_iterator<S>::type it_t;
typedef std::iterator_traits<it_t>::value_type value_t;
std::size_t i = boost::size (src)/2;
if (i == 0) return boost::range::copy (src, dst);
std::vector<value_t> u, v;
split (i, src, std::back_inserter (u), std::back_inserter (v));
std::vector<value_t> xs, ys;
merge_sort (u, std::back_inserter (xs));
merge_sort (v, std::back_inserter (ys));
merge (xs, ys, dst);
return dst;
}

}//namespace algo


The following usage example provides a lightweight test.

#include <boost/assign/list_of.hpp>

#include <utility>
#include <iterator>
#include <iostream>

int main ()
{
using std::pair;
using std::make_pair;
using std::ostream_iterator;
using boost::assign::list_of;

int ord[] = {1, 2, 3, 4};

auto src=make_pair(ord, ord + 4);
auto dst=ostream_iterator<int>(std::cout, ", ");

std::cout << "\ntake ():\n";

algo::take (0u, src, dst); std::cout << std::endl;
algo::take (1u, src, dst); std::cout << std::endl;
algo::take (2u, src, dst); std::cout << std::endl;
algo::take (3u, src, dst); std::cout << std::endl;
algo::take (4u, src, dst); std::cout << std::endl;
algo::take (5u, src, dst); std::cout << std::endl;

std::cout << "\ndrop ():\n";

algo::drop (5u, src, dst); std::cout << std::endl;
algo::drop (4u, src, dst); std::cout << std::endl;
algo::drop (3u, src, dst); std::cout << std::endl;
algo::drop (2u, src, dst); std::cout << std::endl;
algo::drop (1u, src, dst); std::cout << std::endl;
algo::drop (0u, src, dst); std::cout << std::endl;

std::cout << "\nsplit ():\n\n";

algo::split (0u, src, dst, dst); std::cout << std::endl;
algo::split (1u, src, dst, dst); std::cout << std::endl;
algo::split (2u, src, dst, dst); std::cout << std::endl;
algo::split (3u, src, dst, dst); std::cout << std::endl;
algo::split (4u, src, dst, dst); std::cout << std::endl;
algo::split (5u, src, dst, dst); std::cout << std::endl;

std::cout << "\nmerge ():\n\n";

std::vector <int> l=list_of(-1)(2), r=list_of(0)(1);
algo::merge (l, r, dst); std::cout << std::endl;

std::cout << "\nmerge_sort ():\n\n";

int unord[] = {-1, 2, 0, 4, 3, 5};
algo::merge_sort (make_pair (unord, unord + 6), dst);

return 0;
}

The above program produces the following output.
take ():

1,
1, 2,
1, 2, 3,
1, 2, 3, 4,
1, 2, 3, 4,

drop ():

4,
3, 4,
2, 3, 4,
1, 2, 3, 4,

split ():

1, 2, 3, 4,
1, 2, 3, 4,
1, 2, 3, 4,
1, 2, 3, 4,
1, 2, 3, 4,
1, 2, 3, 4,

merge ():

-1, 0, 1, 2,

merge_sort ():

-1, 0, 2, 3, 4, 5,