Merge sort
Here's another divide and conquer algorithm. This one is for sorting a sequence.
Conceptually, a merge sort works like this (see http://en.wikipedia.org/wiki/Merge_sort):
- Divide the unsorted list into n sub-lists, each containing one element (a list of one element is trivially sorted);
- Repeatedly merge sublists to produce new sorted sub-lists until there is only one sub-list remaining : this will be the sorted list.
In the following OCaml, we are drawing on inspiration from the the Haskell standard prelude for the part of the algorithm concerned with dividing the unsorted list : functions take
, drop
and split
.
(**{b Merge sort}, an {i O(n log n)} comparison based sorting algorithm *) module type MERGESORT = sig (**[take n] applied to a list [xs], returns the prefix of [xs] of length [n] or [xs] itself if [n > List.length xs] e.g. [take 2 [1; 2; 3]] {i = } [[1; 2]]*) val take : int -> 'a list -> 'a list (**[drop n] applied to a list [xs], returns the suffix of [xs] after the first [n] elements or, [[]] if [n > List.length xs] e.g. [drop 2 [1; 2; 3]] {i = } [[3]]*) val drop : int -> 'a list -> 'a list (**[split n xs] is equivalent to [take n xs, drop n xs] e.g. [split 2 [1; 2; 3]] {i = } [([1; 2], [3])]*) val split : int -> 'a list -> 'a list * 'a list (**[merge] given two {b sorted} sequences [xs] and [ys] returns a single sorted sequence of the elements of [xs] and [ys] e.g. [merge [1; 3] [2; 4]] {i = } [[1; 2; 3; 4]]*) val merge : 'a list -> 'a list -> 'a list (**[merge_sort] works by splitting a sequence into two parts, recursively sorting the two parts and merging the results into a single sorted sequence e.g. [merge_sort [1; 2; -1; 0; 3]] {i = } [[-1; 0; 1; 2; 3]]*) val merge_sort : 'a list -> 'a list end module Merge_sort : MERGESORT = struct let rec take k l = match (k, l) with | n, _ when n <= 0 -> [] | _, [] -> [] | n, (x :: xs) -> x :: take (n - 1) xs let rec drop k l = match (k, l) with | n, xs when n <= 0 -> xs | _, [] -> [] | n, (_ :: xs) -> drop (n - 1) xs let rec split k l = take k l, drop k l let rec merge l m = match (l, m) with | [], ys -> ys | xs, [] -> xs | ((x :: xs) as t), ((y :: ys) as s) -> if x <= y then x :: (merge xs s) else y :: (merge t ys) let rec merge_sort l = let i = (List.length l) / 2 in if i = 0 then l else let u, v = split i l in let xs, ys = merge_sort u, merge_sort v in merge xs ys end
We can test our little program in the top-level like this:
# let l = Merge_sort.merge_sort [-1; 2; 0; 4; 3; 5];; val l : int list = [-1; 0; 2; 3; 4; 5]
Here are the functions in C++ phrased as range algorithms.
//"c:/users/fletch/desktop/setenv.cmd" //cl /EHsc /Femerge.exe /I c:/project/boost_1_55_0 merge.cpp #include <boost/next_prior.hpp> #include <boost/range.hpp> #include <boost/range/algorithm/copy.hpp> #include <vector> #include <cstdlib> namespace algo { //take template <class S, class D> D take (std::size_t n, S src, D dst) { typedef boost::range_iterator<S>::type it_t; it_t curr = boost::begin (src), last = boost::end (src); if (n <= 0) return dst; if (boost::empty (src)) return dst; take (n-1, S (boost::next (curr), last), *dst++ = *curr); return dst; } //drop template <class S, class D> D drop (std::size_t n, S src, D dst) { typedef boost::range_iterator<S>::type it_t; it_t curr = boost::begin (src), last = boost::end (src); if (n <= 0) return boost::range::copy (src, dst); if (boost::empty (src))return dst; drop (n-1, S (boost::next (curr), last), dst); return dst; } //split template <class S, class D1, class D2> void split (int n, S src, D1 dst1, D2 dst2) { take (n, src, dst1); drop (n, src, dst2); } //merge template <class S1, class S2, class D> D merge (S1 src1, S2 src2, D dst) { typedef boost::range_iterator<S1>::type it1_t; typedef boost::range_iterator<S2>::type it2_t; typedef std::iterator_traits<it1_t>::value_type value1_t; typedef std::iterator_traits<it2_t>::value_type value2_t; if (boost::empty (src1)) return boost::copy (src2, dst); if (boost::empty (src2)) return boost::copy (src1, dst); it1_t curr1 = boost::begin (src1), last1 = boost::end (src1); it2_t curr2 = boost::begin (src2), last2 = boost::end (src2); value1_t x = *curr1; value2_t y = *curr2; if (x <= y) merge (S1 (boost::next (curr1), last1), src2, *dst++ = x); else merge (src1, S2 (boost::next (curr2), last2), *dst++ = y); return dst; } //merge_sort template <class S, class D> D merge_sort (S src, D dst) { typedef boost::range_iterator<S>::type it_t; typedef std::iterator_traits<it_t>::value_type value_t; std::size_t i = boost::size (src)/2; if (i == 0) return boost::range::copy (src, dst); std::vector<value_t> u, v; split (i, src, std::back_inserter (u), std::back_inserter (v)); std::vector<value_t> xs, ys; merge_sort (u, std::back_inserter (xs)); merge_sort (v, std::back_inserter (ys)); merge (xs, ys, dst); return dst; } }//namespace algo
The following usage example provides a lightweight test.
#include <boost/assign/list_of.hpp> #include <utility> #include <iterator> #include <iostream> int main () { using std::pair; using std::make_pair; using std::ostream_iterator; using boost::assign::list_of; int ord[] = {1, 2, 3, 4}; auto src=make_pair(ord, ord + 4); auto dst=ostream_iterator<int>(std::cout, ", "); std::cout << "\ntake ():\n"; algo::take (0u, src, dst); std::cout << std::endl; algo::take (1u, src, dst); std::cout << std::endl; algo::take (2u, src, dst); std::cout << std::endl; algo::take (3u, src, dst); std::cout << std::endl; algo::take (4u, src, dst); std::cout << std::endl; algo::take (5u, src, dst); std::cout << std::endl; std::cout << "\ndrop ():\n"; algo::drop (5u, src, dst); std::cout << std::endl; algo::drop (4u, src, dst); std::cout << std::endl; algo::drop (3u, src, dst); std::cout << std::endl; algo::drop (2u, src, dst); std::cout << std::endl; algo::drop (1u, src, dst); std::cout << std::endl; algo::drop (0u, src, dst); std::cout << std::endl; std::cout << "\nsplit ():\n\n"; algo::split (0u, src, dst, dst); std::cout << std::endl; algo::split (1u, src, dst, dst); std::cout << std::endl; algo::split (2u, src, dst, dst); std::cout << std::endl; algo::split (3u, src, dst, dst); std::cout << std::endl; algo::split (4u, src, dst, dst); std::cout << std::endl; algo::split (5u, src, dst, dst); std::cout << std::endl; std::cout << "\nmerge ():\n\n"; std::vector <int> l=list_of(-1)(2), r=list_of(0)(1); algo::merge (l, r, dst); std::cout << std::endl; std::cout << "\nmerge_sort ():\n\n"; int unord[] = {-1, 2, 0, 4, 3, 5}; algo::merge_sort (make_pair (unord, unord + 6), dst); return 0; }The above program produces the following output.
take (): 1, 1, 2, 1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, drop (): 4, 3, 4, 2, 3, 4, 1, 2, 3, 4, split (): 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, merge (): -1, 0, 1, 2, merge_sort (): -1, 0, 2, 3, 4, 5,