Saturday, July 23, 2016

Simple category theory constructions

This post is about some very simple category theory constructions and how one can model them in C++.

First a type for binary products.

template <class A, class B>
using product = std::pair<A, B>;

//`fst (x, y)` is the projection `x`
template <class A, class B>
inline auto fst (product<A, B> const& p) {
  return p.first;
}

//`snd (x, y)` is the projection `y`
template <class A, class B>
inline auto snd (product<A, B> const& p) {
  return p.second;
}

//`mk_product (a, b) computes the product of `a` and `b`
template <class A, class B>
inline product<A, B> mk_product (A&& a, B&& b) {
  return  std::make_pair (std::forward<A> (a), std::forward<B> (b));
}

Now $dup : X \rightarrow X \times X$ defined by $x \mapsto (x,\; x)$.

//`dup a` computes the product `(a, a)`
template <class A>
inline product<A, A> dup (A const& x) { return mk_product (x, x); }

Next up, $twist : X \times Y \rightarrow Y \times X$ defined by $(x,\; y) \mapsto (y,\; x)$.

//`twist (x, y)` is the product `(y, x)`
template <class A, class B>
inline product<B, A> twist (product<A, B> const& p) {
  return mk_product (snd (p), fst (p));
}

If $f : U \rightarrow R$ and $g : V \rightarrow S$ then we have $ravel : U \times V \rightarrow R \times S$ defined by $(x,\; y) \mapsto (f (x),\; g (y))$.

//`ravel f g (x, y)` is the product `(f x, g y)` (credit to Max
//Skaller on the name)
auto ravel = [=](auto f) {
  return [=](auto g) {
    return [=](auto const& x) { 
      return mk_product (f (fst (x)), g (snd (x))); 
    };
  };
};

If $X \times Y$ is a (binary) product with projections $\pi_{x}$ and $\pi_{y}$, $Z$ an object, morphisms $f : Z \rightarrow X$, $g : Z \rightarrow Y$ then $\left\langle f, g \right\rangle : Z \rightarrow X \times Y$ is the mediating arrow $z \mapsto (f (z),\;g (z))$.

//The product of morphisms <`f`, `g`> (see
//https://en.wikipedia.org/wiki/Product_(category_theory))
auto prod = [=](auto f, auto g) {
  return [=](auto const& z) { return mk_product (f (z), g (z)); };
};

We can use the Pretty good sum type library to define a type suitable for modeling (binary) coproducts.

template <class A>
struct Left { 
  A a; 
  template <class U> explicit Left (U&& u) : a {std::forward<U> (u)} {}
  A const& value () const { return a; }
};

template <class B>
struct Right { 
  B b; 
  template <class U> explicit Right (U&& u) : b {std::forward<U> (u)} {}
  B const& value () const { return b; }
};

template <class A, class B>
using sum = pgs::sum_type<Left<A>, Right<B>>;
template <class> struct sum_fst_type;
template <class A, class B>
  struct sum_fst_type<sum<A, B>> { using type = A; };
template <class S> struct sum_snd_type;
template <class A, class B>
  struct sum_snd_type<sum<A, B>> { using type = B; };

template <class S>
using sum_fst_t = typename sum_fst_type<S>::type;
template <class S>
using sum_snd_t = typename sum_snd_type<S>::type;

If $X + Y$ is a (binary) sum with injections $i_{x}$ and $i_{y}$, $Z$ an object, morphisms $f : X \rightarrow Z$, $g : Y \rightarrow Z$ then $\left[ f, g \right] : X \times Y \rightarrow Z $ is the mediating arrow $\begin{align*} e \mapsto \begin{cases} f (e) & \text{if $e \in X$} \\ g (e) & \text{if $e \in Y$} \end{cases} \end{align*}$.

//The coproduct of morphisms [`f, `g`] (see
//https://en.wikipedia.org/wiki/Coproduct)
template <class S>
auto co_product = [=](auto f) {
  return [=](auto g) {
    return [=](S const& s) {
      using A = sum_fst_t<S>;
      using B = sum_snd_t<S>;
      using lres_t = decltype (f (std::declval<A>()));
      using rres_t = decltype (g (std::declval<B>()));
      static_assert (
        std::is_same<lres_t, rres_t>::value
       , "co_product : result types differ");
      using res_t = lres_t;
      return s.match<lres_t>(
        [=](Left<A> const& l) { return f (l.value ()); },
        [=](Right<B> const& r) { return g (r.value ()); }
       );
    };
  };
};

That's it. Here's some example usage of all this stuff.

  auto succ=[](int i) { return i + 1; };
  auto pred=[](int i) { return i - 1; };
  auto twice=[](int i) { return 2 * i; };
  auto square=[](int i) { return i * i; };
  auto add=[](product<int, int> const& s) { return (fst (s) + snd (s)); };

  //--

  //Products

  //`dup`
  auto p = dup (1);
  std::cout << p << std::endl; //Prints '(1, 1)'

  //`prod`
  p = prod (succ, pred) (4);
  std::cout << p << std::endl; //Prints '(5, 3)'

  //`twist`
  p = twist (p);
  std::cout << p << std::endl; //Prints '(3, 5)'
  
  //`ravel`
  p = ravel (succ) (pred) (p);
  std::cout << p << std::endl; //Prints '(4, 4)'

  //--

  //Sums

  sum<int, float> l{pgs::constructor<Left<int>>{}, 1};
  sum<int, float> r{pgs::constructor<Right<float>>{}, 1.0f};
  std::cout << 
    co_product<sum<int, float>> 
      ([=](int i) { return std::to_string(i); })
      ([=](float f) { return std::to_string(f); })
      (l)
    << std::endl;
  ;//Prints '1'
  std::cout << 
    co_product<sum<int, float>> 
      ([=](int i) { return std::to_string(i); })
      ([=](float f) { return std::to_string(f); })
      (r)
    << std::endl;
  ;//Prints '1.000000'

  //--

Friday, June 17, 2016

Generic mappings over pairs

Browsing around on Oleg Kiselyov's excellent site, I came across a very interesting paper about "Advanced Polymorphism in Simpler-Typed Languages". One of the neat examples I'm about to present is concerned with expressing mappings over pairs that are generic not only in the datatypes involved but also over the number of arguments. The idea is to produce a family of functions $pair\_map_{i}$ such that

pair_map_1 f g (x, y) (x', y') → (f x, g y) 
pair_map_2 f g (x, y) (x', y') → (f x x', g y y') 
pair_map_3 f g (x, y) (x', y') (x'', y'', z'') → (f x x' x'', g y y' y'')
       .
       .
       .
The technique used to achieve this brings a whole bunch of functional programming ideas together : higher order functions, combinators and continuation passing style (and also leads into topics like the "value restriction" typing rules in the Hindley-Milner system).
let ( ** ) app k = fun x y -> k (app x y)
let pc k a b = k (a, b)
let papp (f1, f2) (x1, x2) = (f1 x1, f2 x2)
let pu x = x
With the above definitions, $pair\_map_{i}$ is generated like so.
(*The argument [f] in the below is for the sake of value restriction*)
let pair_map_1 f = pc (papp ** pu) (f : α -> β)
let pair_map_2 f = pc (papp ** papp ** pu) (f : α -> β -> γ)
let pair_map_3 f = pc (papp ** papp ** papp ** pu) (f : α -> β -> γ -> δ)
For example,
# pair_map_2 ( + ) ( - ) (1, 2) (3, 4) ;;
- : int * int = (4, -2)

Reverse engineering how this works requires a bit of algebra.

Let's tackle $pair\_map_{1}$. First

pc (papp ** pu) = (λk f g. k (f, g)) (papp ** pu) = λf g. (papp ** pu) (f, g)
and,
papp ** pu = λx y. pu (papp x y) = λx y. papp x y
so,
λf g. (papp ** pu) (f, g) =
    λf g. (λ(a, b) (x, y). (a x, b y)) (f, g) =
    λf g (x, y). (f x, g y)
that is, pair_map_1 = pc (papp ** pu) = λf g (x, y). (f x, g y) and, we can read the type off from that last equation as (α → β) → (γ → δ) → α * γ → β * δ.

Now for $pair\_map_{2}$. We have

pc (papp ** papp ** pu) =
    (λk f g. k (f, g)) (papp ** papp ** pu) =
    λf g. (papp ** papp ** pu) (f, g)
where,
papp ** papp ** pu = papp ** (papp ** pu) =
    papp ** (λa' b'. pu (papp a' b')) =
    papp ** (λa' b'. papp a' b') = 
    λa b. (λa' b'. papp a' b') (papp a b)
which means,
pc (papp ** papp ** pu) = 
    λf g. (papp ** papp ** pu) (f, g) =
    λf g. (λa b.(λa' b'. papp a' b') (papp a b)) (f, g) =
    λf g. (λb. (λa' b'. papp a' b') (papp (f, g) b)) =
    λf g. λ(x, y). λa' b'. (papp a' b') (papp (f, g) (x, y)) =
    λf g. λ(x, y). λa' b'. (papp a' b') (f x, g y) =
    λf g. λ(x, y). λb'. papp (f x, g y) b' =
    λf g. λ(x, y). λ(x', y'). papp (f x, g y) (x', y') =
    λf g (x, y) (x', y'). (f x x', g y y')
that is, a function in two binary functions and two pairs as we expect. Phew! The type in this instance is (α → β → γ) → (δ → ε → ζ) → α * δ → β * ε → γ * ζ.

To finish off, here's the program transliterated into C++(14).

#include <utility>
#include <iostream>

//let pu x = x
auto pu = [](auto x) { return x; };

//let ( ** ) app k  = fun x y -> k (app x y)
template <class F, class K>
auto operator ^ (F app, K k) {
  return [=](auto x) {
    return [=] (auto y) {
      return k ((app (x)) (y));
    };
  };
}

//let pc k a b = k (a, b)
auto pc = [](auto k) {
  return [=](auto a) {
    return [=](auto b) { 
      return k (std::make_pair (a, b)); };
  };
};

//let papp (f, g) (x, y) = (f x, g y)
auto papp = [](auto f) { 
  return [=](auto x) { 
    return std::make_pair (f.first (x.first), f.second (x.second)); };
};

int main () {

  auto pair = &std::make_pair<int, int>;

  {
  auto succ= [](int x){ return x + 1; };
  auto pred= [](int x){ return x - 1; };
  auto p  = (pc (papp ^ pu)) (succ) (pred) (pair (1, 2));
  std::cout << p.first << ", " << p.second << std::endl;
  }

  {
  auto add = [](int x) { return [=](int y) { return x + y; }; };
  auto sub = [](int x) { return [=](int y) { return x - y; }; };
  auto p = pc (papp ^ papp ^ pu) (add) (sub) (pair(1, 2)) (pair (3, 4));
  std::cout << p.first << ", " << p.second << std::endl;
  }

  return 0;
}

Thursday, April 21, 2016

Oh! Pascal!

I can't help but want to share my joy at coming across this pearl of a program from the "Pascal User Manual and Report" - Jensen and Wirth (circa 1974). In my edition, it's program 4.7 (graph1.pas).

This is it, rephrased in OCaml.

(* Graph of f x = exp (-x) * sin (2 * pi * x)

  Program 4.7, Pascal User Manual and Report, Jensen & Wirth
*)

let round (x : float) : int =
  let f, i = 
    let t = modf x in 
    (fst t, int_of_float@@ snd t) in
  if f = 0.0 then i
  else if i >= 0 then
    if f >= 0.5 then i + 1 else i
  else if -.f >= 0.5 then i - 1 else i

let graph (oc : out_channel) : unit =
  (*The x-axis runs vertically...*)
  let s = 32. in (*32 char widths for [y, y + 1]*)
  let h = 34 in (*char position of x-axis*)
  let d = 0.0625 in (*1/16, 16 lines for [x, x + 1]*)
  let c = 6.28318 in (* 2pi *)
  let lim = 32 in
  for i = 0 to lim do
    let x = d *. (float_of_int i) in
    let y = exp (-.x) *. sin (c *. x) in
    let n = round (s *. y) + h in
    for _ = n downto 0 do output_char oc ' '; done;
    output_string oc "*\n"
  done

let () = print_newline (); graph stdout; print_newline ()

The output from the above is wonderful :)

                                   *
                                               *
                                                       *
                                                            *
                                                            *
                                                         *
                                                   *
                                           *
                                   *
                            *
                       *
                    *
                    *
                      *
                          *
                              *
                                   *
                                       *
                                          *
                                            *
                                            *
                                           *
                                         *
                                      *
                                   *
                                *
                               *
                              *
                             *
                              *
                                *
                                 *
                                   *

Wednesday, April 13, 2016

Dictionaries as functions

This is an "oldie but a goodie". It's super easy.

A dictionary is a data structure that represents a map from keys to values. The question is, can this data structure and its characteristic operations be encoded using only functions?

The answer of course is yes and indeed, here's one such an encoding in OCaml.

(*The type of a dictionary with keys of type [α] and values of type
  [β]*)
type (α, β) dict = α -> β option

(*The empty dictionary maps every key to [None]*)
let empty (k : α) : β option = None

(*[add d k v] is the dictionary [d] together with a binding of [k] to
  [v]*)
let add (d : (α, β) dict) (k : α) (v : β) : (α, β) dict = 
  fun l -> 
    if l = k then Some v else d l

(*[find d k] retrieves the value bound to [k]*)
let find (d : (α, β) dict) (k : α) : β option = d k
Test it like this.
(*e.g.

  Name                            | Age
  ================================+====
  "Felonius Gru"                  |  53
  "Dave the Minion"               | 4.54e9
  "Dr. Joseph Albert Nefario"     |  80

*)
let despicable = 
  add 
    (add 
       (add 
          empty "Felonius Gru" 53
       ) 
       "Dave the Minion" (int_of_float 4.54e9)
    )
    "Dr. Nefario" 80 

let _ = 
  find despicable "Dave the Minion" |> 
      function | Some x -> x | _ -> failwith "Not found"

Moving on, can we implement this in C++? Sure. Here's one way.

#include <pgs/pgs.hpp>

#include <functional>
#include <iostream>
#include <cstdint>

using namespace pgs;

// -- A rough and ready `'a option` (given the absence of
// `std::experimental::optional`

struct None {};

template <class A>
struct Some { 
  A val;
  template <class Arg>
  explicit Some (Arg&& s) : val { std::forward<Arg> (s) }
  {}
};

template <class B>
using option = sum_type<None, Some<B>>;

template <class B>
std::ostream& operator << (std::ostream& os, option<B> const& o) {
  return o.match<std::ostream&>(
    [&](Some<B> const& a) -> std::ostream& { return os << a.val; },
    [&](None) -> std::ostream& { return os << "<empty>"; }
  );
}

//-- Encoding of dictionaries as functions

template <class K, class V>
using dict_type = std::function<option<V>(K)>;

//`empty` is a dictionary constant (a function that maps any key to
//`None`)
template <class A, class B>
dict_type<A, B> empty = 
  [](A const&) { 
    return option<B>{ constructor<None>{} }; 
};

//`add (d, k, v)` extends `d` with a binding of `k` to `v`
template <class A, class B>
dict_type<A, B> add (dict_type<A, B> const& d, A const& k, B const& v) {
  return [=](A const& l) {
    return (k == l) ? option<B>{ constructor<Some<B>>{}, v} : d (l);
  };
}

//`find (d, k)` searches for a binding in `d` for `k`
template <class A, class B>
option<B> find (dict_type<A, B> const& d, A const& k) {
  return d (k);
}

//-- Test driver

int main () {

  using dict_t = dict_type<std::string, std::int64_t>;

  auto nil = empty<std::string, std::int64_t>;
  dict_t(*insert)(dict_t const&, std::string const&, std::int64_t const&) = &add;


  dict_t despicable = 
    insert (
      insert (
        insert (nil
           , std::string {"Felonius Gru"}, std::int64_t{53})
           , std::string {"Dave the Minion"}, std::int64_t{4530000000})
          , std::string {"Dr. Joseph Albert Nefario"}, std::int64_t{80})
     ;

  std::cout << 
    find (despicable, std::string {"Dave the Minion"}) << std::endl;

  return 0;
}

Sunday, April 3, 2016

C++ : Streams

In this blog post, types and functions were presented in OCaml for modeling streams. This post takes the action to C++.

First, the type definition for a stream.

struct Nil {};
template <class T> class Cons;

template <class T>
using stream = sum_type <
    Nil
  , recursive_wrapper<Cons<T>>
>;
The definition is in terms of the sum_type<> type from the "pretty good sum" library talked about here.

The definition of Cons<>, will be in terms of "thunks" (suspensions). They're modeled as procedures that when evaluated, compute streams.

template <class T>
using stream_thunk = std::function<stream<T>()>;
To complete the abstraction, a function that given a suspension, "thaws" it.
template <class T> inline 
stream<T> force (stream_thunk<T> const& s) { 
  return s (); 
}

The above choices made, here is the definition for Cons<>.

template <class T>
class Cons {
public:
  using value_type = T;
  using reference = value_type&;
  using const_reference = value_type const&;
  using stream_type = stream<value_type>;

private:
  using stream_thunk_type = stream_thunk<value_type>;

public:
  template <class U, class V>
  Cons (U&& h, V&& t) : 
    h {std::forward<U> (h)}, t {std::forward<V> (t)}
  {}

  const_reference hd () const { return h; }
  stream_type tl () const { return force (t); }

private:
  value_type h;
  stream_thunk_type t;
};

Next, utility functions for working with streams.

The function hd () gets the head of a stream and tl () gets the stream that remains when the head is stripped off.

template <class T>
T const hd (stream<T> const& s) {
  return s.template match<T const&> (
      [](Cons<T> const& l) -> T const& { return l.hd (); }
    , [](otherwise) -> T const & { throw std::runtime_error { "hd" }; }
  );
}

template <class T>
stream<T> tl (stream<T> const& l) {
  return l.template match <stream<T>> (
    [] (Cons<T> const& s) -> stream <T> { return s.tl (); }
  , [] (otherwise) -> stream<T> { throw std::runtime_error{"tl"}; }
  );
}

The function take () returns the the first $n$ values of a stream.

template <class T, class D>
D take (unsigned int n, stream <T> const& s, D dst) {
  return (n == 0) ? dst :
    s.template match<D>(
       [&](Nil const& _) -> D { return  dst; },
       [&](Cons<T> const& l) -> D { 
         return take (n - 1, l.tl (), *dst++ = l.hd ()); }
    );
}

It's time to share a little "hack" I picked up for writing infinite lists.

  • To start, forget about streams;
  • Write your list using regular lists;
  • Ignore the fact that it won't terminate;
  • Rewrite in terms of Cons and convert the tail to a thunk.

For example, in OCaml the (non-terminating!) code

  let naturals = 
    let rec loop x = x :: loop (x + 1) in
  next 0
leads to this definition of the stream of natural numbers.
let naturals =
 let rec loop x = Cons (x, lazy (loop (x + 1))) in
loop 0

Putting the above to work, a generator for the stream of natural numbers can be written like this.

class natural_numbers_gen {
private:
  using int_stream = stream<int>;
    
private:
  int start;

private:
  int_stream from (int x) const {
    return int_stream{
      constructor<Cons<int>>{}, x, [=]() { return this->from (x + 1); }
    };
  }
  
public:
  explicit natural_numbers_gen (int start) : start (start) 
  {}

  explicit operator int_stream() const { return from (start); }
};
The first $10$ (say) natural numbers can then be harvested like this.
std::vector<int> s;
take (10, stream<int> (natural_numbers_gen{0}), std::back_inserter (s));

The last example, a generator of the Fibonacci sequence. Applying the hack, start with the following OCaml code.

  let fibonacci_numbers = 
    let rec fib a b = a :: fib b (a + b) in
    fib 0 1
The rewrite of this code into streams then leads to this definition.
let fibonnaci_sequence = 
  let rec fib a b = Cons (a, lazy (fib b (a + b))) in
fib 0 1
Finally, casting the above function into C++ yields the following.
class fibonacci_numbers_gen {
private:
  using int_stream = stream<int>;
    
private:
  int start;

private:
  int_stream loop (int a, int b) const {
    return int_stream{
      constructor<Cons<int>>{}, a, [=]() {return this->loop (b, a + b); }
    };
  }
    
public:
  explicit fibonacci_numbers_gen () 
  {}

  explicit operator int_stream() const { return loop (0, 1); }
  };

Saturday, April 2, 2016

Rotate

Rotate

This post is inspired by one of those classic "99 problems in Prolog".What we are looking for here are two functions that satisfy these signatures.

val rotate_left : int -> α list -> α list
val rotate_right : int -> α list -> α list 
rotate_left n rotates a list $n$ places to the left, rotate_right n rotates a list $n$ places to the right. Examples:
# rotate_left 3 ['a';'b';'c';'d';'e';'f';'g';'h'] ;;
- : char list = ['d'; 'e'; 'f'; 'g'; 'h'; 'a'; 'b'; 'c']

# rotate_left (-2) ['a';'b';'c';'d';'e';'f';'g';'h'] ;;
- : char list = ['g'; 'h'; 'a'; 'b'; 'c'; 'd'; 'e'; 'f']
Of course, rotate_left and rotate_right are inverse functions of each other so we expect, for any int $x$ and list $l$, rotate_right x @@ rotate_left x l $=$ rotate_left x @@ rotate_right x l $=$ l.

Well, there are a variety of solutions to this problem with differing degrees of verbosity, complexity and efficiency. My own attempt at a solution resulted in this.

let rec drop (k : int) (l : α list) : α list =
  match k, l with
  | i, _ when i <= 0 -> l
  | _, [] -> []
  | _, (_ :: xs) -> drop (k - 1) xs

let rec take (k : int) (l : α list) : α list =
  match k, l with
  | i, _ when i <= 0 -> []
  | _, [] -> []
  | _, (x :: xs)  -> x :: take (k - 1) xs

let split_at (n : int) (l : α list) : α list * α list = 
  (take n l), (drop n l)

let rec rotate_left (n : int) (l : α list) : α list =
  match n with
  | _ when n = 0 -> l
  | _ when n < 0 ->  rotate_right (-n) l
  | _ -> 
    let m : int = List.length l in
    let k : int = n mod m in
    let (l : α list), (r : α list) = split_at k l in 
    r @ l

and rotate_right (n : int) (l : α list) : α list =
  match n with
  | _ when n = 0 -> l
  | _ when n < 0 ->  rotate_left (-n) l
  | _ -> 
    let m : int = List.length l in
    let k : int = m - n mod m in
    let (l : α list), (r : α list) = split_at k l in 
    r @ l

So far so good, but then I was shown the following solution in Haskell.

rotateLeft n xs 
  | n >= 0     = take (length xs) $ drop n $ concat $ repeat xs
  | otherwise  = rotateLeft (length xs + n) xs

rotateRight n = rotateLeft (-n)
I found that pretty nifty! See, in the function rotateLeft, repeat xs creates an infinite list of lists, (each a copy of xs), "joins" that infinite list of lists into one infinite list, then the first $n$ elements are dropped from that the list and we take the next length xs which gets us the original list rotated left $n$ places.

I felt compelled to attempt to emulate the program above in OCaml.

The phrasing "works" in Haskell due to the feature of lazy evaluation. OCaml on the other hand is eagerly evaluated. Lazy evaluation is possible in OCaml however, you just need to be explicit about it. Here's a type for "lazy lists" aka "streams".

type α stream =  Nil | Cons of α * α stream Lazy.t
A value of type α Lazy.t is a deferred computation, called a suspension that has the result type α. The syntax lazy$(expr)$ makes a suspension of $expr$, without yet evaluating $expr$. "Forcing" the suspension (using Lazy.force) evaluates $expr$ and returns its result.

Next up, functions to get the head and tail of a stream.

let hd = function | Nil -> failwith "hd" | Cons (h, _) -> h
let tl = function | Nil -> failwith "tl" | Cons (_, t) -> Lazy.force t
Also useful, a function to lift an α list to an α stream.
let from_list (l : α list) : α stream =
  List.fold_right (fun x s -> Cons (x, lazy s)) l Nil

Those are the basic building blocks. Now we turn attention to implementing repeat x to create infinite lists of the repeated value $x$.

let rec repeat (x : α) : α stream = Cons (x, lazy (repeat x))

Now to implement concat (I prefer to call this function by its alternative name flatten).

The characteristic operation of flatten is the joining together of two lists. For eager lists, we can write a function join that appends two lists like this.

let rec join l m =
  match l with
  | [] -> m
  | h :: t -> h :: (join t m)
This generalizes naturally to streams.
let rec join (l : α stream) (m : α stream) =
  match l with
  | Nil -> m
  | Cons (h, t) -> Cons (h, lazy (join (Lazy.force t) m))
For eager lists, we can write flatten in terms of join.
let rec flatten : α list list -> α list = function
   | [] -> []
   | (h :: tl) -> join h (flatten tl)
Emboldened by our earlier success we might try to generalize it to streams like this.
let rec flatten (l : α stream stream) : α stream =
   match l with
   | Nil -> lazy Nil
   | Cons (l, r) ->  join l (flatten (Lazy.force r))
Sadly, no. This definition is going to result in stack overflow. There is an alternative phrasing of flatten we might try.
let rec flatten = function
  | [] -> []
  | [] :: t -> flatten t
  | (x :: xs) :: t -> x :: (flatten (xs :: t))
Happy to say, this one generalizes and gets around the eager evaluation problem that causes the unbounded recursion.
let rec flatten : α stream stream -> α stream = function
  | Nil -> Nil
  | Cons (Nil, t) -> flatten (Lazy.force t)
  | Cons (Cons (x, xs), t) ->
      Cons (x, lazy (flatten (Cons (Lazy.force xs, t))))

take and drop are straight forward generalizations of their eager counterparts.

let rec drop (n : int) (lst : α stream ) : α stream = 
  match (n, lst) with
  | (n, _) when n < 0 -> invalid_arg "negative index in drop"
  | (n, xs) when n = 0 -> xs
  | (_, Nil) -> Nil
  | (n, Cons (_, t)) -> drop (n - 1) (Lazy.force t)

let rec take (n : int) (lst : α stream) : α list = 
  match (n, lst) with
  | (n, _) when n < 0 -> invalid_arg "negative index in take"
  | (n, _) when n = 0 -> []
  | (_, Nil) -> []
  | (n, Cons (h, t)) -> h :: (take (n - 1) (Lazy.force t))

Which brings us to the lazy version of rotate expressed in about the same number of lines of code!

let rec rotate_left (k : int) (l : α list) : α list =
  let n = List.length l in
  if k >= 0 then
    l |> from_list |> repeat |> flatten |> drop k |> take n
  else rotate_left (n + k) l

let rotate_right (n : int) : α list -> α list = rotate_left (-n)

Saturday, March 26, 2016

REPLs

Reusable interactive REPL

I play around with little languages enough that I've found it useful to have a minimal language independent REPL for use as a library component. It is expressed as a single higher order function satisfying this signature:
val repl
    (eval : α list -> β -> γ)
    (pe : δ -> Lexing.lexbuf -> β)
    (tok : δ)
    (string_of_t : γ -> ε) : unit
The first argument eval is a function to evaluate expressions in the language, the second argument pe the parser entry point, tok is the lexer start token and the last argument string_of_t is a function that can compute a string from the result of an evaluated expression.

Here's an example session with a concrete repl for a basic $\lambda$ calculus interpreter (more on that later in this post):

  d:\lambda>.\lambda.exe
  ? (\p x y.p x y) (\x y. x) a b
  a
  ? (\p x y.p x y) (\x y. y) a b
  b
The driver for this program ('lambda_repl.ml') is as simple as this:
let main = 
  Repl.repl
    Lambda.eval
    Lambda_parser.main 
    Lambda_lexer.token
    Lambda.string_of_t
The module Repl follows ('repl.ml').
let parse 
    (pe : α -> Lexing.lexbuf -> β) 
    (le : α)
    (lexbuf : Lexing.lexbuf)  : β =
  try 
    pe le lexbuf
  with 
  | Parsing.Parse_error ->
    begin
      let curr = lexbuf.Lexing.lex_curr_p in
      let line = curr.Lexing.pos_lnum in
      let cnum = curr.Lexing.pos_cnum - curr.Lexing.pos_bol in
      let tok = Lexing.lexeme lexbuf in
      raise 
       (Failure (
          Printf.sprintf 
"file \"\", line %d, character %d\nError : Syntax error \"%s\"" 
            line cnum tok))
    end

let from_bytes ?(file : string = "") (str : string) 
    (pe : α -> Lexing.lexbuf -> β) 
    (le : α) : β =
  let set_filename lexbuf name =
    let open Lexing in
    lexbuf.lex_curr_p <-  {
      lexbuf.lex_curr_p with pos_fname = name
    } in
  let lexbuf = Lexing.from_string str in
  set_filename lexbuf file ;
  parse pe le lexbuf

let prompt (continuing:bool) =
  (print_string (if (not continuing)
    then "? " else "... ");(flush stdout))
let read (continuing:bool)=prompt continuing; input_line stdin

let handle_interpreter_error ?(finally=(fun () -> ())) ex =
  match ex with
  | Failure s -> finally () ; (Printf.printf "%s\n" s)
  | Stack_overflow -> finally () ; Printf.printf "Stack overflow\n"
  | Division_by_zero -> finally () ; Printf.printf "Division by zero\n"
  | End_of_file -> finally (); raise ex
  | _  as e -> 
    finally (); 
    Printf.printf "Unknown exception : %s\n" (Printexc.to_string e); 
    raise e

let safe_proc ?finally f =
  try f ()
  with exn -> handle_interpreter_error ?finally exn

let reduce 
    (eval : α list -> β -> γ)
    (pe : δ -> Lexing.lexbuf -> β)
    (le : δ)
    (to_bytes : γ -> ε) 
    (buf : Buffer.t) : ε =
  let t = 
    eval [] (from_bytes (Buffer.contents buf) pe le) in
  to_bytes t

let repl
    (eval : α list -> β -> γ)
    (pe : δ -> Lexing.lexbuf -> β)
    (tok : δ)
    (to_bytes : γ -> ε) : unit = 
  let initial_capacity = 4*1024 in
  let buf = Buffer.create initial_capacity in
  try 
    while true do
      let f () =
        let l = read ((Buffer.length buf)!=0) in
        let len = String.length l in
        if len > 0 then
          if l.[0] = '%' then ()
          else
            if l.[len - 1] = '\\' then
              (Buffer.add_string buf ((String.sub l 0 (len-1))^"\n"))
            else
              if l.[len-1] = (char_of_int 7) then Buffer.clear buf
              else
                let _ = Buffer.add_string buf l in
                let s = reduce eval pe tok to_bytes buf in
                Buffer.clear buf; print_endline s
      in (safe_proc ~finally:(fun () -> Buffer.clear buf) f)
    done
  with
  | End_of_file -> print_string "\n"

$\lambda$ calculus interpreter

This implementation is based on the techniques explained in Garrigue's excellent article "Code reuse through polymorphic variants".

First, a module implementing the simple language of variables ('var.ml').

type α impl = [`Var of string]
type t = α impl as α

let mk_var : string -> [> `Var of string] = fun s -> `Var s

let string_of_impl (_ : α -> string) : α impl -> string = function 
  | `Var s -> s

let rec string_of_t : t -> string = fun v -> string_of_impl string_of_t v

let eval_impl
    (env : (string * ([> `Var of string ] as α)) list) 
    (`Var s as v : α impl) : α =
  try
    List.assoc s env
  with
  | Not_found -> v

Next, a module extending this base language to implement the language of $\lambda$ expressions ('lambda.ml').

type α impl = [α Var.impl | `Abs of string * α | `App of α * α]
type t = α impl as α

let mk_app : α * β -> [> `App of α * β] = 
  fun (u, v) -> `App (u, v)
let mk_abs : string * β -> [> `Abs of string * β] = 
  fun (s, t) -> `Abs (s, t) 

module Detail = struct
  let gen_sym =
    let n = ref 0 in
    fun () -> incr n; "_" ^ string_of_int !n

  let rec strip (bs : string list) t =
    match t with 
    | `Abs (b, t) -> strip (b :: bs) t
    | _ as u -> (List.rev bs, u)

end (*module Detail*)

let string_of_impl (string_of_rec : α -> string) : α impl -> string = 
  function
  | #Var.t as v -> Var.string_of_impl string_of_rec v
  | `App (u, v) -> 
    "("^(string_of_rec u) ^ ") (" ^ (string_of_rec v)^")"
  | `Abs _ as t -> 
    match (Detail.strip [] t) with
    | (b :: bs, u)  ->
      let binder = 
        "\\" ^ b ^ (List.fold_right  (fun z b -> " " ^ z ^ b) bs ". ") in
      binder ^ (string_of_rec u)
    | ([], _) -> assert false

let rec string_of_t : t -> string = fun v -> string_of_impl string_of_t v

let eval_impl eval_rec 
    (env : (string *
              ([> 
                 `Abs of string * α 
               | `App of α * α 
               | `Var of string ] as α))
       list) : α impl -> α = function
    | #Var.t as v -> Var.eval_impl env v
    | `App (u, v) ->
      let v' = eval_rec env v in
      begin match eval_rec env u with
      | `Abs (s, body) -> eval_rec [s, v'] body
      | u' -> `App (u', v')
      end
    | `Abs (s, u) ->
      let s' = Detail.gen_sym () in
      `Abs (s', eval_rec ((s, `Var s') :: env) u)

let rec eval (env : (string * t) list) : t -> t = 
  eval_impl eval env

That's it for the interpreter. All that remains is to put a "front end" on it. First the parser ('lambda_parser.mly').

%{
type t = Lambda.t

let var (s : string) : t = 
  Var.mk_var s

let abs ((bs : string list), (t : t)) : t =
  List.fold_right (fun b u -> Lambda.mk_abs (b, u)) bs t
let app ((t0 : t), (us : t list)) : t =
  List.fold_left (fun t u -> Lambda.mk_app (t, u)) t0 us

%}
 
%token  Tident
%token  Tnum
%token Tlambda Tdot Tlparen Trparen Teof

%nonassoc Tident Tdot Tlparen Trparen Teof

%start main
%type  main

%%
main:
  | term Teof { $1 }
  ;
term:
  | Tlambda id id_list Tdot term { abs (($2 :: $3), $5) }
  | atom atom_list { app ($1, $2) }
  ;
atom_list:
  | { [] }
  | atom atom_list { $1 :: $2 }
  ;
atom:
  | id { var $1 }
  | Tlparen term Trparen { $2 }
  ;
id_list:
  | { [] }
  | id id_list { $1 :: $2 }
  ;
id:
  | Tident { $1 }
  ;
%%
Lastly, the lexical analyzer ('lambda_lexer.mll').
{
  open Lambda_parser
}

let alpha=['a'-'z' 'A'-'Z']

rule token = parse
    ['\r' ' ' '\t' '\n']              { token lexbuf }
  | '('                                    { Tlparen }
  | ')'                                    { Trparen }
  | '\\'                                   { Tlambda }
  | '.'                                       { Tdot }
  | ((alpha)(alpha)*) as s                { Tident s }
  | eof                                       { Teof }