Saturday, March 26, 2016

REPLs

Reusable interactive REPL

I play around with little languages enough that I've found it useful to have a minimal language independent REPL for use as a library component. It is expressed as a single higher order function satisfying this signature:
val repl
    (eval : α list -> β -> γ)
    (pe : δ -> Lexing.lexbuf -> β)
    (tok : δ)
    (string_of_t : γ -> ε) : unit
The first argument eval is a function to evaluate expressions in the language, the second argument pe the parser entry point, tok is the lexer start token and the last argument string_of_t is a function that can compute a string from the result of an evaluated expression.

Here's an example session with a concrete repl for a basic $\lambda$ calculus interpreter (more on that later in this post):

  d:\lambda>.\lambda.exe
  ? (\p x y.p x y) (\x y. x) a b
  a
  ? (\p x y.p x y) (\x y. y) a b
  b
The driver for this program ('lambda_repl.ml') is as simple as this:
let main = 
  Repl.repl
    Lambda.eval
    Lambda_parser.main 
    Lambda_lexer.token
    Lambda.string_of_t
The module Repl follows ('repl.ml').
let parse 
    (pe : α -> Lexing.lexbuf -> β) 
    (le : α)
    (lexbuf : Lexing.lexbuf)  : β =
  try 
    pe le lexbuf
  with 
  | Parsing.Parse_error ->
    begin
      let curr = lexbuf.Lexing.lex_curr_p in
      let line = curr.Lexing.pos_lnum in
      let cnum = curr.Lexing.pos_cnum - curr.Lexing.pos_bol in
      let tok = Lexing.lexeme lexbuf in
      raise 
       (Failure (
          Printf.sprintf 
"file \"\", line %d, character %d\nError : Syntax error \"%s\"" 
            line cnum tok))
    end

let from_bytes ?(file : string = "") (str : string) 
    (pe : α -> Lexing.lexbuf -> β) 
    (le : α) : β =
  let set_filename lexbuf name =
    let open Lexing in
    lexbuf.lex_curr_p <-  {
      lexbuf.lex_curr_p with pos_fname = name
    } in
  let lexbuf = Lexing.from_string str in
  set_filename lexbuf file ;
  parse pe le lexbuf

let prompt (continuing:bool) =
  (print_string (if (not continuing)
    then "? " else "... ");(flush stdout))
let read (continuing:bool)=prompt continuing; input_line stdin

let handle_interpreter_error ?(finally=(fun () -> ())) ex =
  match ex with
  | Failure s -> finally () ; (Printf.printf "%s\n" s)
  | Stack_overflow -> finally () ; Printf.printf "Stack overflow\n"
  | Division_by_zero -> finally () ; Printf.printf "Division by zero\n"
  | End_of_file -> finally (); raise ex
  | _  as e -> 
    finally (); 
    Printf.printf "Unknown exception : %s\n" (Printexc.to_string e); 
    raise e

let safe_proc ?finally f =
  try f ()
  with exn -> handle_interpreter_error ?finally exn

let reduce 
    (eval : α list -> β -> γ)
    (pe : δ -> Lexing.lexbuf -> β)
    (le : δ)
    (to_bytes : γ -> ε) 
    (buf : Buffer.t) : ε =
  let t = 
    eval [] (from_bytes (Buffer.contents buf) pe le) in
  to_bytes t

let repl
    (eval : α list -> β -> γ)
    (pe : δ -> Lexing.lexbuf -> β)
    (tok : δ)
    (to_bytes : γ -> ε) : unit = 
  let initial_capacity = 4*1024 in
  let buf = Buffer.create initial_capacity in
  try 
    while true do
      let f () =
        let l = read ((Buffer.length buf)!=0) in
        let len = String.length l in
        if len > 0 then
          if l.[0] = '%' then ()
          else
            if l.[len - 1] = '\\' then
              (Buffer.add_string buf ((String.sub l 0 (len-1))^"\n"))
            else
              if l.[len-1] = (char_of_int 7) then Buffer.clear buf
              else
                let _ = Buffer.add_string buf l in
                let s = reduce eval pe tok to_bytes buf in
                Buffer.clear buf; print_endline s
      in (safe_proc ~finally:(fun () -> Buffer.clear buf) f)
    done
  with
  | End_of_file -> print_string "\n"

$\lambda$ calculus interpreter

This implementation is based on the techniques explained in Garrigue's excellent article "Code reuse through polymorphic variants".

First, a module implementing the simple language of variables ('var.ml').

type α impl = [`Var of string]
type t = α impl as α

let mk_var : string -> [> `Var of string] = fun s -> `Var s

let string_of_impl (_ : α -> string) : α impl -> string = function 
  | `Var s -> s

let rec string_of_t : t -> string = fun v -> string_of_impl string_of_t v

let eval_impl
    (env : (string * ([> `Var of string ] as α)) list) 
    (`Var s as v : α impl) : α =
  try
    List.assoc s env
  with
  | Not_found -> v

Next, a module extending this base language to implement the language of $\lambda$ expressions ('lambda.ml').

type α impl = [α Var.impl | `Abs of string * α | `App of α * α]
type t = α impl as α

let mk_app : α * β -> [> `App of α * β] = 
  fun (u, v) -> `App (u, v)
let mk_abs : string * β -> [> `Abs of string * β] = 
  fun (s, t) -> `Abs (s, t) 

module Detail = struct
  let gen_sym =
    let n = ref 0 in
    fun () -> incr n; "_" ^ string_of_int !n

  let rec strip (bs : string list) t =
    match t with 
    | `Abs (b, t) -> strip (b :: bs) t
    | _ as u -> (List.rev bs, u)

end (*module Detail*)

let string_of_impl (string_of_rec : α -> string) : α impl -> string = 
  function
  | #Var.t as v -> Var.string_of_impl string_of_rec v
  | `App (u, v) -> 
    "("^(string_of_rec u) ^ ") (" ^ (string_of_rec v)^")"
  | `Abs _ as t -> 
    match (Detail.strip [] t) with
    | (b :: bs, u)  ->
      let binder = 
        "\\" ^ b ^ (List.fold_right  (fun z b -> " " ^ z ^ b) bs ". ") in
      binder ^ (string_of_rec u)
    | ([], _) -> assert false

let rec string_of_t : t -> string = fun v -> string_of_impl string_of_t v

let eval_impl eval_rec 
    (env : (string *
              ([> 
                 `Abs of string * α 
               | `App of α * α 
               | `Var of string ] as α))
       list) : α impl -> α = function
    | #Var.t as v -> Var.eval_impl env v
    | `App (u, v) ->
      let v' = eval_rec env v in
      begin match eval_rec env u with
      | `Abs (s, body) -> eval_rec [s, v'] body
      | u' -> `App (u', v')
      end
    | `Abs (s, u) ->
      let s' = Detail.gen_sym () in
      `Abs (s', eval_rec ((s, `Var s') :: env) u)

let rec eval (env : (string * t) list) : t -> t = 
  eval_impl eval env

That's it for the interpreter. All that remains is to put a "front end" on it. First the parser ('lambda_parser.mly').

%{
type t = Lambda.t

let var (s : string) : t = 
  Var.mk_var s

let abs ((bs : string list), (t : t)) : t =
  List.fold_right (fun b u -> Lambda.mk_abs (b, u)) bs t
let app ((t0 : t), (us : t list)) : t =
  List.fold_left (fun t u -> Lambda.mk_app (t, u)) t0 us

%}
 
%token  Tident
%token  Tnum
%token Tlambda Tdot Tlparen Trparen Teof

%nonassoc Tident Tdot Tlparen Trparen Teof

%start main
%type  main

%%
main:
  | term Teof { $1 }
  ;
term:
  | Tlambda id id_list Tdot term { abs (($2 :: $3), $5) }
  | atom atom_list { app ($1, $2) }
  ;
atom_list:
  | { [] }
  | atom atom_list { $1 :: $2 }
  ;
atom:
  | id { var $1 }
  | Tlparen term Trparen { $2 }
  ;
id_list:
  | { [] }
  | id id_list { $1 :: $2 }
  ;
id:
  | Tident { $1 }
  ;
%%
Lastly, the lexical analyzer ('lambda_lexer.mll').
{
  open Lambda_parser
}

let alpha=['a'-'z' 'A'-'Z']

rule token = parse
    ['\r' ' ' '\t' '\n']              { token lexbuf }
  | '('                                    { Tlparen }
  | ')'                                    { Trparen }
  | '\\'                                   { Tlambda }
  | '.'                                       { Tdot }
  | ((alpha)(alpha)*) as s                { Tident s }
  | eof                                       { Teof }