16 Mug: matching, throwing, quoting
16.1 Scaling up with syntax
We have developed a small, but representative functional programming language. But there’s still a long way to go from our Loot language to the kind of constructs we expect in a modern, expressive programming language. In particular, there’s a fairly large gap between Loot and the subset of Racket we’ve explored so far in this class.
For example, our programs have made extensive use of pattern matching, quotation, quasi-quotation, and lots of built-in functions. In this section, we’ll examine how to scale Loot up to a language that’s nicer to program in. As we’ll see, much of this can be accomplished without extending the compiler. Rather we can explain these language features by elaboration of fancier language syntax into the existing core forms.
In this chapter, we’ll explore several ideas at the level of an interpreter, but the techniques should work just as well for the compiler.
16.2 The Loot+ interpreter
Let us start with an interprter for the Loot language, plus all of the extensions considered in the various assignments up through Assignment 7.
#lang racket (provide (all-defined-out)) (require (only-in "syntax.rkt" prim?)) ;; type Expr = ;; | Integer ;; | Boolean ;; | Character ;; | String ;; | Symbol ;; | Variable ;; | '() ;; | `(box ,Expr) ;; | `(if ,Expr ,Expr ,Expr) ;; | `(let ,(List Variable Expr) ... ,Expr) ;; | `(letrec ,(List Variable Lambda) ... ,Expr) ;; | `(λ ,Bindings ,Expr) ;; | `(apply ,Expr ,Expr) ;; | `(,Prim ,Expr ...) ;; | `(,Expr ,Expr ...) ;; type Value = ;; | Integer ;; | Boolean ;; | Character ;; | String ;; | Symbol ;; | '() ;; | (Box Value) ;; | (Cons Value Value) ;; | Function ;; type Function = ;; | (Values ... -> Answer) ;; type Answer = Value | 'err ;; type REnv = (Listof (List Variable Value)) ;; Expr REnv -> Answer (define (interp-env e r) (match e ;; produce fresh strings each time a literal is eval'd [(? string? s) (string-copy s)] [(? value? v) v] [''() '()] [`',(? symbol? s) s] [`(if ,e0 ,e1 ,e2) (match (interp-env e0 r) ['err 'err] [v (if v (interp-env e1 r) (interp-env e2 r))])] [(? symbol? x) (lookup r x)] [`(let (,`(,xs ,es) ...) ,e) (match (interp-env* es r) ['err 'err] [vs (interp-env e (append (zip xs vs) r))])] [`(letrec (,`(,xs ,es) ...) ,e) (letrec ((r* (λ () (append (zip xs (map (λ (l) (λ vs (apply (interp-env l (r*)) vs))) es)) r)))) (interp-env e (r*)))] [`(λ (,xs ...) ,e) (λ vs (if (= (length vs) (length xs)) (interp-env e (append (zip xs vs) r)) 'err))] [`(λ (,xs ... . ,x) ,e) (λ vs (if (>= (length vs) (length xs)) (interp-env e (append (zip/remainder xs vs x) r)) 'err))] [`(apply ,e0 ,e1) (let ((v0 (interp-env e0 r)) (vs (interp-env e1 r))) (if (list? vs) (apply v0 vs) 'err))] [`(,(? prim? p) ,es ...) (let ((as (interp-env* es r))) (interp-prim p as))] [`(,e ,es ...) (match (interp-env* (cons e es) r) [(list f vs ...) (if (procedure? f) (apply f vs) 'err)] ['err 'err])] [_ 'err])) ;; (Listof Expr) REnv (Listof Defn) -> (Listof Value) | 'err (define (interp-env* es r) (match es ['() '()] [(cons e es) (match (interp-env e r) ['err 'err] [v (cons v (interp-env* es r))])])) ;; Any -> Boolean (define (value? x) (or (integer? x) (boolean? x) (char? x) (string? x))) ;; Prim (Listof Answer) -> Answer (define (interp-prim p as) (match (cons p as) [(list p (? value?) ... 'err _ ...) 'err] [(list '- (? integer? i0)) (- i0)] [(list '- (? integer? i0) (? integer? i1)) (- i0 i1)] [(list 'abs (? integer? i0)) (abs i0)] [(list 'add1 (? integer? i0)) (+ i0 1)] [(list 'sub1 (? integer? i0)) (- i0 1)] [(list 'zero? (? integer? i0)) (zero? i0)] [(list 'char? v0) (char? v0)] [(list 'integer? v0) (integer? v0)] [(list 'boolean? v0) (boolean? v0)] [(list 'integer->char (? codepoint? i0)) (integer->char i0)] [(list 'char->integer (? char? c)) (char->integer c)] [(list '+ (? integer? i0) (? integer? i1)) (+ i0 i1)] [(list 'cons v0 v1) (cons v0 v1)] [(list 'car (? cons? v0)) (car v0)] [(list 'cdr (? cons? v0)) (cdr v0)] [(list 'string? v0) (string? v0)] [(list 'box? v0) (box? v0)] [(list 'empty? v0) (empty? v0)] [(list 'cons? v0) (cons? v0)] [(list 'cons v0 v1) (cons v0 v1)] [(list 'box v0) (box v0)] [(list 'unbox (? box? v0)) (unbox v0)] [(list 'string-length (? string? v0)) (string-length v0)] [(list 'make-string (? natural? v0) (? char? v1)) (make-string v0 v1)] [(list 'string-ref (? string? v0) (? natural? v1)) (if (< v1 (string-length v0)) (string-ref v0 v1) 'err)] [(list '= (? integer? v0) (? integer? v1)) (= v0 v1)] [(list '< (? integer? v0) (? integer? v1)) (< v0 v1)] [(list '<= (? integer? v0) (? integer? v1)) (<= v0 v1)] [(list 'char=? (? char? v0) (? char? v1)) (char=? v0 v1)] [(list 'boolean=? (? boolean? v0) (? boolean? v1)) (boolean=? v0 v1)] [(list 'eq? v0 v1) (eq? v0 v1)] [(list 'gensym) (gensym)] [(list 'symbol? v0) (symbol? v0)] [(list 'procedure? v0) (procedure? v0)] [_ 'err])) ;; REnv Variable -> Answer (define (lookup env x) (match env ['() 'err] [(cons (list y v) env) (match (symbol=? x y) [#t v] [#f (lookup env x)])])) ;; REnv Variable Value -> Value (define (ext r x v) (cons (list x v) r)) ;; Any -> Boolean (define (codepoint? x) (and (integer? x) (<= 0 x #x10FFFF) (not (<= #xD800 x #xDFFF)))) ;; (Listof A) (Listof B) -> (Listof (List A B)) (define (zip xs ys) (match* (xs ys) [('() '()) '()] [((cons x xs) (cons y ys)) (cons (list x y) (zip xs ys))])) ;; like zip but ys can be longer and remainder is associated with r (define (zip/remainder xs ys r) (match* (xs ys) [('() ys) (list (list r ys))] [((cons x xs) (cons y ys)) (cons (list x y) (zip/remainder xs ys r))]))
16.3 A bit more sugar
As we saw in Loot, we can consider syntaxtic extensions of language that elaborate into the core Expr form of a language. We saw this with the define-form that we rewrote into letrec. We can consider further extensions such as and, or, and even cond.
Here are functions for transforming each of these forms into simpler forms:
Examples
> (define (cond->if c) (match c [`(cond (else ,e)) e] [`(cond (,c ,e) . ,r) `(if ,c ,e (cond ,@r))]))
> (define (and->if c) (match c [`(and) #t] [`(and ,e) e] [`(and ,e . ,r) `(if ,e (and ,@r) #f)]))
> (define (or->if c) (match c [`(or) #f] [`(or ,e) e] [`(or ,e . ,r) (let ((x (gensym))) `(let ((,x ,e)) (if ,x ,x (or ,@r))))]))
Note that these functions do not necessarily eliminate all cond, and, or or forms, but rather eliminate one occurrence, potentially creating a new occurrence within a subexpression:
Examples
> (cond->if '(cond [(even? x) 8] [else 9])) '(if (even? x) 8 (cond (else 9)))
> (cond->if '(cond [else 9])) 9
> (and->if '(and)) #t
> (and->if '(and 8)) 8
> (and->if '(and 8 9)) '(if 8 (and 9) #f)
> (or->if '(or)) #f
> (or->if '(or 8)) 8
> (or->if '(or 8 9)) '(let ((g5637 8)) (if g5637 g5637 (or 9)))
The idea is that another function will drive the repeated use of these functions until all these extended forms are eliminated.
You may wonder why the or elaboration is complicated by the let-binding. Consider a potential simpler approach:
Examples
> (define (or->if-simple c) (match c [`(or) #f] [`(or ,e) e] [`(or ,e . ,r) `(if ,e ,e (or ,@r))]))
But compare the elaboration of the following exmample:
Examples
> (or->if-simple '(or (some-expensive-function) #t)) '(if (some-expensive-function) (some-expensive-function) (or #t))
> (or->if '(or (some-expensive-function) #t)) '(let ((g5641 (some-expensive-function))) (if g5641 g5641 (or #t)))
The second program is much more efficient. Moreover, if some-expensive-function had side-effects, the first program would duplicate them, thereby changing the program’s intended behavior.
We can incorporate these new functions into the desugar function, which will transform extended programs into “core” expressions:
Examples
; Expr+ -> Expr
> (define (desugar e+) (match e+ [`(begin ,@(list `(define (,fs . ,xss) ,es) ...) ,e) `(letrec ,(map (λ (f xs e) `(,f (λ ,xs ,(desugar e)))) fs xss es) ,(desugar e))] [(? symbol? x) x] [(? imm? i) i] [`',(? symbol? s) `',s] [`(,(? prim? p) . ,es) `(,p ,@(map desugar es))] [`(if ,e0 ,e1 ,e2) `(if ,(desugar e0) ,(desugar e1) ,(desugar e2))] [`(let ((,x ,e0)) ,e1) `(let ((,x ,(desugar e0))) ,(desugar e1))] [`(letrec ,bs ,e0) `(letrec ,(map (λ (b) (list (first b) (desugar (second b)))) bs) ,(desugar e0))] [`(λ ,xs ,e0) `(λ ,xs ,(desugar e0))] [`(cond . ,_) (desugar (cond->if e+))] [`(and . ,_) (desugar (and->if e+))] [`(or . ,_) (desugar (or->if e+))] [`(,e . ,es) `(,(desugar e) ,@(map desugar es))]))
Note how a cond, and, or or form are transformed and then desugared again. This will take care of eliminating any derived forms introduced by the transformation, which is useful so that derived forms can be defined in terms of other derived forms, including itself!
Examples
> (desugar '(cond [(even? x) 8] [else 9])) '(if (even? x) 8 9)
> (desugar '(cond [else 9])) 9
> (desugar '(and)) #t
> (desugar '(and 8)) 8
> (desugar '(and 8 9)) '(if 8 9 #f)
> (desugar '(or)) #f
> (desugar '(or 8)) 8
> (desugar '(or 8 9)) '(let ((g5657 8)) (if g5657 g5657 9))
Derived forms that can be elaborated away by rewriting into more primitive forms are sometimes called syntactic sugar since they are not fundamental but “sweeten” the experience of writing programs with useful shorthands. We call the elaboration function desugar to indicate that it is eliminating the syntactic sugar.
16.4 Exceptional behavior
To see an example of taking the idea of program transformation as a method for implementing language features, let’s consider the case of exceptions and exception handlers, a common feature of modern high-level languages.
Consider the following program for computing the product of all the elements in a binary tree:
Examples
; BT -> Number ; Multiply all the numbers in given binary tree
> (define (prod bt) (match bt ['leaf 1] [`(node ,v ,l ,r) (* v (* (prod l) (prod r)))])) > (prod 'leaf) 1
> (prod '(node 8 leaf leaf)) 8
> (prod '(node 8 (node 2 leaf leaf) (node 4 leaf leaf))) 64
Now consider the work done in an example such as this:
Examples
> (prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) 0
From a quick scan of the elements, we know the answer is 0 without doing any arithmetic. But the prod function will do a bunch of multiplication to actually figure this out.
To see, let’s use a helper function to replace * that prints every it multiplies two numbers:
Examples
; Number Number -> Number
> (define (mult x y) (printf "mult: ~a x ~a\n" x y) (* x y)) ; BT -> Number ; Multiply all the numbers in given binary tree
> (define (prod bt) (match bt ['leaf 1] [`(node ,v ,l ,r) (mult v (mult (prod l) (prod r)))])) > (prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf))))
mult: 1 x 1
mult: 0 x 1
mult: 1 x 1
mult: 2 x 1
mult: 1 x 1
mult: 3 x 1
mult: 2 x 3
mult: 4 x 6
mult: 0 x 24
mult: 9 x 0
0
This could potentially be bad if the tree were quite large.
How can we do better? One option is to detect if the value at a node is zero and simply avoid recurring on the left and right subtrees at that point:
Examples
; BT -> Number ; Multiply all the numbers in given binary tree
> (define (prod bt) (match bt ['leaf 1] [`(node ,v ,l ,r) (if (zero? v) 0 (mult v (mult (prod l) (prod r))))]))
Does this help our answer? Only slightly:
Examples
> (prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf))))
mult: 1 x 1
mult: 2 x 1
mult: 1 x 1
mult: 3 x 1
mult: 2 x 3
mult: 4 x 6
mult: 0 x 24
mult: 9 x 0
0
Why?
The problem is that you may encounter the zero element deep within a tree. At that point you not only want to avoid doing the multiplication of subtrees, but also of the elements surrounding the zero. But we seemingly don’t have control over the context surrounding the node with a zero in it, just the subtrees. What can we do?
One option, if the language provides it, is to raise an exception, signalling that a zero element has been found. An outer function can catch that exception and produce zero. Such a program will avoid doing any multiplication in case there’s a zero in the tree.
Racket comes with an exception mechanism that uses raise to signal an exception, which is propagated to the nearest enclosing exception handler. If there is no such handler, an uncaught exception error occurs.
Examples
> (raise 5) uncaught exception: 5
> (mult (raise 5) 2) uncaught exception: 5
> (mult (raise (mult 5 3)) 2) mult: 5 x 3
uncaught exception: 15
The general form of an exception handler uses the with-handlers form that includes a series of predicates and handler expressions. We’ll consider a simpler form called catch that unconditionally catches any exception throw and handles it with a function that takes the raised value as an argument. It can be expressed in terms of the more sophisticated with-handlers form:
Examples
> (define-syntax-rule (catch e f) (with-handlers ([(λ (x) #t) f]) e)) > (catch (raise 5) (λ (x) x)) 5
> (catch (mult (raise 5) 2) (λ (x) x)) 5
> (catch (mult (raise (mult 5 3)) 2) (λ (x) x)) mult: 5 x 3
15
> (catch (mult (mult 5 3) 2) (λ (x) x))
mult: 5 x 3
mult: 15 x 2
30
> (catch (mult (mult 5 3) 2) (λ (x) (mult x x)))
mult: 5 x 3
mult: 15 x 2
30
> (catch (mult (raise (mult 5 3)) 2) (λ (x) (mult x x)))
mult: 5 x 3
mult: 15 x 15
225
Now we can solve our problem:
Examples
; BT -> Number ; Multiply all the numbers in given binary tree
> (define (prod bt) (catch (prod/r bt) (λ (x) 0))) ; BT -> Number ; Throws: 0
> (define (prod/r bt) (match bt ['leaf 1] [`(node ,v ,l ,r) (if (zero? v) (raise 0) (mult v (mult (prod/r l) (prod/r r))))])) > (prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) 0
(This code is a bit problematic for reasons that are beside the point of this section, but... the problem is this will catch any exception, including things like system signals, out of memory exceptions, etc. A better solution would have the handler check that the exception value was 0 and re-raise it if not. That way it doesn’t “mask” any other exceptions.)
This code works great for our purposes, but what if the language didn’t provide an exception handling mechanism? Could we achieve the same effect without relying on exceptions?
One solution is to re-write the program in what’s called continuation passing style (CPS). Continuation passing style makes explicit what is implicit in the recursive calls to prod in our original program, which is that after recursively computing the product of the subtree, we have to do more work such as another recursive call and multiplication. By making this work explicit, we gain control over it and have the option to do things like throw away this work.
Here is the basic idea. We will write a version of prod that takes an additional argument which represents “the work to be done after this function call completes.” It will take a single argument, a number, which is the result of this function call, and it will produce some final result for the computation (in this case, a number).
In general, we want (k (prod bt)) ≡ (prod/k bt k) for all functions k and binary trees bt.
Starting from the spec, we have:
Examples
; BT (Number -> Number) -> Number
> (define (prod/k bt k) (k (prod bt)))
We can unroll the definition of prod:
Examples
> (define (prod/k bt k) (match bt ['leaf (k 1)] [`(node ,v ,l ,r) (k (mult v (mult (prod l) (prod r))))]))
Now we’d like to replace the calls to prod with calls to prod/k, which we can do by recognizing the work to be done around the call to prod and placing it in the continuation argument to prod/k. Let’s do the first call:
Examples
> (define (prod/k bt k) (match bt ['leaf (k 1)] [`(node ,v ,l ,r) (prod/k l (λ (pl) (k (mult v (mult pl (prod r))))))]))
Doing this again, we get:
Examples
> (define (prod/k bt k) (match bt ['leaf (k 1)] [`(node ,v ,l ,r) (prod/k l (λ (pl) (prod/k r (λ (pr) (k (mult v (mult pl pr)))))))]))
Now we have a definition of prod/k that is independent of prod that satisfies the spec we started with.
A couple of things to note:
Every call to prod/k is a tail-call,
The context of the recursive calls are given explicitly as continuation arguments.
We can recreate the original function by giving the appropriate initial continuation:
Examples
; BT -> Number
> (define (prod bt) (prod/k bt (λ (x) x)))
Now, this code doesn’t do anything smart on zero elements; it does exactly the same multiplications our first program does:
Examples
> (prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf))))
mult: 1 x 1
mult: 0 x 1
mult: 1 x 1
mult: 2 x 1
mult: 1 x 1
mult: 3 x 1
mult: 2 x 3
mult: 4 x 6
mult: 0 x 24
mult: 9 x 0
0
However, with a small tweak, we can get the behavior of the exception-handling code.
Consider this definition:
Examples
; BT (Number -> Number) -> Number
> (define (prod/k bt k) (match bt ['leaf (k 1)] [`(node ,v ,l ,r) (if (zero? v) 0 (prod/k l (λ (pl) (prod/k r (λ (pr) (k (mult v (mult pl pr))))))))])) ; BT -> Number
> (define (prod bt) (prod/k bt (λ (x) x)))
Notice that this program, when the value in a node is zero, immediately returns 0. It does not do any of the work represented by k. It does something akin to raising an exception: it blows off all the work of the surround context and returns a value to the “handler” (in this case, prod).
Returning to our example, we can see that no multiplications occur:
Examples
> (prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) 0
We’ve now achieved our original goal without the use of exception handlers. We achieved this by rewriting our program to make explicit the work that remains to do, giving us the ability to avoid doing it when necessary. This is a slighly simplified version of the general exception handling transformation, which we will look at next, since there’s only a single handler and all it does it produce 0. But, the by-hand transformation we did provides a useful blueprint for how can generally transform programs that use exception handling into ones that don’t.
16.5 Exceptional transformation
Let’s consider a very small subset of expressions, extended with raise and catch, and see how we can transform away those added mechanisms:
; An Expr is one of: ; - Integer ; - Variable ; - ‘(if ,Expr ,Expr ,Expr) ; - ‘(,Prim1 ,Expr) ; - ‘(,Prim2 ,Expr ,Expr) ; - ‘(raise ,Expr) ; - ‘(catch ,Expr (λ (,Variable) ,Expr))
Here is the basic idea of the transformation, we transform every expression into a function of two arguments. The two arguments represent the two ways an expression may produce results: either by returning normally or by raising an exception.
'(λ (retn raze) (retn 1))
Why? Because 1 just produces 1; it can’t possibly raise an exception. So given the two ways of producing a value, we choose the ret way and “return” by apply retn to the value we want to return: 1.
'(λ (retn raze) (raze 1))
This is choosing to not return a value, but rather “raise” an exception by calling the raze function.
This is a lot like the by-hand transformation we did, except we now have two continuations: one to represent work to do after returning (normally) and one for work to do after raising an exception.
At the top-level, to run an expression we simply plug in appropriate definitions for retn and raze. The retn function should just produce the result, i.e. it should be (λ (x) x), while raze should signal an uncaught exception. Since our language has such a simple model of errors, we’ll just cause an error to occur, i.e. (λ (x) (add1 #f)). Let’s try our examples.
Examples
> (interp-env '((λ (retn raze) (retn 1)) (λ (x) x) (λ (x) (add1 #f))) '()) 1
> (interp-env '((λ (retn raze) (raze 1)) (λ (x) x) (λ (x) (add1 #f))) '()) 'err
What about something like (add1 e)?
Well if e returns normally, then the whole thing should produce one more than that value. If e raises an exception, then (add1 e) should raise that exception.
Suppose t where the transformed version of e, which means it is a function of two parameters: what to do if e returns and what to do if e raises.
How about something like (catch e0 (λ (x) e1))? If e0 produces a value normally, then the whole expression produces that value normally. However if e0 raises an expression then the whole expression produces whatever e1 with x bound to the raised value produces. Let t0 and t1 be the transformed versions of e0 and e1. Then transformation of the whole expressions should be
(λ (retn raze) (t0 retn (λ (x) (t1 retn raze))))
One thing to notice here is that we are running t0 with a raze function that, if called, will run t1 normally.
Guided by the examples, let’s define the transformation (note: we have to take care of avoiding unintended variable capture):
Examples
; Expr -> Expr
> (define (exn-transform e) (match e [(? integer? i) `(λ (retn raze) (retn ,i))] [(? symbol? x) (let ((retn (gensym 'retn)) (raze (gensym 'raze))) `(λ (,retn ,raze) (,retn ,x)))] [`(if ,e0 ,e1 ,e2) (let ((t0 (exn-transform e0)) (t1 (exn-transform e1)) (t2 (exn-transform e2)) (retn (gensym 'retn)) (raze (gensym 'raze))) `(λ (,retn ,raze) (,t0 (λ (x) ((if x ,t1 ,t2) ,retn ,raze)) ,raze)))] [`(,(? prim? p) ,e0) (let ((t0 (exn-transform e0)) (retn (gensym 'retn)) (raze (gensym 'raze))) `(λ (,retn ,raze) (,t0 (λ (v) (,retn (,p v))) ,raze)))] [`(,(? prim? p) ,e0 ,e1) (let ((t0 (exn-transform e0)) (t1 (exn-transform e1)) (retn (gensym 'retn)) (raze (gensym 'raze)) (v0 (gensym 'v0))) `(λ (,retn ,raze) (,t0 (λ (,v0) (,t1 (λ (v1) (,retn (,p v0 v1))) ,raze)) ,raze)))] [`(raise ,e) (let ((t (exn-transform e)) (retn (gensym 'retn)) (raze (gensym 'raze))) `(λ (,retn ,raze) (,t ,raze ,raze)))] [`(catch ,e0 (λ (,x) ,e1)) (let ((t0 (exn-transform e0)) (t1 (exn-transform e1)) (retn (gensym 'retn)) (raze (gensym 'raze))) `(λ (,retn ,raze) (,t0 ,retn (λ (,x) (,t1 ,retn ,raze)))))]))
Here’s what the transformation looks like on examples:
Examples
> (exn-transform '1) '(λ (retn raze) (retn 1))
> (exn-transform '(raise 1)) '(λ (retn5708 raze5709) ((λ (retn raze) (retn 1)) raze5709 raze5709))
> (exn-transform '(catch (raise 1) (λ (x) x))) '(λ (retn5714 raze5715) ((λ (retn5710 raze5711) ((λ (retn raze) (retn 1)) raze5711 raze5711)) retn5714 (λ (x) ((λ (retn5712 raze5713) (retn5712 x)) retn5714 raze5715))))
> (exn-transform '(catch (raise 1) (λ (x) (add1 x)))) '(λ (retn5722 raze5723) ((λ (retn5716 raze5717) ((λ (retn raze) (retn 1)) raze5717 raze5717)) retn5722 (λ (x) ((λ (retn5720 raze5721) ((λ (retn5718 raze5719) (retn5718 x)) (λ (v) (retn5720 (add1 v))) raze5721)) retn5722 raze5723))))
> (exn-transform '(catch (add1 (raise 1)) (λ (x) 1))) '(λ (retn5728 raze5729) ((λ (retn5726 raze5727) ((λ (retn5724 raze5725) ((λ (retn raze) (retn 1)) raze5725 raze5725)) (λ (v) (retn5726 (add1 v))) raze5727)) retn5728 (λ (x) ((λ (retn raze) (retn 1)) retn5728 raze5729))))
> (exn-transform '(catch (add1 (raise 1)) (λ (x) (raise x)))) '(λ (retn5738 raze5739) ((λ (retn5732 raze5733) ((λ (retn5730 raze5731) ((λ (retn raze) (retn 1)) raze5731 raze5731)) (λ (v) (retn5732 (add1 v))) raze5733)) retn5738 (λ (x) ((λ (retn5736 raze5737) ((λ (retn5734 raze5735) (retn5734 x)) raze5737 raze5737)) retn5738 raze5739))))
Now let’s give it a spin:
Examples
> (define (run e) (interp-env `(,(exn-transform e) (λ (x) x) (λ (x) (add1 #f))) '())) > (run '1) 1
> (run '(raise 1)) 'err
> (run '(catch (raise 1) (λ (x) x))) 1
> (run '(catch (raise 1) (λ (x) (add1 x)))) 2
> (run '(catch (add1 (raise 1)) (λ (x) 1))) 1
> (run '(catch (add1 (raise 1)) (λ (x) (raise x)))) 'err
> (run '(if (raise 0) 1 2)) 'err
> (run '(if (zero? 0) (raise 1) 2)) 'err
16.6 Quotation
We have seen how to interpret limited uses of quote, such as in '() and 'x, i.e. the empty list and symbols.
But we’ve also been using using quote more generally where we can write down an arbitrary constant s-expression within a quote:
Examples
> '#t #t
> '5 5
> '(1 2 3) '(1 2 3)
> '(add1 x) '(add1 x)
> '(car '(1 2 3)) '(car '(1 2 3))
> '(((1) 2) 3) '(((1) 2) 3)
> '(1 . 2) '(1 . 2)
> '("asdf" fred ((one))) '("asdf" fred ((one)))
We can understand the more general quote form as a shorthand for expressions that construct an equivalent list to the one denoted by the s-expression.
'1 is shorthand for 1,
'(1 . 2) is shorthand for (cons '1 '2), which is shorthand for (cons 1 2),
'(1 2 3) is shorthand for (cons '1 '(2 3)), which is shorthand for (cons 1 (cons '2 '(3))), which is shorthand for (cons 1 (cons 2 (cons '3 '()))), which is shorthand for (cons 1 (cons 2 (cons 3 '()))),
'() is as simple as possible (the empty list),
'x is as simple as possible (a symbol), and
5 is as simple as possible.
Guided by these examples, we can write a function that transforms the s-expression inside of a quote into an equivalent expression that only uses quote for constructing symbols and the empty list:
Examples
; S-Expr -> Expr ; Produce an expression that evaluates to given s-expression, without ; use of quote (except for symbols and empty list)
> (define (quote->expr d) (match d [(? boolean?) d] [(? integer?) d] [(? string?) d] [(? char?) d] [(? symbol?) (list 'quote d)] [(cons x y) (list 'cons (quote->expr x) (quote->expr y))] ['() ''()])) > (quote->expr 'x) ''x
> (quote->expr 5) 5
> (quote->expr "Fred") "Fred"
> (quote->expr '(1 . 2)) '(cons 1 2)
> (quote->expr '(1 2 3)) '(cons 1 (cons 2 (cons 3 '())))
> (quote->expr '(car '(1 2 3))) '(cons 'car (cons (cons 'quote (cons (cons 1 (cons 2 (cons 3 '()))) '())) '()))
> (quote->expr '(((1) 2) 3)) '(cons (cons (cons 1 '()) (cons 2 '())) (cons 3 '()))
> (quote->expr '(1 . 2)) '(cons 1 2)
> (quote->expr '("asdf" fred ((one)))) '(cons "asdf" (cons 'fred (cons (cons (cons 'one '()) '()) '())))
We can now incorporate this into desugar to eliminate uses of compound-data quote:
Examples
; Expr+ -> Expr
> (define (desugar e+) (match e+ [`(begin ,@(list `(define (,fs . ,xss) ,es) ...) ,e) `(letrec ,(map (λ (f xs e) `(,f (λ ,xs ,(desugar e)))) fs xss es) ,(desugar e))] [(? symbol? x) x] [(? imm? i) i] [`',(? symbol? s) `',s] [`',d (quote->expr d)] [`(,(? prim? p) . ,es) `(,p ,@(map desugar es))] [`(if ,e0 ,e1 ,e2) `(if ,(desugar e0) ,(desugar e1) ,(desugar e2))] [`(let ((,x ,e0)) ,e1) `(let ((,x ,(desugar e0))) ,(desugar e1))] [`(letrec ,bs ,e0) `(letrec ,(map (λ (b) (list (first b) (desugar (second b)))) bs) ,(desugar e0))] [`(λ ,xs ,e0) `(λ ,xs ,(desugar e0))] [`(cond . ,_) (desugar (cond->if e+))] [`(and . ,_) (desugar (and->if e+))] [`(or . ,_) (desugar (or->if e+))] [`(,e . ,es) `(,(desugar e) ,@(map desugar es))]))
And now we can desugar programs such as these:
Examples
> (desugar '(map f '(1 2 3))) '(map f (cons 1 (cons 2 (cons 3 '()))))
> (desugar '(map f '(and 1 2))) '(map f (cons 'and (cons 1 (cons 2 '()))))
> (desugar '(if x '(1 . 2) 3)) '(if x (cons 1 2) 3)
And our prior interpterter will work just fine on these programs:
Examples
> (interp-env (desugar '(map f '(1 2 3))) `((map ,map) (f ,add1))) '(2 3 4)
> (interp-env (desugar '(map f '(and 1 2))) `((map ,map) (f ,identity))) '(and 1 2)
> (interp-env (desugar '(if x '(1 . 2) 3)) `((x #t))) '(1 . 2)
And:
Examples
> (interp-env (desugar ''(((1) 2) 3)) '()) '(((1) 2) 3)
> (interp-env (desugar ''(1 . 2)) '()) '(1 . 2)
> (interp-env (desugar ''("asdf" fred ((one)))) '()) '("asdf" fred ((one)))
16.7 Pattern matching
One of the most ubiquitous language features we’ve used, but not implemented, is pattern matching with the match form.
Pattern matching too can be seen as syntactic sugar since it’s easy to imagine how you could rewrite uses of match into equivalent expressions that didn’t involve match.
For example, consider the program:
; BT -> Number ; Multiply all the numbers in given binary tree (define (prod bt) (match bt ['leaf 1] [`(node ,v ,l ,r) (* v (* (prod l) (prod r)))]))
An alternative, equivalent, formulation that doesn’t use match is the following:
; BT -> Number ; Multiply all the numbers in given binary tree (define (prod bt) (cond [(eq? 'leaf bt) 1] [(and (list? bt) (= 4 (length bt)) (eq? 'node (first bt))) (let ((v (second bt)) (l (third bt)) (r (fourth bt))) (* v (* (prod l) (prod r))))] ; corresponds to a match failure [else (add1 #f)]))
This code is less nice to read and write, but essentially does the same thing the pattern-matching code does.
In this example, each match-clause becomes a cond-clause. The question-part of each cond-clause is an expression that determines whether the corresponding pattern-part of the match-clause matches. The answer-part of each cond-clause corresponds to the expression-part of the match-clause, with an add let-form that destructures the scrutinee and binds the pattern variables of the pattern-part.
Let’s consider the following extension to the grammar of Expr+ to include a simplified version of the pattern matchin form we’ve been using:
; type Expr+ = ; .... ; | Match ; type Match = (match ,Expr+ ,(list Pat Expr+) ...) ; type Pat = ; | #t ; | #f ; | Integer ; | String ; | Variable ; | ‘_ ; | ‘'() ; | ‘(quote ,Symbol) ; | ‘(cons ,Pat ,Pat) ; | ‘(list ,Pat ...) ; | ‘(? ,Expr ,Pat ...)
A match form consists of an expression to match against, sometimes callsed the scrutinee, followed by some number of pattern-matching clauses; each one consists of a pattern and expression to evaluate should the pattern match the scrutinee’s value.
Here a pattern can either be a literal boolean, integer, string, empty list, or symbol, or a pattern variable, which matches anything and binds the value to the variable, a “wildcard” which matches anything and binds nothing, a cons pattern which matches pairs of things that match the subpatterns, list pattern which matches lists of a fixed-size where elements matches the subpatterns, or a ? pattern which matches if the results of evaluated the first subexpression applied to scrutinee produces true and all of the subpatterns match.
This doesn’t include the quasiquote-patterns we used above, but still this is a useful subset of pattern matching and allows us to write programs such as:
; BT -> Number ; Multiply all the numbers in given binary tree (define (prod bt) (match bt ['leaf 1] [(list 'node v l r) (* v (* (prod l) (prod r)))]))
As alluded to above, each pattern plays two roles: it used to determine whether the scrutinee matches the pattern, and it used to bind variable names (in the scope of the RHS expression) to sub-parts of the scrutinee when it does match.
rewrite patterns into Boolean valued expressions that answer whether the pattern matches the scrutinee,
rewrite pattern and RHS in to expressions in which the pattern variables of pattern are bound to the appropriately deconstructed parts of the scrutinee.
Assume: the scrutinee is a variable. (It’s easy to establish this assumption in general.)
We need two functions:
; Pat Variable -> Expr ; Produces an expression determining if p matches v (define (pat-match p v) ...) ; Pat Variable Expr -> Expr ; Produce an expression that deconstructs v and binds pattern variables ; of p in scope of e. ; ASSUME: v matches p (define (pat-bind p v e) ...)
Let’s turn to pat-match first.
Suppose the pattern is a literal #t. When does v match it? When v is eq? to #t.
So an expression that produces true when this pattern matches is (eq? #t v).
Handling #f, integers, characters, symbols, and the empty list is similar.
What about variables? Suppose the pattern is x. When does v match it? Always. So #t is an expression that produces true with this pattern matches.
Wildcards are the same.
What about when the pattern is a cons-pattern? Suppose the pattern is (cons p1 p2) for some patterns p1 and p2. When does v match (cons p1 p2)? When v is a pair and (car v) matches p1 and (cdr v) matches p2.
A list pattern is similar, except that the scrunity must be a list with as many elements as there are patterns, and the elements must match the corresponding subpattern.
What about a ? pattern? When does v match it? Suppose the pattern is (? even?). When does v match it? When (even? v) is true. If the pattern had a non-empty list of sub-patterns they would all need to match v, too.
We can now formulate a defintion of pat-match:
; Pat Variable -> Expr ; Produces an expression determining if p matches v (define (pat-match p v) (match p [#t `(eq? #t ,v)] [#f `(eq? #f ,v)] [(? integer? i) `(eq? ,i ,v)] [(? string? s) `(and (string? ,v) (string=? ,s ,v))] [(list 'quote '()) `(eq? '() ,v)] [(? symbol?) #t] [(list 'quote (? symbol? s)) `(eq? ,v ',s)] [(list 'cons p1 p2) (let ((v1 (gensym)) (v2 (gensym))) `(and (cons? ,v) (let ((,v1 (car ,v)) (,v2 (cdr ,v))) (and ,(pat-match p1 v1) ,(pat-match p2 v2)))))] [(cons 'list ps) `(and (list? ,v) (= (length ,v) ,(length ps)) ,(pat-match-list ps v))] [(cons '? (cons e ps)) `(and (,e ,v) ,(pats-match ps v))]))
The list-pattern case relies on a helper function pat-match-list and the ?-pattern case relies on pats-match, both defined below:
; (Listof Pat) Variable -> Expr ; Produces an expression determining if every ps matches x (define (pats-match ps v) (match ps ['() #t] [(cons p ps) `(and ,(pat-match p v) ,(pats-match ps v))])) ; (Listof Pat) Variable -> Expr ; Produces an expression determining if each ps matches each element of list v (define (pat-match-list ps v) (match ps ['() #t] [(cons p ps) (let ((v1 (gensym)) (v2 (gensym))) `(let ((,v1 (car ,v)) (,v2 (cdr ,v))) (and ,(pat-match p v1) ,(pat-match-list ps v2))))]))
Here are some examples:
Examples
> (pat-match 'leaf 'bt) #t
> (pat-match '(list 'node v l r) 'bt) '(and (list? bt) (= (length bt) 4) (let ((g5805 (car bt)) (g5806 (cdr bt))) (and (eq? g5805 'node) (let ((g5807 (car g5806)) (g5808 (cdr g5806))) (and #t (let ((g5809 (car g5808)) (g5810 (cdr g5808))) (and #t (let ((g5811 (car g5810)) (g5812 (cdr g5810))) (and #t #t)))))))))
> (pat-match '(list 'node (? even? v) l r) 'bt) '(and (list? bt) (= (length bt) 4) (let ((g5813 (car bt)) (g5814 (cdr bt))) (and (eq? g5813 'node) (let ((g5815 (car g5814)) (g5816 (cdr g5814))) (and (and (even? g5815) (and #t #t)) (let ((g5817 (car g5816)) (g5818 (cdr g5816))) (and #t (let ((g5819 (car g5818)) (g5820 (cdr g5818))) (and #t #t)))))))))
These aren’t very readable programs that emerge, however, we check that they’re doing the right thing. Note that the elaboration requires a few functions to be available, such as list?, and length. We make these available in an initial environment:
Examples
> (define env0 `((length ,length) (list? ,list?))) > (interp-env (desugar `(let ((bt 'leaf)) ,(pat-match 'leaf 'bt))) env0) #t
> (interp-env (desugar `(let ((bt 'leaf)) ,(pat-match 8 'bt))) env0) #f
> (interp-env (desugar `(let ((bt '(node 1 leaf leaf))) ,(pat-match '(list 'node v l r) 'bt))) env0) #t
> (interp-env (desugar `(let ((bt '(node 1 leaf leaf))) ,(pat-match '(list 'node (? zero?) l r) 'bt))) env0) #f
> (interp-env (desugar `(let ((bt '(node 0 leaf leaf))) ,(pat-match '(list 'node (? zero?) l r) 'bt))) env0) #t
Now moving on to pat-bind, it follows a similar structure, but we always assume the given pattern matches the scrutinee. The addition Expr argument represents the right-hand-side expression of the match-clause. The idea is that the pattern variables of p are bound to sub-parts of v in e.
; Pat Variable Expr -> Expr ; Produce an expression that deconstructs v and binds pattern variables ; of p in scope of e. ; ASSUME: v matches p (define (pat-bind p v e) (match p [#t e] [#f e] [(? integer?) e] [(? string?) e] [(list 'quote '()) e] ['_ e] [(? symbol? x) `(let ((,x ,v)) ,e)] [(list 'quote (? symbol?)) e] [(list 'cons p1 p2) (let ((v1 (gensym)) (v2 (gensym))) `(let ((,v1 (car ,v)) (,v2 (cdr ,v))) ,(pat-bind p1 v1 (pat-bind p2 v2 e))))] [(cons 'list ps) (pat-bind-list ps v e)] [(cons '? (cons _ ps)) (pats-bind ps v e)]))
Here are some examples:
Examples
> (pat-bind 'leaf 'bt 'bt) '(let ((leaf bt)) bt)
> (pat-bind '(list 'node v l r) 'bt 'v) '(let ((g5845 (car bt)) (g5846 (cdr bt))) (let ((g5847 (car g5846)) (g5848 (cdr g5846))) (let ((v g5847)) (let ((g5849 (car g5848)) (g5850 (cdr g5848))) (let ((l g5849)) (let ((g5851 (car g5850)) (g5852 (cdr g5850))) (let ((r g5851)) v)))))))
> (pat-bind '(list 'node (? even? v) l r) 'bt 'v) '(let ((g5853 (car bt)) (g5854 (cdr bt))) (let ((g5855 (car g5854)) (g5856 (cdr g5854))) (let ((v g5855)) (let ((g5857 (car g5856)) (g5858 (cdr g5856))) (let ((l g5857)) (let ((g5859 (car g5858)) (g5860 (cdr g5858))) (let ((r g5859)) v)))))))
These are tough to read, but we can confirm what they compute:
Examples
> (interp-env (desugar `(let ((bt '(node 0 leaf leaf))) ,(pat-bind '(list 'node (? zero? z) l r) 'bt 'z))) '()) 0
Putting the pieces together, we can now write a match->cond function that rewrites a match-expression into a cond-expression:
; Match -> Expr ; Rewrite match expression into an equivalent cond expression (define (match->cond m) (match m [(cons 'match (cons e mcs)) (let ((x (gensym))) `(let ((,x ,e)) (cond ,@(map (λ (mc) (match mc [(list p e) (list (pat-match p x) (pat-bind p x e))])) mcs) ; fall through to error [else (add1 #f)])))]))
Examples
> (match->cond '(match '(node 2 leaf leaf) ['leaf 0] [(list 'node v l r) v])) '(let ((g5869 '(node 2 leaf leaf))) (cond ((eq? g5869 'leaf) 0) ((and (list? g5869) (= (length g5869) 4) (let ((g5870 (car g5869)) (g5871 (cdr g5869))) (and (eq? g5870 'node) (let ((g5872 (car g5871)) (g5873 (cdr g5871))) (and #t (let ((g5874 (car g5873)) (g5875 (cdr g5873))) (and #t (let ((g5876 (car g5875)) (g5877 (cdr g5875))) (and #t #t))))))))) (let ((g5878 (car g5869)) (g5879 (cdr g5869))) (let ((g5880 (car g5879)) (g5881 (cdr g5879))) (let ((v g5880)) (let ((g5882 (car g5881)) (g5883 (cdr g5881))) (let ((l g5882)) (let ((g5884 (car g5883)) (g5885 (cdr g5883))) (let ((r g5884)) v)))))))) (else (car '()))))
Finally, we can incorporate match->cond into desugar:
Examples
; Expr+ -> Expr
(define (desugar e+) (match e+ [`(begin ,@(list `(define (,fs . ,xss) ,es) ...) ,e) `(letrec ,(map (λ (f xs e) `(,f (λ ,xs ,(desugar e)))) fs xss es) ,(desugar e))] [(? symbol? x) x] [(? imm? i) i] [`',(? symbol? s) `',s] [`',d (quote->expr d)] [`(,(? prim? p) . ,es) `(,p ,@(map desugar es))] [`(if ,e0 ,e1 ,e2) `(if ,(desugar e0) ,(desugar e1) ,(desugar e2))] [`(let ((,x ,e0)) ,e1) `(let ((,x ,(desugar e0))) ,(desugar e1))] [`(letrec ,bs ,e0) `(letrec ,(map (λ (b) (list (first b) (desugar (second b)))) bs) ,(desugar e0))] [`(λ ,xs ,e0) `(λ ,xs ,(desugar e0))] [`(cond . ,_) (desugar (cond->if e+))] [`(and . ,_) (desugar (and->if e+))] [`(or . ,_) (desugar (or->if e+))] [`(match . ,_) (desugar (match->cond e+))] ; new [`(,e . ,es) `(,(desugar e) ,@(map desugar es))]))
Now we can interpret programs such as this:
Examples
> (interp-env (desugar '(begin (define (prod bt) (match bt ['leaf 1] [(list 'node v l r) (* v (* (prod l) (prod r)))])) (prod '(node 3 (node 4 leaf leaf) leaf)))) `((* ,*) (list? ,list?) (length ,length))) 12