changes

[lambda.git] / cps.mdwn
diff --git a/cps.mdwn b/cps.mdwn

index 605e86a..259f412 100644 (file)
--- a/cps.mdwn
+++ b/cps.mdwn
@@ -1,61 +1,215 @@
-;; call-by-value CPS
-; see Dancy and Filinski, "Representing control: a study of the CPS transformation" (1992)
-; and Sabry, "Note on axiomatizing the semantics of control operators" (1996)
-
-; [x] = var x
-let var = \x (\k. k x) in
-; [\x. body] = lam (\x. [body])
-let lam = \x_body (\k. k (\x. x_body x)) in
-; [M N] = app [M] [N]
-let app = \m n. (\k. m (\m. n (\n. m n k))) in
-
-; helpers
-let app3 = \a b c. app (app a b) c in
-let app4 = \a b c d. app (app (app a b) c) d in
-; [succ] = op1 succ
-let op1 = \op. \u. u (\a k. k (op a)) in
-; [plus] = op2 plus
-let op2 = \op. \u. u (\a v. v (\b k. k (op a b))) in
-let op3 = \op. \u. u (\a v. v (\b w. w (\c k. k (op a b c)))) in
-
-;; continuation operators
-; [let/cc k M] = letcc (\k. [M])
-let callcc = \k. k (\f u. (\j. f j u) (\y w. u y)) in
-let letcc = \x_body. app callcc (lam x_body) in
-let letcc = \k_body. \k. (\j. (k_body j) k) (\y w. k y) in
-
-; [abort M] = abort [M]
-let abort = \body. \k. body (\m m) in
-; [prompt M] = prompt [M]
-let prompt = \body. \k. k (body (\m m)) in
-; [shift k M] = shift (\k. [M])
-let shift = \k_body. \k. (\j. (k_body j) (\m m)) (\y w. w (k y)) in
-
-;; examples
-; (+ 100 (let/cc k (+ 10 1))) ~~> 111
-; app3 (op2 plus) (var hundred) (letcc (\k. app3 (op2 plus) (var ten) (var one)))
-
-; (+ 100 (let/cc k (+ 10 (k 1)))) ~~> 101
-; app3 (op2 plus) (var hundred) (letcc (\k. app3 (op2 plus) (var ten) (app (var k) (var one))))
-
-; (+ 100 (+ 10 (abort 1))) ~~> 1
-; app3 (op2 plus) (var hundred) (app3 (op2 plus) (var ten) (abort (var one)))
-
-; (+ 100 (prompt (+ 10 (abort 1)))) ~~> 101
-; app3 (op2 plus) (var hundred) (prompt (app3 (op2 plus) (var ten) (abort (var one))))
-
-; (+ 1000 (prompt (+ 100 (shift k (+ 10 1))))) ~~> 1011
-; app3 (op2 plus) (var thousand) (prompt (app3 (op2 plus) (var hundred) (shift (\k. ((op2 plus) (var ten) (var one))))))
-
-; (+ 1000 (prompt (+ 100 (shift k (k (+ 10 1)))))) ~~> 1111
-; app3 (op2 plus) (var thousand) (prompt (app3 (op2 plus) (var hundred) (shift (\k. (app (var k) ((op2 plus) (var ten) (var one)))))))
-
-; (+ 1000 (prompt (+ 100 (shift k (+ 10 (k 1)))))) ~~> 1111 but added differently
-; app3 (op2 plus) (var thousand) (prompt (app3 (op2 plus) (var hundred) (shift (\k. ((op2 plus) (var ten) (app (var k) (var one)))))))
-
-; (+ 100 ((prompt (+ 10 (shift k k))) 1)) ~~> 111
-; app3 (op2 plus) (var hundred) (app (prompt (app3 (op2 plus) (var ten) (shift (\k. (var k))))) (var one))
-
-; (+ 100 (prompt (+ 10 (shift k (k (k 1)))))) ~~> 121
-; app3 (op2 plus) (var hundred) (prompt (app3 (op2 plus) (var ten) (shift (\k. app (var k) (app (var k) (var one))))))
+Gaining control over order of evaluation
+----------------------------------------
+
+We know that evaluation order matters.  We're beginning to learn how
+to gain some control over order of evaluation (think of Jim's abort handler).
+We continue to reason about order of evaluation.
+
+A lucid discussion of evaluation order in the
+context of the lambda calculus can be found here:
+[Sestoft: Demonstrating Lambda Calculus Reduction](http://www.itu.dk/~sestoft/papers/mfps2001-sestoft.pdf).
+Sestoft also provides a lovely on-line lambda evaluator:
+[Sestoft: Lambda calculus reduction workbench](http://www.itu.dk/~sestoft/lamreduce/index.html),
+which allows you to select multiple evaluation strategies, 
+and to see reductions happen step by step.
+
+Evaluation order matters
+------------------------
+
+We've seen this many times.  For instance, consider the following
+reductions.  It will be convenient to use the abbreviation `w =
+\x.xx`.  I'll indicate which lambda is about to be reduced with a *
+underneath:
+
+<pre>
+(\x.y)(ww)
+ *
+y
+</pre>
+
+Done!  We have a normal form.  But if we reduce using a different
+strategy, things go wrong:
+
+<pre>
+(\x.y)(ww) =
+(\x.y)((\x.xx)w) =
+        *
+(\x.y)(ww) =
+(\x.y)((\x.xx)w) =
+        *
+(\x.y)(ww) 
+</pre>
+
+Etc.  
+
+As a second reminder of when evaluation order matters, consider using
+`Y = \f.(\h.f(hh))(\h.f(hh))` as a fixed point combinator to define a recursive function:
+
+<pre>
+Y (\f n. blah) =
+(\f.(\h.f(hh))(\h.f(hh))) (\f n. blah) 
+     *
+(\f.f((\h.f(hh))(\h.f(hh)))) (\f n. blah) 
+       *
+(\f.f(f((\h.f(hh))(\h.f(hh))))) (\f n. blah) 
+         *
+(\f.f(f(f((\h.f(hh))(\h.f(hh)))))) (\f n. blah) 
+</pre>
+
+And we never get the recursion off the ground.
+
+
+Using a Continuation Passing Style transform to control order of evaluation
+---------------------------------------------------------------------------
+
+We'll present a technique for controlling evaluation order by transforming a lambda term
+using a Continuation Passing Style transform (CPS), then we'll explore
+what the CPS is doing, and how.
+
+In order for the CPS to work, we have to adopt a new restriction on
+beta reduction: beta reduction does not occur underneath a lambda.
+That is, `(\x.y)z` reduces to `y`, but `\w.(\x.y)z` does not reduce, because
+the `\w` protects the redex in the body from reduction.  
+(A redex is a subform ...(\xM)N..., i.e., something that can be the
+target of beta reduction.)
+
+Start with a simple form that has two different reduction paths:
+
+reducing the leftmost lambda first: `(\x.y)((\x.z)w)  ~~> y`
+
+reducing the rightmost lambda first: `(\x.y)((\x.z)w)  ~~> (\x.y)z ~~> y`
+
+After using the following call-by-name CPS transform---and assuming
+that we never evaluate redexes protected by a lambda---only the first
+reduction path will be available: we will have gained control over the
+order in which beta reductions are allowed to be performed.
+
+Here's the CPS transform:
+
+    [x] => x
+    [\xM] => \k.k(\x[M])
+    [MN] => \k.[M](\m.m[N]k)
+
+Here's the result of applying the transform to our problem term:
+
+    [(\x.y)((\x.z)w)]
+    \k.[\x.y](\m.m[(\x.z)w]k)
+    \k.(\k.k(\x.[y]))(\m.m(\k.[\x.z](\m.m[w]k))k)
+    \k.(\k.k(\x.y))(\m.m(\k.(\k.k(\x.z))(\m.mwk))k)
+
+Because the initial `\k` protects the entire transformed term, 
+we can't perform any reductions.  In order to see the computation
+unfold, we have to apply the transformed term to a trivial
+continuation, usually the identity function `I = \x.x`.
+
+    [(\x.y)((\x.z)w)] I
+    (\k.[\x.y](\m.m[(\x.z)w]k)) I
+    [\x.y](\m.m[(\x.z)w] I)
+    (\k.k(\x.y))(\m.m[(\x.z)w] I)
+    (\x.y)[(\x.z)w] I
+    y I
+
+The application to `I` unlocks the leftmost functor.  Because that
+functor (`\x.y`) throws away its argument, we never need to expand the
+CPS transform of the argument.
+
+Compare with a call-by-value xform:
+
+    <x> => \k.kx
+    <\aM> => \k.k(\a<M>)
+    <MN> => \k.<M>(\m.<N>(\n.mnk))
+
+This time the reduction unfolds in a different manner:
+
+    <(\x.y)((\x.z)w)> I
+    (\k.<\x.y>(\m.<(\x.z)w>(\n.mnk))) I
+    <\x.y>(\m.<(\x.z)w>(\n.mnI))
+    (\k.k(\x.<y>))(\m.<(\x.z)w>(\n.mnI))
+    <(\x.z)w>(\n.(\x.<y>)nI)
+    (\k.<\x.z>(\m.<w>(\n.mnk)))(\n.(\x.<y>)nI)
+    <\x.z>(\m.<w>(\n.mn(\n.(\x.<y>)nI)))
+    (\k.k(\x.<z>))(\m.<w>(\n.mn(\n.(\x.<y>)nI)))
+    <w>(\n.(\x.<z>)n(\n.(\x.<y>)nI))
+    (\k.kw)(\n.(\x.<z>)n(\n.(\x.<y>)nI))
+    (\x.<z>)w(\n.(\x.<y>)nI)
+    <z>(\n.(\x.<y>)nI)
+    (\k.kz)(\n.(\x.<y>)nI)
+    (\x.<y>)zI
+    <y>I
+    (\k.ky)I
+    I y
+
+Both xforms make the following guarantee: as long as redexes
+underneath a lambda are never evaluated, there will be at most one
+reduction available at any step in the evaluation.
+That is, all choice is removed from the evaluation process.
+
+Questions and exercises:
+
+1. Why is the CBN xform for variables `[x] = x` instead of something
+involving kappas?  
+
+2. Write an OCaml function that takes a lambda term and returns a
+CPS-xformed lambda term.  You can use the following data declaration:
+
+    type form = Var of char | Abs of char * form | App of form * form;;
+
+3. What happens (in terms of evaluation order) when the application
+rule for CBN CPS is changed to `[MN] = \k.[N](\n.[M]nk)`?  Likewise,
+what happens when the application rule for CBV CPS is changed to `<MN>
+= \k.<N>(\n.<M>(\m.mnk))`?
+
+4. What happens when the application rules for the CPS xforms are changed to
+
+    [MN] = \k.<M>(\m.m<N>k)
+    <MN> = \k.[M](\m.[N](\n.mnk))
+
+
+Thinking through the types
+--------------------------
+
+This discussion is based on [Meyer and Wand 1985](http://citeseer.ist.psu.edu/viewdoc/download?doi=10.1.1.44.7943&rep=rep1&type=pdf).
+
+Let's say we're working in the simply-typed lambda calculus.
+Then if the original term is well-typed, the CPS xform will also be
+well-typed.  But what will the type of the transformed term be?
+
+The transformed terms all have the form `\k.blah`.  The rule for the
+CBN xform of a variable appears to be an exception, but instead of
+writing `[x] => x`, we can write `[x] => \k.xk`, which is
+eta-equivalent.  The `k`'s are continuations: functions from something
+to a result.  Let's use &sigma; (written `o` below) as the result type.
+Then each `k` in the transform will be a function of type &rho; --> &sigma;
+for some choice of &rho;.
+
+We'll need an ancillary function ': for any ground type a, a' = a;
+for functional types a->b, (a->b)' = ((a'->o)->o) -> (b'->o) -> o.
+
+    Call by name transform
+
+    Terms                            Types
+
+    [x] => \k.xk                     [a] => (a'->o)->o
+    [\xM] => \k.k(\x[M])             [a->b] => ((a->b)'->o)->o
+    [MN] => \k.[M](\m.m[N]k)         [b] => (b'->o)->o
+
+Remember that types associate to the right.  Let's work through the
+application xform and make sure the types are consistent.  We'll have
+the following types:
+
+    M:a->b
+    N:a
+    MN:b 
+    k:b'->o
+    [N]:(a'->o)->o
+    m:((a'->o)->o)->(b'->o)->o
+    m[N]:(b'->o)->o
+    m[N]k:o 
+    [M]:((a->b)'->o)->o = ((((a'->o)->o)->(b'->o)->o)->o)->o
+    [M](\m.m[N]k):o
+    [MN]:(b'->o)->o
+
+Note that even though the transform uses the same symbol for the
+translation of a variable, in general it will have a different type in
+the transformed term.