diff src/Firstorder.v @ 249:0f45421cae21

Prose for LocallyNameless
author Adam Chlipala <adamc@hcoop.net>
date Fri, 11 Dec 2009 16:07:27 -0500
parents 8bd90fe41acd
children 76043a960a38
line wrap: on
line diff
--- a/src/Firstorder.v	Fri Dec 11 15:00:42 2009 -0500
+++ b/src/Firstorder.v	Fri Dec 11 16:07:27 2009 -0500
@@ -572,6 +572,12 @@
 
 (** * Locally Nameless Syntax *)
 
+(** The most popular Coq syntax encoding today is the %\textit{%#<i>#locally nameless#</i>#%}% style, which has been around for a while but was popularized recently by Aydemir et al., following a methodology summarized in their paper "Engineering Formal Metatheory."  #<a href="http://www.cis.upenn.edu/~plclub/oregon08/">#A specialized tutorial by that group#</a>#%\footnote{\url{http://www.cis.upenn.edu/~plclub/oregon08/}}% explains the approach, based on a library.  In this section, we will build up all of the necessary ingredients from scratch.
+
+   The one-sentence summary of locally nameless encoding is that we represent free variables as concrete syntax does, and we represent bound variables with de Bruijn indices.  Many proofs involve reasoning about terms transplanted into different free variable contexts; concrete encoding of free variables means that, to perform such a transplanting, we need no fix-up operation to adjust de Bruijn indices.  At the same time, use of de Bruijn indices for local variables gives us canonical representations of expressions, with respect to the usual convention of alpha-equivalence.  This makes many operations, including substitution of open terms in open terms, easier to implement.
+
+   The "Engineering Formal Metatheory" methodology involves a number of subtle design decisions, which we will describe as they appear in the latest version of our running example. *)
+
 Module LocallyNameless.
 
   Definition free_var := string.
@@ -584,18 +590,20 @@
   | App : exp -> exp -> exp
   | Abs : exp -> exp.
 
+  (** Note the different constructors for free vs. bound variables, and note that the lack of a variable annotation on [Abs] nodes is inherited from the de Bruijn convention. *)
+
   Inductive type : Set :=
   | Bool : type
   | Arrow : type -> type -> type.
 
   Infix "-->" := Arrow (right associativity, at level 60).
 
+  (** As typing only depends on types of free variables, our contexts borrow their form from the concrete binding example.  *)
+
   Definition ctx := list (free_var * type).
 
   Reserved Notation "G |-v x : t" (no associativity, at level 90, x at next level).
 
-  Reserved Notation "G |-v x : t" (no associativity, at level 90, x at next level).
-
   Inductive lookup : ctx -> free_var -> type -> Prop :=
   | First : forall x t G,
     (x, t) :: G |-v x : t
@@ -608,7 +616,7 @@
 
   Hint Constructors lookup.
 
-  Reserved Notation "G |-e e : t" (no associativity, at level 90, e at next level).
+  (** The first unusual operation we need is %\textit{%#<i>#opening#</i>#%}%, where we replace a particular bound variable with a particular free variable.  Whenever we "go under a binder," in the typing judgment or elsewhere, we choose a new free variable to replace the old bound variable of the binder.  Opening implements the replacement of one by the other.  It is like a specialized version of the substitution function we used for pure de Bruijn terms. *)
 
   Section open.
     Variable x : free_var.
@@ -628,6 +636,8 @@
       end.
   End open.
 
+  (** We will also need to reason about an expression's set of free variables.  To keep things simple, we represent sets as lists that may contain duplicates.  Note how much easier this operation is to implement than over pure de Bruijn terms, since we do not need to maintain a separate numeric argument that keeps track of how deeply we have descended into the input expression. *)
+
   Fixpoint freeVars (e : exp) : list free_var :=
     match e with
       | Const _ => nil
@@ -637,6 +647,21 @@
       | Abs e1 => freeVars e1
     end.
 
+  (** It will be useful to have a well-formedness judgment for our terms.  This notion is called %\textit{%#<i>#local closure#</i>#%}%.  An expression may be declared to be closed, up to a particular maximum de Bruijn index. *)
+
+  Inductive lclosed : nat -> exp -> Prop :=
+  | CConst : forall n b, lclosed n (Const b)
+  | CFreeVar : forall n v, lclosed n (FreeVar v)
+  | CBoundVar : forall n v, v < n -> lclosed n (BoundVar v)
+  | CApp : forall n e1 e2, lclosed n e1 -> lclosed n e2 -> lclosed n (App e1 e2)
+  | CAbs : forall n e1, lclosed (S n) e1 -> lclosed n (Abs e1).
+
+  Hint Constructors lclosed.
+
+  (** Now we are ready to define the typing judgment. *)
+
+  Reserved Notation "G |-e e : t" (no associativity, at level 90, e at next level).
+
   Inductive hasType : ctx -> exp -> type -> Prop :=
   | TConst : forall G b,
     G |-e Const b : Bool
@@ -648,13 +673,19 @@
     -> G |-e e2 : dom
     -> G |-e App e1 e2 : ran
   | TAbs : forall G e' dom ran L,
-    (forall x, ~In x L -> (x, dom) :: G |-e open x O e' : ran)
+    (forall x, ~ In x L -> (x, dom) :: G |-e open x O e' : ran)
     -> G |-e Abs e' : dom --> ran
 
     where "G |-e e : t" := (hasType G e t).
 
+  (** Compared to the previous versions, only the [TAbs] rule is surprising.  The rule uses %\textit{%#<i>#co-finite quantiifcation#</i>#%}%.  That is, the premise of the rule quantifies over all [x] values that are not members of a finite set [L].  A proof may choose any value of [L] when applying [TAbs].  An alternate, more intuitive version of the rule would fix [L] to be [freeVars e'].  It turns out that the greater flexibility of the rule above simplifies many proofs significantly.  This typing judgment may be proved equivalent to the more intuitive version, though we will not carry out the proof here.
+
+     Specifially, what our version of [TAbs] says is that, to prove that [Abs e'] has a function type, we must prove that any opening of [e'] with a variable not in [L] has the proper type.  For each [x] choice, we extend the context [G] in the usual way. *)
+
   Hint Constructors hasType.
 
+  (** We prove a standard weakening theorem for typing, adopting a more general form than in the previous sections. *)
+
   Lemma lookup_push : forall G G' x t x' t',
     (forall x t, G |-v x : t -> G' |-v x : t)
     -> (x, t) :: G |-v x' : t'
@@ -673,20 +704,15 @@
 
   Hint Resolve weaken_hasType.
 
-  Inductive lclosed : nat -> exp -> Prop :=
-  | CConst : forall n b, lclosed n (Const b)
-  | CFreeVar : forall n v, lclosed n (FreeVar v)
-  | CBoundVar : forall n v, v < n -> lclosed n (BoundVar v)
-  | CApp : forall n e1 e2, lclosed n e1 -> lclosed n e2 -> lclosed n (App e1 e2)
-  | CAbs : forall n e1, lclosed (S n) e1 -> lclosed n (Abs e1).
-
-  Hint Constructors lclosed.
+  (** We define a simple extension of [crush] to apply in many of the lemmas that follow. *)
 
   Ltac ln := crush;
     repeat (match goal with
               | [ |- context[if ?E then _ else _] ] => destruct E
               | [ _ : context[if ?E then _ else _] |- _ ] => destruct E
             end; crush); eauto.
+
+  (** Two basic properties of local closure will be useful later. *)
   
   Lemma lclosed_S : forall x e n,
     lclosed n (open x n e)
@@ -706,6 +732,8 @@
   Hint Resolve lclosed_weaken.
   Hint Extern 1 (_ >= _) => omega.
 
+  (** To prove some further properties, we need the ability to choose a variable that is disjoint from a particular finite set.  We implement a specific choice function [fresh]; its details do not matter, as all we need is the final theorem about it, [freshOk].  Concretely, to choose a variable disjoint from set [L], we sum the lengths of the variable names in [L] and choose a new variable name that is one longer than that sum.  This variable can be the string ["x"], followed by a number of primes equal to the sum. *)
+
   Open Scope string_scope.
   
   Fixpoint primes (n : nat) : string :=
@@ -722,12 +750,15 @@
 
   Definition fresh (L : list free_var) := primes (sumLengths L).
 
+  (** A few lemmas suffice to establish the correctness theorem [freshOk] for [fresh]. *)
+
   Theorem freshOk' : forall x L, String.length x > sumLengths L
-    -> ~In x L.
+    -> ~ In x L.
     induction L; crush.
   Qed.
 
-  Lemma length_app : forall s2 s1, String.length (s1 ++ s2) = String.length s1 + String.length s2.
+  Lemma length_app : forall s2 s1,
+    String.length (s1 ++ s2) = String.length s1 + String.length s2.
     induction s1; crush.
   Qed.
 
@@ -739,18 +770,22 @@
 
   Hint Rewrite length_primes : cpdt.
 
-  Theorem freshOk : forall L, ~In (fresh L) L.
+  Theorem freshOk : forall L, ~ In (fresh L) L.
     intros; apply freshOk'; unfold fresh; crush.
   Qed.
 
   Hint Resolve freshOk.
 
+  (** Now we can prove that well-typedness implies local closure.  [fresh] will be used for us automatically by [eauto] in the [Abs] case, driven by the presence of [freshOk] as a hint. *)
+
   Lemma hasType_lclosed : forall G e t,
     G |-e e : t
     -> lclosed O e.
     induction 1; eauto.
   Qed.
 
+  (** An important consequence of local closure is that certain openings are idempotent. *)
+
   Lemma lclosed_open : forall n e, lclosed n e
     -> forall x, open x n e = e.
     induction 1; ln.
@@ -760,6 +795,8 @@
 
   Open Scope list_scope.
 
+  (** We are now almost ready to get down to the details of substitution.  First, we prove six lemmas related to treating lists as sets. *)
+
   Lemma In_cons1 : forall T (x x' : T) ls,
     x = x'
     -> In x (x' :: ls).
@@ -796,6 +833,8 @@
 
   Hint Resolve In_cons1 In_cons2 In_app1 In_app2.
 
+  (** Now we can define our simplest substitution function yet, thanks to the fact that we only subsitute for free variables, which are distinguished syntactically from bound variables. *)
+
   Section subst.
     Hint Resolve freshOk_app1 freshOk_app2.
 
@@ -813,6 +852,8 @@
 
     Variable xt : type.
 
+    (** It comes in handy to define disjointness of a variable and a context differently than in previous examples.  We use the standard list function [map], as well as the function [fst] for projecting the first element of a pair.  We write [@fst _ _] rather than just [fst] to ask that [fst]'s implicit arguments be instantiated with inferred values. *)
+
     Definition disj x (G : ctx) := In x (map (@fst _ _) G) -> False.
     Infix "#" := disj (no associativity, at level 90).
 
@@ -821,6 +862,8 @@
         | [ _ : _ :: _ = ?G0 ++ _ |- _ ] => destruct G0
       end; crush; eauto.
 
+    (** Some basic properties of variable lookup will be needed on the road to our usual theorem connecting substitution and typing. *)
+
     Lemma lookup_disj' : forall t G1,
       G1 |-v x : t
       -> forall G, x # G
@@ -838,9 +881,9 @@
 
     Lemma lookup_ne' : forall G1 v t,
       G1 |-v v : t
-        -> forall G, G1 = G ++ (x, xt) :: nil
-          -> v <> x
-          -> G |-v v : t.
+      -> forall G, G1 = G ++ (x, xt) :: nil
+        -> v <> x
+        -> G |-v v : t.
       induction 1; disj.
     Qed.
 
@@ -859,6 +902,8 @@
 
     Hint Extern 1 (@eq exp _ _) => f_equal.
 
+    (** We need to know that substitution and opening commute under appropriate circumstances. *)
+
     Lemma open_subst : forall x0 e' n,
       lclosed n e1
       -> x <> x0
@@ -866,6 +911,8 @@
       induction e'; ln.
     Qed.
 
+    (** We state a corollary of the last result which will work more smoothly with [eauto]. *)
+
     Lemma hasType_open_subst : forall G x0 e t,
       G |-e subst (open x0 0 e) : t
       -> x <> x0
@@ -876,6 +923,8 @@
 
     Hint Resolve hasType_open_subst.
 
+    (** Another lemma establishes the validity of weakening variable lookup judgments with fresh variables. *)
+
     Lemma disj_push : forall x0 (t : type) G,
       x # G
       -> x <> x0
@@ -887,7 +936,7 @@
 
     Lemma lookup_cons : forall x0 dom G x1 t,
       G |-v x1 : t
-      -> ~In x0 (map (@fst _ _) G)
+      -> ~ In x0 (map (@fst _ _) G)
       -> (x0, dom) :: G |-v x1 : t.
       induction 1; crush;
         match goal with
@@ -898,12 +947,16 @@
     Hint Resolve lookup_cons.
     Hint Unfold disj.
 
+    (** Finally, it is useful to state a version of the [TAbs] rule specialized to the choice of [L] that is useful in our main substitution proof. *)
+
     Lemma TAbs_specialized : forall G e' dom ran L x1,
-      (forall x, ~In x (x1 :: L ++ map (@fst _ _) G) -> (x, dom) :: G |-e open x O e' : ran)
+      (forall x, ~ In x (x1 :: L ++ map (@fst _ _) G) -> (x, dom) :: G |-e open x O e' : ran)
       -> G |-e Abs e' : dom --> ran.
       eauto.
     Qed.
 
+    (** Now we can prove the main inductive lemma in a manner similar to what worked for concrete binding. *)
+
     Lemma hasType_subst' : forall G1 e t,
       G1 |-e e : t
       -> forall G, G1 = G ++ (x, xt) :: nil
@@ -916,7 +969,9 @@
             apply TAbs_specialized with L x; eauto 20
         end.
     Qed.
-      
+
+    (** The main theorem about substitution of closed expressions follows easily. *)
+
     Theorem hasType_subst : forall e t,
       (x, xt) :: nil |-e e : t
       -> nil |-e e1 : xt
@@ -927,18 +982,10 @@
 
   Hint Resolve hasType_subst.
 
+  (** We can define the operational semantics in almost the same way as in previous examples. *)
+
   Notation "[ x ~> e1 ] e2" := (subst x e1 e2) (no associativity, at level 60).
 
-  Lemma alpha_open : forall x1 x2 e1 e2 n,
-    ~In x1 (freeVars e2)
-    -> ~In x2 (freeVars e2)
-    -> [x1 ~> e1](open x1 n e2) = [x2 ~> e1](open x2 n e2).
-    induction e2; crush;
-      repeat (match goal with
-                | [ |- context[if ?E then _ else _] ] => destruct E
-              end; crush).
-  Qed.
-
   Inductive val : exp -> Prop :=
   | VConst : forall b, val (Const b)
   | VAbs : forall e, val (Abs e).
@@ -950,7 +997,7 @@
   Inductive step : exp -> exp -> Prop :=
   | Beta : forall e1 e2 x,
     val e2
-    -> ~In x (freeVars e1)
+    -> ~ In x (freeVars e1)
     -> App (Abs e1) e2 ==> [x ~> e2] (open x O e1)
   | Cong1 : forall e1 e2 e1',
     e1 ==> e1'
@@ -964,6 +1011,10 @@
 
   Hint Constructors step.
 
+  (** The only interesting change is that the [Beta] rule requires identifying a fresh variable [x] to use in opening the abstraction body.  We could have avoided this by implementing a more general [open] that allows substituting expressions for variables, not just variables for variables, but it simplifies the proofs to have just one general substitution function.
+
+     Now we are ready to prove progress and preservation.  The same proof script from the last examples suffices to prove progress, though significantly different lemmas are applied for us by [eauto]. *)
+
   Lemma progress' : forall G e t, G |-e e : t
     -> G = nil
     -> val e \/ exists e', e ==> e'.
@@ -981,8 +1032,19 @@
     intros; eapply progress'; eauto.
   Qed.
 
+  (** To establish preservation, it is useful to formalize a principle of sound alpha-variation.  In particular, when we open an expression with a particular variable and then immediately substitute for the same variable, we can replace that variable with any other that is not free in the body of the opened expression. *)
+
+  Lemma alpha_open : forall x1 x2 e1 e2 n,
+    ~ In x1 (freeVars e2)
+    -> ~ In x2 (freeVars e2)
+    -> [x1 ~> e1](open x1 n e2) = [x2 ~> e1](open x2 n e2).
+    induction e2; ln.
+  Qed.
+
   Hint Resolve freshOk_app1 freshOk_app2.
 
+  (** Again it is useful to state a direct corollary which is easier to apply in proof search. *)
+
   Lemma hasType_alpha_open : forall G L e0 e2 x t,
     ~ In x (freeVars e0)
     -> G |-e [fresh (L ++ freeVars e0) ~> e2](open (fresh (L ++ freeVars e0)) 0 e0) : t
@@ -992,6 +1054,8 @@
 
   Hint Resolve hasType_alpha_open.
 
+  (** Now the previous sections' preservation proof scripts finish the job. *)
+
   Lemma preservation' : forall G e t, G |-e e : t
     -> G = nil
     -> forall e', e ==> e'