changeset 245:4b001a611e79

Prose for old part of Firstorder
author Adam Chlipala <adamc@hcoop.net>
date Wed, 09 Dec 2009 15:26:22 -0500
parents 0400fa005d5a
children cca30734ab40
files src/Firstorder.v
diffstat 1 files changed, 81 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/src/Firstorder.v	Wed Dec 09 14:12:33 2009 -0500
+++ b/src/Firstorder.v	Wed Dec 09 15:26:22 2009 -0500
@@ -20,16 +20,24 @@
 
    \chapter{First-Order Abstract Syntax}% *)
 
-(** TODO: Prose for this chapter *)
+(** Many people interested in interactive theorem-proving want to prove theorems about programming languages.  That domain also provides a good setting for demonstrating how to apply the ideas from the earlier parts of this book.  This part introduces some techniques for encoding the syntax and semantics of programming languages, along with some example proofs designed to be as practical as possible, rather than to illustrate basic Coq technique.
+
+   To prove anything about a language, we must first formalize the language's syntax.  We have a broad design space to choose from, and it makes sense to start with the simplest options, so-called %\textit{%#<i>#first-order#</i>#%}% syntax encodings that do not use dependent types.  These encodings are first-order because they do not use Coq function types in a critical way.  In this chapter, we consider the most popular first-order encodings, using each to prove a basic type soundness theorem. *)
 
 
 (** * Concrete Binding *)
 
+(** The most obvious encoding of the syntax of programming languages follows usual context-free grammars literally.  We represent variables as strings and include a variable in our AST definition wherever a variable appears in the informal grammar.  Concrete binding turns out to involve a surprisingly large amount of menial bookkeeping, especially when we encode higher-order languages with nested binder scopes.  This section's example should give a flavor of what is required. *)
+
 Module Concrete.
 
+  (** We need our variable type and its decidable equality operation. *)
+
   Definition var := string.
   Definition var_eq := string_dec.
 
+  (** We will formalize basic simply-typed lambda calculus.  The syntax of expressions and types follows what we would write in a context-free grammar. *)
+
   Inductive exp : Set :=
   | Const : bool -> exp
   | Var : var -> exp
@@ -40,12 +48,20 @@
   | Bool : type
   | Arrow : type -> type -> type.
 
+  (** It is useful to define a syntax extension that lets us write function types in more standard notation. *)
+
   Infix "-->" := Arrow (right associativity, at level 60).
 
+  (** Now we turn to a typing judgment.  We will need to define it in terms of typing contexts, which we represent as lists of pairs of variables and types. *)
+
   Definition ctx := list (var * type).
 
+  (** The definitions of our judgments will be prettier if we write them using mixfix syntax.  To define a judgment for looking up the type of a variable in a context, we first %\textit{%#<i>#reserve#</i>#%}% a notation for the judgment.  Reserved notations enable mutually-recursive definition of a judgment and its notation; in this sense, the reservation is like a forward declaration in C. *)
+
   Reserved Notation "G |-v x : t" (no associativity, at level 90, x at next level).
 
+  (** Now we define the judgment itself, using a [where] clause to associate a notation definition. *)
+
   Inductive lookup : ctx -> var -> type -> Prop :=
   | First : forall x t G,
     (x, t) :: G |-v x : t
@@ -58,6 +74,8 @@
 
   Hint Constructors lookup.
 
+  (** The same technique applies to defining the main typing judgment.  We use an [at next level] clause to cause the argument [e] of the notation to be parsed at a low enough precedence level. *)
+
   Reserved Notation "G |-e e : t" (no associativity, at level 90, e at next level).
 
   Inductive hasType : ctx -> exp -> type -> Prop :=
@@ -78,10 +96,12 @@
 
   Hint Constructors hasType.
 
+  (** It is useful to know that variable lookup results are unchanged by adding extra bindings to the end of a context. *)
+
   Lemma weaken_lookup : forall x t G' G1,
     G1 |-v x : t
     -> G1 ++ G' |-v x : t.
-    induction G1 as [ | [x' t'] tl ]; crush;
+    induction G1 as [ | [? ?] ? ]; crush;
       match goal with
         | [ H : _ |-v _ : _ |- _ ] => inversion H; crush
       end.
@@ -89,6 +109,8 @@
 
   Hint Resolve weaken_lookup.
 
+  (** The same property extends to the full typing judgment. *)
+
   Theorem weaken_hasType' : forall G' G e t,
     G |-e e : t
     -> G ++ G' |-e e : t.
@@ -104,10 +126,14 @@
 
   Hint Resolve weaken_hasType.
 
+  (** Much of the inconvenience of first-order encodings comes from the need to treat capture-avoiding substitution explicitly.  We must start by defining a substitution function. *)
+
   Section subst.
     Variable x : var.
     Variable e1 : exp.
 
+    (** We are substituting expression [e1] for every free occurrence of [x].  Note that this definition is specialized to the case where [e1] is closed; substitution is substantially more complicated otherwise, potentially involving explicit alpha-variation.  Luckily, our example of type safety for a call-by-value semantics only requires this restricted variety of substitution. *)
+
     Fixpoint subst (e2 : exp) : exp :=
       match e2 with
         | Const b => Const b
@@ -122,16 +148,22 @@
             else subst e')
       end.
 
+    (** We can prove a few theorems about substitution in well-typed terms, where we assume that [e1] is closed and has type [xt]. *)
+
     Variable xt : type.
     Hypothesis Ht' : nil |-e e1 : xt.
 
+    (** It is helpful to establish a notation asserting the freshness of a particular variable in a context. *)
+
     Notation "x # G" := (forall t' : type, In (x, t') G -> False) (no associativity, at level 90).
 
+    (** To prove type preservation, we will need lemmas proving consequences of variable lookup proofs. *)
+
     Lemma subst_lookup' : forall x' t,
       x <> x'
       -> forall G1, G1 ++ (x, xt) :: nil |-v x' : t
         -> G1 |-v x' : t.
-      induction G1 as [ | [x'' t'] tl ]; crush;
+      induction G1 as [ | [? ?] ? ]; crush;
         match goal with
           | [ H : _ |-v _ : _ |- _ ] => inversion H
         end; crush.
@@ -143,7 +175,7 @@
       x' # G1
       -> G1 ++ (x, xt) :: nil |-v x' : t
       -> t = xt.
-      induction G1 as [ | [x'' t'] tl ]; crush; eauto;
+      induction G1 as [ | [? ?] ? ]; crush; eauto;
         match goal with
           | [ H : _ |-v _ : _ |- _ ] => inversion H
         end; crush; (elimtype False; eauto;
@@ -157,11 +189,13 @@
 
     Implicit Arguments subst_lookup [x' t G1].
 
+    (** Another set of lemmas allows us to remove provably unused variables from the ends of typing contexts. *)
+
     Lemma shadow_lookup : forall v t t' G1,
       G1 |-v x : t'
       -> G1 ++ (x, xt) :: nil |-v v : t
       -> G1 |-v v : t.
-      induction G1 as [ | [x'' t''] tl ]; crush;
+      induction G1 as [ | [? ?] ? ]; crush;
         match goal with
           | [ H : nil |-v _ : _ |- _ ] => inversion H
           | [ H1 : _ |-v _ : _, H2 : _ |-v _ : _ |- _ ] =>
@@ -178,7 +212,7 @@
 
       induction 1; crush; eauto;
         match goal with
-          | [ H : (?x0, _) :: _ ++ (x, _) :: _ |-e _ : _ |- _ ] =>
+          | [ H : (?x0, _) :: _ ++ (?x, _) :: _ |-e _ : _ |- _ ] =>
             destruct (var_eq x0 x); subst; eauto
         end.
     Qed.
@@ -192,6 +226,8 @@
 
     Hint Resolve shadow_hasType.
 
+    (** Disjointness facts may be extended to larger contexts when the appropriate obligations are met. *)
+
     Lemma disjoint_cons : forall x x' t (G : ctx),
       x # G
       -> x' <> x
@@ -204,6 +240,8 @@
 
     Hint Resolve disjoint_cons.
 
+    (** Finally, we arrive at the main theorem about substitution: it preserves typing. *)
+
     Theorem subst_hasType : forall G e2 t,
       G |-e e2 : t
         -> forall G1, G = G1 ++ (x, xt) :: nil
@@ -219,6 +257,8 @@
         end; crush.
     Qed.
 
+    (** We wrap the last theorem into an easier-to-apply form specialized to closed expressions. *)
+
     Theorem subst_hasType_closed : forall e2 t,
       (x, xt) :: nil |-e e2 : t
       -> nil |-e subst e2 : t.
@@ -228,14 +268,20 @@
 
   Hint Resolve subst_hasType_closed.
 
+  (** A notation for substitution will make the operational semantics easier to read. *)
+
   Notation "[ x ~> e1 ] e2" := (subst x e1 e2) (no associativity, at level 80).
 
+  (** To define a call-by-value small-step semantics, we rely on a standard judgment characterizing which expressions are values. *)
+
   Inductive val : exp -> Prop :=
   | VConst : forall b, val (Const b)
   | VAbs : forall x e, val (Abs x e).
 
   Hint Constructors val.
 
+  (** Now the step relation is easy to define. *)
+
   Reserved Notation "e1 ==> e2" (no associativity, at level 90).
 
   Inductive step : exp -> exp -> Prop :=
@@ -254,6 +300,8 @@
 
   Hint Constructors step.
 
+  (** The progress theorem says that any well-typed, closed expression is either a value or can take a step.  To deal with limitations of the [induction] tactic, we put most of the proof in a lemma whose statement uses the usual trick of introducing extra equality hypotheses. *)
+
   Lemma progress' : forall G e t, G |-e e : t
     -> G = nil
     -> val e \/ exists e', e ==> e'.
@@ -261,9 +309,9 @@
       try match goal with
             | [ H : _ |-e _ : _ --> _ |- _ ] => inversion H
           end;
-      repeat match goal with
-               | [ H : _ |- _ ] => solve [ inversion H; crush; eauto ]
-             end.
+      match goal with
+        | [ H : _ |- _ ] => solve [ inversion H; crush; eauto ]
+      end.
   Qed.
 
   Theorem progress : forall e t, nil |-e e : t
@@ -271,6 +319,8 @@
     intros; eapply progress'; eauto.
   Qed.
 
+  (** A similar pattern works for the preservation theorem, which says that any step of execution preserves an expression's type. *)
+
   Lemma preservation' : forall G e t, G |-e e : t
     -> G = nil
     -> forall e', e ==> e'
@@ -289,9 +339,13 @@
 
 End Concrete.
 
+(** This was a relatively simple example, giving only a taste of the proof burden associated with concrete syntax.  We were helped by the fact that, with call-by-value semantics, we only need to reason about substitution in closed expressions.  There was also no need to alpha-vary an expression. *)
+
 
 (** * De Bruijn Indices *)
 
+(** De Bruijn indices are much more popular than concrete syntax.  This technique provides a %\textit{%#<i>#canonical#</i>#%}% representation of syntax, where any two alpha-equivalent expressions have syntactically equal encodings, removing the need for explicit reasoning about alpha conversion.  Variables are represented as natural numbers, where variable [n] denotes a reference to the [n]th closest enclosing binder.  Because variable references in effect point to binders, there is no need to label binders, such as function abstraction, with variables. *)
+
 Module DeBruijn.
 
   Definition var := nat.
@@ -309,6 +363,8 @@
 
   Infix "-->" := Arrow (right associativity, at level 60).
 
+  (** The definition of typing proceeds much the same as in the last section.  Since variables are numbers, contexts can be simple lists of types.  This makes it possible to write the lookup judgment without mentioning inequality of variables. *)
+
   Definition ctx := list type.
 
   Reserved Notation "G |-v x : t" (no associativity, at level 90, x at next level).
@@ -342,8 +398,12 @@
 
     where "G |-e e : t" := (hasType G e t).
 
+  (** In the [hasType] case for function abstraction, there is no need to choose a variable name.  We simply push the function domain type onto the context [G]. *)
+
   Hint Constructors hasType.
 
+  (** We prove roughly the same weakening theorems as before. *)
+
   Lemma weaken_lookup : forall G' v t G,
     G |-v v : t
     -> G ++ G' |-v v : t.
@@ -370,6 +430,8 @@
   Section subst.
     Variable e1 : exp.
 
+    (** Substitution is easier to define than with concrete syntax.  While our old definition needed to use two comparisons for equality of variables, the de Bruijn substitution only needs one comparison. *)
+
     Fixpoint subst (x : var) (e2 : exp) : exp :=
       match e2 with
         | Const b => Const b
@@ -383,6 +445,8 @@
 
     Variable xt : type.
 
+    (** We prove similar theorems about inversion of variable lookup. *)
+
     Lemma subst_eq : forall t G1,
       G1 ++ xt :: nil |-v length G1 : t
       -> t = xt.
@@ -414,6 +478,8 @@
 
     Hypothesis Ht' : nil |-e e1 : xt.
 
+    (** The next lemma is included solely to guide [eauto], which will not apply computational equivalences automatically. *)
+
     Lemma hasType_push : forall dom G1 e' ran,
       dom :: G1 |-e subst (length (dom :: G1)) e' : ran
       -> dom :: G1 |-e subst (S (length G1)) e' : ran.
@@ -422,6 +488,8 @@
 
     Hint Resolve hasType_push.
 
+    (** Finally, we are ready for the main theorem about substitution and typing. *)
+
     Theorem subst_hasType : forall G e2 t,
       G |-e e2 : t
         -> forall G1, G = G1 ++ xt :: nil
@@ -445,6 +513,8 @@
 
   Hint Resolve subst_hasType_closed.
 
+  (** We define the operational semantics much as before. *)
+
   Notation "[ x ~> e1 ] e2" := (subst e1 x e2) (no associativity, at level 80).
 
   Inductive val : exp -> Prop :=
@@ -471,6 +541,8 @@
 
   Hint Constructors step.
 
+  (** Since we have added the right hints, the progress and preservation theorem statements and proofs are exactly the same as in the concrete encoding example. *)
+
   Lemma progress' : forall G e t, G |-e e : t
     -> G = nil
     -> val e \/ exists e', e ==> e'.