comparison src/Firstorder.v @ 245:4b001a611e79

Prose for old part of Firstorder
author Adam Chlipala <adamc@hcoop.net>
date Wed, 09 Dec 2009 15:26:22 -0500
parents 13620dfd5f97
children cca30734ab40
comparison
equal deleted inserted replaced
244:0400fa005d5a 245:4b001a611e79
18 18
19 (** %\part{Formalizing Programming Languages and Compilers} 19 (** %\part{Formalizing Programming Languages and Compilers}
20 20
21 \chapter{First-Order Abstract Syntax}% *) 21 \chapter{First-Order Abstract Syntax}% *)
22 22
23 (** TODO: Prose for this chapter *) 23 (** Many people interested in interactive theorem-proving want to prove theorems about programming languages. That domain also provides a good setting for demonstrating how to apply the ideas from the earlier parts of this book. This part introduces some techniques for encoding the syntax and semantics of programming languages, along with some example proofs designed to be as practical as possible, rather than to illustrate basic Coq technique.
24
25 To prove anything about a language, we must first formalize the language's syntax. We have a broad design space to choose from, and it makes sense to start with the simplest options, so-called %\textit{%#<i>#first-order#</i>#%}% syntax encodings that do not use dependent types. These encodings are first-order because they do not use Coq function types in a critical way. In this chapter, we consider the most popular first-order encodings, using each to prove a basic type soundness theorem. *)
24 26
25 27
26 (** * Concrete Binding *) 28 (** * Concrete Binding *)
27 29
30 (** The most obvious encoding of the syntax of programming languages follows usual context-free grammars literally. We represent variables as strings and include a variable in our AST definition wherever a variable appears in the informal grammar. Concrete binding turns out to involve a surprisingly large amount of menial bookkeeping, especially when we encode higher-order languages with nested binder scopes. This section's example should give a flavor of what is required. *)
31
28 Module Concrete. 32 Module Concrete.
33
34 (** We need our variable type and its decidable equality operation. *)
29 35
30 Definition var := string. 36 Definition var := string.
31 Definition var_eq := string_dec. 37 Definition var_eq := string_dec.
38
39 (** We will formalize basic simply-typed lambda calculus. The syntax of expressions and types follows what we would write in a context-free grammar. *)
32 40
33 Inductive exp : Set := 41 Inductive exp : Set :=
34 | Const : bool -> exp 42 | Const : bool -> exp
35 | Var : var -> exp 43 | Var : var -> exp
36 | App : exp -> exp -> exp 44 | App : exp -> exp -> exp
38 46
39 Inductive type : Set := 47 Inductive type : Set :=
40 | Bool : type 48 | Bool : type
41 | Arrow : type -> type -> type. 49 | Arrow : type -> type -> type.
42 50
51 (** It is useful to define a syntax extension that lets us write function types in more standard notation. *)
52
43 Infix "-->" := Arrow (right associativity, at level 60). 53 Infix "-->" := Arrow (right associativity, at level 60).
44 54
55 (** Now we turn to a typing judgment. We will need to define it in terms of typing contexts, which we represent as lists of pairs of variables and types. *)
56
45 Definition ctx := list (var * type). 57 Definition ctx := list (var * type).
46 58
59 (** The definitions of our judgments will be prettier if we write them using mixfix syntax. To define a judgment for looking up the type of a variable in a context, we first %\textit{%#</i>#reserve#</i>#%}% a notation for the judgment. Reserved notations enable mutually-recursive definition of a judgment and its notation; in this sense, the reservation is like a forward declaration in C. *)
60
47 Reserved Notation "G |-v x : t" (no associativity, at level 90, x at next level). 61 Reserved Notation "G |-v x : t" (no associativity, at level 90, x at next level).
62
63 (** Now we define the judgment itself, using a [where] clause to associate a notation definition. *)
48 64
49 Inductive lookup : ctx -> var -> type -> Prop := 65 Inductive lookup : ctx -> var -> type -> Prop :=
50 | First : forall x t G, 66 | First : forall x t G,
51 (x, t) :: G |-v x : t 67 (x, t) :: G |-v x : t
52 | Next : forall x t x' t' G, 68 | Next : forall x t x' t' G,
55 -> (x', t') :: G |-v x : t 71 -> (x', t') :: G |-v x : t
56 72
57 where "G |-v x : t" := (lookup G x t). 73 where "G |-v x : t" := (lookup G x t).
58 74
59 Hint Constructors lookup. 75 Hint Constructors lookup.
76
77 (** The same technique applies to defining the main typing judgment. We use an [at next level] clause to cause the argument [e] of the notation to be parsed at a low enough precedence level. *)
60 78
61 Reserved Notation "G |-e e : t" (no associativity, at level 90, e at next level). 79 Reserved Notation "G |-e e : t" (no associativity, at level 90, e at next level).
62 80
63 Inductive hasType : ctx -> exp -> type -> Prop := 81 Inductive hasType : ctx -> exp -> type -> Prop :=
64 | TConst : forall G b, 82 | TConst : forall G b,
76 94
77 where "G |-e e : t" := (hasType G e t). 95 where "G |-e e : t" := (hasType G e t).
78 96
79 Hint Constructors hasType. 97 Hint Constructors hasType.
80 98
99 (** It is useful to know that variable lookup results are unchanged by adding extra bindings to the end of a context. *)
100
81 Lemma weaken_lookup : forall x t G' G1, 101 Lemma weaken_lookup : forall x t G' G1,
82 G1 |-v x : t 102 G1 |-v x : t
83 -> G1 ++ G' |-v x : t. 103 -> G1 ++ G' |-v x : t.
84 induction G1 as [ | [x' t'] tl ]; crush; 104 induction G1 as [ | [? ?] ? ]; crush;
85 match goal with 105 match goal with
86 | [ H : _ |-v _ : _ |- _ ] => inversion H; crush 106 | [ H : _ |-v _ : _ |- _ ] => inversion H; crush
87 end. 107 end.
88 Qed. 108 Qed.
89 109
90 Hint Resolve weaken_lookup. 110 Hint Resolve weaken_lookup.
111
112 (** The same property extends to the full typing judgment. *)
91 113
92 Theorem weaken_hasType' : forall G' G e t, 114 Theorem weaken_hasType' : forall G' G e t,
93 G |-e e : t 115 G |-e e : t
94 -> G ++ G' |-e e : t. 116 -> G ++ G' |-e e : t.
95 induction 1; crush; eauto. 117 induction 1; crush; eauto.
102 eapply weaken_hasType'; eauto. 124 eapply weaken_hasType'; eauto.
103 Qed. 125 Qed.
104 126
105 Hint Resolve weaken_hasType. 127 Hint Resolve weaken_hasType.
106 128
129 (** Much of the inconvenience of first-order encodings comes from the need to treat capture-avoiding substitution explicitly. We must start by defining a substitution function. *)
130
107 Section subst. 131 Section subst.
108 Variable x : var. 132 Variable x : var.
109 Variable e1 : exp. 133 Variable e1 : exp.
134
135 (** We are substituting expression [e1] for every free occurrence of [x]. Note that this definition is specialized to the case where [e1] is closed; substitution is substantially more complicated otherwise, potentially involving explicit alpha-variation. Luckily, our example of type safety for a call-by-value semantics only requires this restricted variety of substitution. *)
110 136
111 Fixpoint subst (e2 : exp) : exp := 137 Fixpoint subst (e2 : exp) : exp :=
112 match e2 with 138 match e2 with
113 | Const b => Const b 139 | Const b => Const b
114 | Var x' => 140 | Var x' =>
120 Abs x' (if var_eq x' x 146 Abs x' (if var_eq x' x
121 then e' 147 then e'
122 else subst e') 148 else subst e')
123 end. 149 end.
124 150
151 (** We can prove a few theorems about substitution in well-typed terms, where we assume that [e1] is closed and has type [xt]. *)
152
125 Variable xt : type. 153 Variable xt : type.
126 Hypothesis Ht' : nil |-e e1 : xt. 154 Hypothesis Ht' : nil |-e e1 : xt.
127 155
156 (** It is helpful to establish a notation asserting the freshness of a particular variable in a context. *)
157
128 Notation "x # G" := (forall t' : type, In (x, t') G -> False) (no associativity, at level 90). 158 Notation "x # G" := (forall t' : type, In (x, t') G -> False) (no associativity, at level 90).
159
160 (** To prove type preservation, we will need lemmas proving consequences of variable lookup proofs. *)
129 161
130 Lemma subst_lookup' : forall x' t, 162 Lemma subst_lookup' : forall x' t,
131 x <> x' 163 x <> x'
132 -> forall G1, G1 ++ (x, xt) :: nil |-v x' : t 164 -> forall G1, G1 ++ (x, xt) :: nil |-v x' : t
133 -> G1 |-v x' : t. 165 -> G1 |-v x' : t.
134 induction G1 as [ | [x'' t'] tl ]; crush; 166 induction G1 as [ | [? ?] ? ]; crush;
135 match goal with 167 match goal with
136 | [ H : _ |-v _ : _ |- _ ] => inversion H 168 | [ H : _ |-v _ : _ |- _ ] => inversion H
137 end; crush. 169 end; crush.
138 Qed. 170 Qed.
139 171
141 173
142 Lemma subst_lookup : forall x' t G1, 174 Lemma subst_lookup : forall x' t G1,
143 x' # G1 175 x' # G1
144 -> G1 ++ (x, xt) :: nil |-v x' : t 176 -> G1 ++ (x, xt) :: nil |-v x' : t
145 -> t = xt. 177 -> t = xt.
146 induction G1 as [ | [x'' t'] tl ]; crush; eauto; 178 induction G1 as [ | [? ?] ? ]; crush; eauto;
147 match goal with 179 match goal with
148 | [ H : _ |-v _ : _ |- _ ] => inversion H 180 | [ H : _ |-v _ : _ |- _ ] => inversion H
149 end; crush; (elimtype False; eauto; 181 end; crush; (elimtype False; eauto;
150 match goal with 182 match goal with
151 | [ H : nil |-v _ : _ |- _ ] => inversion H 183 | [ H : nil |-v _ : _ |- _ ] => inversion H
155 end. 187 end.
156 Qed. 188 Qed.
157 189
158 Implicit Arguments subst_lookup [x' t G1]. 190 Implicit Arguments subst_lookup [x' t G1].
159 191
192 (** Another set of lemmas allows us to remove provably unused variables from the ends of typing contexts. *)
193
160 Lemma shadow_lookup : forall v t t' G1, 194 Lemma shadow_lookup : forall v t t' G1,
161 G1 |-v x : t' 195 G1 |-v x : t'
162 -> G1 ++ (x, xt) :: nil |-v v : t 196 -> G1 ++ (x, xt) :: nil |-v v : t
163 -> G1 |-v v : t. 197 -> G1 |-v v : t.
164 induction G1 as [ | [x'' t''] tl ]; crush; 198 induction G1 as [ | [? ?] ? ]; crush;
165 match goal with 199 match goal with
166 | [ H : nil |-v _ : _ |- _ ] => inversion H 200 | [ H : nil |-v _ : _ |- _ ] => inversion H
167 | [ H1 : _ |-v _ : _, H2 : _ |-v _ : _ |- _ ] => 201 | [ H1 : _ |-v _ : _, H2 : _ |-v _ : _ |- _ ] =>
168 inversion H1; crush; inversion H2; crush 202 inversion H1; crush; inversion H2; crush
169 end. 203 end.
176 -> G1 |-e e : t. 210 -> G1 |-e e : t.
177 Hint Resolve shadow_lookup. 211 Hint Resolve shadow_lookup.
178 212
179 induction 1; crush; eauto; 213 induction 1; crush; eauto;
180 match goal with 214 match goal with
181 | [ H : (?x0, _) :: _ ++ (x, _) :: _ |-e _ : _ |- _ ] => 215 | [ H : (?x0, _) :: _ ++ (?x, _) :: _ |-e _ : _ |- _ ] =>
182 destruct (var_eq x0 x); subst; eauto 216 destruct (var_eq x0 x); subst; eauto
183 end. 217 end.
184 Qed. 218 Qed.
185 219
186 Lemma shadow_hasType : forall G1 e t t'', 220 Lemma shadow_hasType : forall G1 e t t'',
190 intros; eapply shadow_hasType'; eauto. 224 intros; eapply shadow_hasType'; eauto.
191 Qed. 225 Qed.
192 226
193 Hint Resolve shadow_hasType. 227 Hint Resolve shadow_hasType.
194 228
229 (** Disjointness facts may be extended to larger contexts when the appropriate obligations are met. *)
230
195 Lemma disjoint_cons : forall x x' t (G : ctx), 231 Lemma disjoint_cons : forall x x' t (G : ctx),
196 x # G 232 x # G
197 -> x' <> x 233 -> x' <> x
198 -> x # (x', t) :: G. 234 -> x # (x', t) :: G.
199 firstorder; 235 firstorder;
201 | [ H : (_, _) = (_, _) |- _ ] => injection H 237 | [ H : (_, _) = (_, _) |- _ ] => injection H
202 end; crush. 238 end; crush.
203 Qed. 239 Qed.
204 240
205 Hint Resolve disjoint_cons. 241 Hint Resolve disjoint_cons.
242
243 (** Finally, we arrive at the main theorem about substitution: it preserves typing. *)
206 244
207 Theorem subst_hasType : forall G e2 t, 245 Theorem subst_hasType : forall G e2 t,
208 G |-e e2 : t 246 G |-e e2 : t
209 -> forall G1, G = G1 ++ (x, xt) :: nil 247 -> forall G1, G = G1 ++ (x, xt) :: nil
210 -> x # G1 248 -> x # G1
217 | [ H1 : x # _, H2 : _ |-v x : _ |- _ ] => 255 | [ H1 : x # _, H2 : _ |-v x : _ |- _ ] =>
218 rewrite (subst_lookup H1 H2) 256 rewrite (subst_lookup H1 H2)
219 end; crush. 257 end; crush.
220 Qed. 258 Qed.
221 259
260 (** We wrap the last theorem into an easier-to-apply form specialized to closed expressions. *)
261
222 Theorem subst_hasType_closed : forall e2 t, 262 Theorem subst_hasType_closed : forall e2 t,
223 (x, xt) :: nil |-e e2 : t 263 (x, xt) :: nil |-e e2 : t
224 -> nil |-e subst e2 : t. 264 -> nil |-e subst e2 : t.
225 intros; eapply subst_hasType; eauto. 265 intros; eapply subst_hasType; eauto.
226 Qed. 266 Qed.
227 End subst. 267 End subst.
228 268
229 Hint Resolve subst_hasType_closed. 269 Hint Resolve subst_hasType_closed.
230 270
271 (** A notation for substitution will make the operational semantics easier to read. *)
272
231 Notation "[ x ~> e1 ] e2" := (subst x e1 e2) (no associativity, at level 80). 273 Notation "[ x ~> e1 ] e2" := (subst x e1 e2) (no associativity, at level 80).
274
275 (** To define a call-by-value small-step semantics, we rely on a standard judgment characterizing which expressions are values. *)
232 276
233 Inductive val : exp -> Prop := 277 Inductive val : exp -> Prop :=
234 | VConst : forall b, val (Const b) 278 | VConst : forall b, val (Const b)
235 | VAbs : forall x e, val (Abs x e). 279 | VAbs : forall x e, val (Abs x e).
236 280
237 Hint Constructors val. 281 Hint Constructors val.
282
283 (** Now the step relation is easy to define. *)
238 284
239 Reserved Notation "e1 ==> e2" (no associativity, at level 90). 285 Reserved Notation "e1 ==> e2" (no associativity, at level 90).
240 286
241 Inductive step : exp -> exp -> Prop := 287 Inductive step : exp -> exp -> Prop :=
242 | Beta : forall x e1 e2, 288 | Beta : forall x e1 e2,
252 298
253 where "e1 ==> e2" := (step e1 e2). 299 where "e1 ==> e2" := (step e1 e2).
254 300
255 Hint Constructors step. 301 Hint Constructors step.
256 302
303 (** The progress theorem says that any well-typed expression can take a step. To deal with limitations of the [induction] tactic, we put most of the proof in a lemma whose statement uses the usual trick of introducing extra equality hypotheses. *)
304
257 Lemma progress' : forall G e t, G |-e e : t 305 Lemma progress' : forall G e t, G |-e e : t
258 -> G = nil 306 -> G = nil
259 -> val e \/ exists e', e ==> e'. 307 -> val e \/ exists e', e ==> e'.
260 induction 1; crush; eauto; 308 induction 1; crush; eauto;
261 try match goal with 309 try match goal with
262 | [ H : _ |-e _ : _ --> _ |- _ ] => inversion H 310 | [ H : _ |-e _ : _ --> _ |- _ ] => inversion H
263 end; 311 end;
264 repeat match goal with 312 match goal with
265 | [ H : _ |- _ ] => solve [ inversion H; crush; eauto ] 313 | [ H : _ |- _ ] => solve [ inversion H; crush; eauto ]
266 end. 314 end.
267 Qed. 315 Qed.
268 316
269 Theorem progress : forall e t, nil |-e e : t 317 Theorem progress : forall e t, nil |-e e : t
270 -> val e \/ exists e', e ==> e'. 318 -> val e \/ exists e', e ==> e'.
271 intros; eapply progress'; eauto. 319 intros; eapply progress'; eauto.
272 Qed. 320 Qed.
321
322 (** A similar pattern works for the preservation theorem, which says that any step of execution preserves an expression's type. *)
273 323
274 Lemma preservation' : forall G e t, G |-e e : t 324 Lemma preservation' : forall G e t, G |-e e : t
275 -> G = nil 325 -> G = nil
276 -> forall e', e ==> e' 326 -> forall e', e ==> e'
277 -> nil |-e e' : t. 327 -> nil |-e e' : t.
287 intros; eapply preservation'; eauto. 337 intros; eapply preservation'; eauto.
288 Qed. 338 Qed.
289 339
290 End Concrete. 340 End Concrete.
291 341
342 (** This was a relatively simple example, giving only a taste of the proof burden associated with concrete syntax. We were helped by the fact that, with call-by-value semantics, we only need to reason about substitution in closed expressions. There was also no need to alpha-vary an expression. *)
343
292 344
293 (** * De Bruijn Indices *) 345 (** * De Bruijn Indices *)
346
347 (** De Bruijn indices are much more popular than concrete syntax. This technique provides a %\textit{%#<i>#canonical#</i>#%}% representation of syntax, where any two alpha-equivalent expressions have syntactically equal encodings, removing the need for explicit reasoning about alpha conversion. Variables are represented as natural numbers, where variable [n] denotes a reference to the [n]th closest enclosing binder. Because variable references in effect point to binders, there is no need to label binders, such as function abstraction, with variables. *)
294 348
295 Module DeBruijn. 349 Module DeBruijn.
296 350
297 Definition var := nat. 351 Definition var := nat.
298 Definition var_eq := eq_nat_dec. 352 Definition var_eq := eq_nat_dec.
306 Inductive type : Set := 360 Inductive type : Set :=
307 | Bool : type 361 | Bool : type
308 | Arrow : type -> type -> type. 362 | Arrow : type -> type -> type.
309 363
310 Infix "-->" := Arrow (right associativity, at level 60). 364 Infix "-->" := Arrow (right associativity, at level 60).
365
366 (** The definition of typing proceeds much the same as in the last section. Since variables are numbers, contexts can be simple lists of types. This makes it possible to write the lookup judgment without mentioning inequality of variables. *)
311 367
312 Definition ctx := list type. 368 Definition ctx := list type.
313 369
314 Reserved Notation "G |-v x : t" (no associativity, at level 90, x at next level). 370 Reserved Notation "G |-v x : t" (no associativity, at level 90, x at next level).
315 371
340 dom :: G |-e e' : ran 396 dom :: G |-e e' : ran
341 -> G |-e Abs e' : dom --> ran 397 -> G |-e Abs e' : dom --> ran
342 398
343 where "G |-e e : t" := (hasType G e t). 399 where "G |-e e : t" := (hasType G e t).
344 400
401 (** In the [hasType] case for function abstraction, there is no need to choose a variable name. We simply push the function domain type onto the context [G]. *)
402
345 Hint Constructors hasType. 403 Hint Constructors hasType.
404
405 (** We prove roughly the same weakening theorems as before. *)
346 406
347 Lemma weaken_lookup : forall G' v t G, 407 Lemma weaken_lookup : forall G' v t G,
348 G |-v v : t 408 G |-v v : t
349 -> G ++ G' |-v v : t. 409 -> G ++ G' |-v v : t.
350 induction 1; crush. 410 induction 1; crush.
367 427
368 Hint Resolve weaken_hasType. 428 Hint Resolve weaken_hasType.
369 429
370 Section subst. 430 Section subst.
371 Variable e1 : exp. 431 Variable e1 : exp.
432
433 (** Substitution is easier to define than with concrete syntax. While our old definition needed to use two comparisons for equality of variables, the de Bruijn substitution only needs one comparison. *)
372 434
373 Fixpoint subst (x : var) (e2 : exp) : exp := 435 Fixpoint subst (x : var) (e2 : exp) : exp :=
374 match e2 with 436 match e2 with
375 | Const b => Const b 437 | Const b => Const b
376 | Var x' => 438 | Var x' =>
381 | Abs e' => Abs (subst (S x) e') 443 | Abs e' => Abs (subst (S x) e')
382 end. 444 end.
383 445
384 Variable xt : type. 446 Variable xt : type.
385 447
448 (** We prove similar theorems about inversion of variable lookup. *)
449
386 Lemma subst_eq : forall t G1, 450 Lemma subst_eq : forall t G1,
387 G1 ++ xt :: nil |-v length G1 : t 451 G1 ++ xt :: nil |-v length G1 : t
388 -> t = xt. 452 -> t = xt.
389 induction G1; inversion 1; crush. 453 induction G1; inversion 1; crush.
390 Qed. 454 Qed.
412 476
413 Hint Resolve subst_neq. 477 Hint Resolve subst_neq.
414 478
415 Hypothesis Ht' : nil |-e e1 : xt. 479 Hypothesis Ht' : nil |-e e1 : xt.
416 480
481 (** The next lemma is included solely to guide [eauto], which will not apply computational equivalences automatically. *)
482
417 Lemma hasType_push : forall dom G1 e' ran, 483 Lemma hasType_push : forall dom G1 e' ran,
418 dom :: G1 |-e subst (length (dom :: G1)) e' : ran 484 dom :: G1 |-e subst (length (dom :: G1)) e' : ran
419 -> dom :: G1 |-e subst (S (length G1)) e' : ran. 485 -> dom :: G1 |-e subst (S (length G1)) e' : ran.
420 trivial. 486 trivial.
421 Qed. 487 Qed.
422 488
423 Hint Resolve hasType_push. 489 Hint Resolve hasType_push.
490
491 (** Finally, we are ready for the main theorem about substitution and typing. *)
424 492
425 Theorem subst_hasType : forall G e2 t, 493 Theorem subst_hasType : forall G e2 t,
426 G |-e e2 : t 494 G |-e e2 : t
427 -> forall G1, G = G1 ++ xt :: nil 495 -> forall G1, G = G1 ++ xt :: nil
428 -> G1 |-e subst (length G1) e2 : t. 496 -> G1 |-e subst (length G1) e2 : t.
443 Qed. 511 Qed.
444 End subst. 512 End subst.
445 513
446 Hint Resolve subst_hasType_closed. 514 Hint Resolve subst_hasType_closed.
447 515
516 (** We define the operational semantics much as before. *)
517
448 Notation "[ x ~> e1 ] e2" := (subst e1 x e2) (no associativity, at level 80). 518 Notation "[ x ~> e1 ] e2" := (subst e1 x e2) (no associativity, at level 80).
449 519
450 Inductive val : exp -> Prop := 520 Inductive val : exp -> Prop :=
451 | VConst : forall b, val (Const b) 521 | VConst : forall b, val (Const b)
452 | VAbs : forall e, val (Abs e). 522 | VAbs : forall e, val (Abs e).
469 539
470 where "e1 ==> e2" := (step e1 e2). 540 where "e1 ==> e2" := (step e1 e2).
471 541
472 Hint Constructors step. 542 Hint Constructors step.
473 543
544 (** Since we have added the right hints, the progress and preservation theorem statements and proofs are exactly the same as in the concrete encoding example. *)
545
474 Lemma progress' : forall G e t, G |-e e : t 546 Lemma progress' : forall G e t, G |-e e : t
475 -> G = nil 547 -> G = nil
476 -> val e \/ exists e', e ==> e'. 548 -> val e \/ exists e', e ==> e'.
477 induction 1; crush; eauto; 549 induction 1; crush; eauto;
478 try match goal with 550 try match goal with