comparison src/MoreDep.v @ 214:768889c969e9

Finish porting MoreDep
author Adam Chlipala <adamc@hcoop.net>
date Wed, 11 Nov 2009 12:21:28 -0500
parents c4b1c0de7af9
children d1e0a6d8eef1
comparison
equal deleted inserted replaced
213:c4b1c0de7af9 214:768889c969e9
372 (* end thide *) 372 (* end thide *)
373 373
374 374
375 (** * Dependently-Typed Red-Black Trees *) 375 (** * Dependently-Typed Red-Black Trees *)
376 376
377 (** TODO: Add commentary for this section. *) 377 (** Red-black trees are a favorite purely-functional data structure with an interesting invariant. We can use dependent types to enforce that operations on red-black trees preserve the invariant. For simplicity, we specialize our red-black trees to represent sets of [nat]s. *)
378 378
379 Inductive color : Set := Red | Black. 379 Inductive color : Set := Red | Black.
380 380
381 Inductive rbtree : color -> nat -> Set := 381 Inductive rbtree : color -> nat -> Set :=
382 | Leaf : rbtree Black 0 382 | Leaf : rbtree Black 0
383 | RedNode : forall n, rbtree Black n -> nat-> rbtree Black n -> rbtree Red n 383 | RedNode : forall n, rbtree Black n -> nat -> rbtree Black n -> rbtree Red n
384 | BlackNode : forall c1 c2 n, rbtree c1 n -> nat -> rbtree c2 n -> rbtree Black (S n). 384 | BlackNode : forall c1 c2 n, rbtree c1 n -> nat -> rbtree c2 n -> rbtree Black (S n).
385
386 (** A value of type [rbtree c d] is a red-black tree node whose root has color [c] and that has black depth [d]. The latter property means that there are no more than [d] black-colored nodes on any path from the root to a leaf. *)
387
388 (** At first, it can be unclear that this choice of type indices tracks any useful property. To convince ourselves, we will prove that every red-black tree is balanced. We will phrase our theorem in terms of a depth calculating function that ignores the extra information in the types. It will be useful to parameterize this function over a combining operation, so that we can re-use the same code to calculate the minimum or maximum height among all paths from root to leaf. *)
385 389
386 (* EX: Prove that every [rbtree] is balanced. *) 390 (* EX: Prove that every [rbtree] is balanced. *)
387 391
388 (* begin thide *) 392 (* begin thide *)
389 Require Import Max Min. 393 Require Import Max Min.
390 394
391 Section depth. 395 Section depth.
392 Variable f : nat -> nat -> nat. 396 Variable f : nat -> nat -> nat.
393 397
394 Fixpoint depth c n (t : rbtree c n) {struct t} : nat := 398 Fixpoint depth c n (t : rbtree c n) : nat :=
395 match t with 399 match t with
396 | Leaf => 0 400 | Leaf => 0
397 | RedNode _ t1 _ t2 => S (f (depth t1) (depth t2)) 401 | RedNode _ t1 _ t2 => S (f (depth t1) (depth t2))
398 | BlackNode _ _ _ t1 _ t2 => S (f (depth t1) (depth t2)) 402 | BlackNode _ _ _ t1 _ t2 => S (f (depth t1) (depth t2))
399 end. 403 end.
400 End depth. 404 End depth.
401 405
406 (** Our proof of balanced-ness decomposes naturally into a lower bound and an upper bound. We prove the lower bound first. Unsurprisingly, a tree's black depth provides such a bound on the minimum path length. We use the richly-typed procedure [min_dec] to do case analysis on whether [min X Y] equals [X] or [Y]. *)
407
402 Theorem depth_min : forall c n (t : rbtree c n), depth min t >= n. 408 Theorem depth_min : forall c n (t : rbtree c n), depth min t >= n.
403 induction t; crush; 409 induction t; crush;
404 match goal with 410 match goal with
405 | [ |- context[min ?X ?Y] ] => destruct (min_dec X Y) 411 | [ |- context[min ?X ?Y] ] => destruct (min_dec X Y)
406 end; crush. 412 end; crush.
407 Qed. 413 Qed.
414
415 (** There is an analogous upper-bound theorem based on black depth. Unfortunately, a symmetric proof script does not suffice to establish it. *)
416
417 Theorem depth_max : forall c n (t : rbtree c n), depth max t <= 2 * n + 1.
418 induction t; crush;
419 match goal with
420 | [ |- context[max ?X ?Y] ] => destruct (max_dec X Y)
421 end; crush.
422
423 (** Two subgoals remain. One of them is: [[
424 n : nat
425 t1 : rbtree Black n
426 n0 : nat
427 t2 : rbtree Black n
428 IHt1 : depth max t1 <= n + (n + 0) + 1
429 IHt2 : depth max t2 <= n + (n + 0) + 1
430 e : max (depth max t1) (depth max t2) = depth max t1
431 ============================
432 S (depth max t1) <= n + (n + 0) + 1
433
434 ]]
435
436 We see that [IHt1] is %\textit{%#<i>#almost#</i>#%}% the fact we need, but it is not quite strong enough. We will need to strengthen our induction hypothesis to get the proof to go through. *)
437
438 Abort.
439
440 (** In particular, we prove a lemma that provides a stronger upper bound for trees with black root nodes. We got stuck above in a case about a red root node. Since red nodes have only black children, our IH strengthening will enable us to finish the proof. *)
408 441
409 Lemma depth_max' : forall c n (t : rbtree c n), match c with 442 Lemma depth_max' : forall c n (t : rbtree c n), match c with
410 | Red => depth max t <= 2 * n + 1 443 | Red => depth max t <= 2 * n + 1
411 | Black => depth max t <= 2 * n 444 | Black => depth max t <= 2 * n
412 end. 445 end.
413 induction t; crush; 446 induction t; crush;
414 match goal with 447 match goal with
415 | [ |- context[max ?X ?Y] ] => destruct (max_dec X Y) 448 | [ |- context[max ?X ?Y] ] => destruct (max_dec X Y)
416 end; crush; 449 end; crush;
417 repeat (match goal with 450 repeat (match goal with
418 | [ H : context[match ?C with Red => _ | Black => _ end] |- _ ] => destruct C 451 | [ H : context[match ?C with Red => _ | Black => _ end] |- _ ] =>
452 destruct C
419 end; crush). 453 end; crush).
420 Qed. 454 Qed.
455
456 (** The original theorem follows easily from the lemma. We use the tactic [generalize pf], which, when [pf] proves the proposition [P], changes the goal from [Q] to [P -> Q]. It is useful to do this because it makes the truth of [P] manifest syntactically, so that automation machinery can rely on [P], even if that machinery is not smart enough to establish [P] on its own. *)
421 457
422 Theorem depth_max : forall c n (t : rbtree c n), depth max t <= 2 * n + 1. 458 Theorem depth_max : forall c n (t : rbtree c n), depth max t <= 2 * n + 1.
423 intros; generalize (depth_max' t); destruct c; crush. 459 intros; generalize (depth_max' t); destruct c; crush.
424 Qed. 460 Qed.
425 461
462 (** The final balance theorem establishes that the minimum and maximum path lengths of any tree are within a factor of two of each other. *)
463
426 Theorem balanced : forall c n (t : rbtree c n), 2 * depth min t + 1 >= depth max t. 464 Theorem balanced : forall c n (t : rbtree c n), 2 * depth min t + 1 >= depth max t.
427 intros; generalize (depth_min t); generalize (depth_max t); crush. 465 intros; generalize (depth_min t); generalize (depth_max t); crush.
428 Qed. 466 Qed.
429 (* end thide *) 467 (* end thide *)
430 468
469 (** Now we are ready to implement an example operation on our trees, insertion. Insertion can be thought of as breaking the tree invariants locally but then rebalancing. In particular, in intermediate states we find red nodes that may have red children. The type [rtree] captures the idea of such a node, continuing to track black depth as a type index. *)
431 470
432 Inductive rtree : nat -> Set := 471 Inductive rtree : nat -> Set :=
433 | RedNode' : forall c1 c2 n, rbtree c1 n -> nat -> rbtree c2 n -> rtree n. 472 | RedNode' : forall c1 c2 n, rbtree c1 n -> nat -> rbtree c2 n -> rtree n.
434 473
474 (** Before starting to define [insert], we define predicates capturing when a data value is in the set represented by a normal or possibly-invalid tree. *)
475
435 Section present. 476 Section present.
436 Variable x : nat. 477 Variable x : nat.
437 478
438 Fixpoint present c n (t : rbtree c n) {struct t} : Prop := 479 Fixpoint present c n (t : rbtree c n) : Prop :=
439 match t with 480 match t with
440 | Leaf => False 481 | Leaf => False
441 | RedNode _ a y b => present a \/ x = y \/ present b 482 | RedNode _ a y b => present a \/ x = y \/ present b
442 | BlackNode _ _ _ a y b => present a \/ x = y \/ present b 483 | BlackNode _ _ _ a y b => present a \/ x = y \/ present b
443 end. 484 end.
446 match t with 487 match t with
447 | RedNode' _ _ _ a y b => present a \/ x = y \/ present b 488 | RedNode' _ _ _ a y b => present a \/ x = y \/ present b
448 end. 489 end.
449 End present. 490 End present.
450 491
492 (** Insertion relies on two balancing operations. It will be useful to give types to these operations using a relative of the subset types from last chapter. While subset types let us pair a value with a proof about that value, here we want to pair a value with another non-proof dependently-typed value. The [sigT] type fills this role. *)
493
451 Locate "{ _ : _ & _ }". 494 Locate "{ _ : _ & _ }".
495 (** [[
496 Notation Scope
497 "{ x : A & P }" := sigT (fun x : A => P)
498 ]] *)
499
452 Print sigT. 500 Print sigT.
501 (** [[
502 Inductive sigT (A : Type) (P : A -> Type) : Type :=
503 existT : forall x : A, P x -> sigT P
504 ]] *)
505
506 (** It will be helpful to define a concise notation for the constructor of [sigT]. *)
453 507
454 Notation "{< x >}" := (existT _ _ x). 508 Notation "{< x >}" := (existT _ _ x).
455 509
510 (** Each balance function is used to construct a new tree whose keys include the keys of two input trees, as well as a new key. One of the two input trees may violate the red-black alternation invariant (that is, it has an [rtree] type), while the other tree is known to be valid. Crucially, the two input trees have the same black depth.
511
512 A balance operation may return a tree whose root is of either color. Thus, we use a [sigT] type to package the result tree with the color of its root. Here is the definition of the first balance operation, which applies when the possibly-invalid [rtree] belongs to the left of the valid [rbtree]. *)
513
456 Definition balance1 n (a : rtree n) (data : nat) c2 := 514 Definition balance1 n (a : rtree n) (data : nat) c2 :=
457 match a in rtree n return rbtree c2 n -> { c : color & rbtree c (S n) } with 515 match a in rtree n return rbtree c2 n
516 -> { c : color & rbtree c (S n) } with
458 | RedNode' _ _ _ t1 y t2 => 517 | RedNode' _ _ _ t1 y t2 =>
459 match t1 in rbtree c n return rbtree _ n -> rbtree c2 n -> { c : color & rbtree c (S n) } with 518 match t1 in rbtree c n return rbtree _ n -> rbtree c2 n
460 | RedNode _ a x b => fun c d => {<RedNode (BlackNode a x b) y (BlackNode c data d)>} 519 -> { c : color & rbtree c (S n) } with
520 | RedNode _ a x b => fun c d =>
521 {<RedNode (BlackNode a x b) y (BlackNode c data d)>}
461 | t1' => fun t2 => 522 | t1' => fun t2 =>
462 match t2 in rbtree c n return rbtree _ n -> rbtree c2 n -> { c : color & rbtree c (S n) } with 523 match t2 in rbtree c n return rbtree _ n -> rbtree c2 n
463 | RedNode _ b x c => fun a d => {<RedNode (BlackNode a y b) x (BlackNode c data d)>} 524 -> { c : color & rbtree c (S n) } with
525 | RedNode _ b x c => fun a d =>
526 {<RedNode (BlackNode a y b) x (BlackNode c data d)>}
464 | b => fun a t => {<BlackNode (RedNode a y b) data t>} 527 | b => fun a t => {<BlackNode (RedNode a y b) data t>}
465 end t1' 528 end t1'
466 end t2 529 end t2
467 end. 530 end.
468 531
532 (** We apply a trick that I call the %\textit{%#<i>#convoy pattern#</i>#%}%. Recall that [match] annotations only make it possible to describe a dependence of a [match] %\textit{%#<i>#result type#</i>#%}% on the discriminee. There is no automatic refinement of the types of free variables. However, it is possible to effect such a refinement by finding a way to encode free variable type dependencies in the [match] result type, so that a [return] clause can express the connection.
533
534 In particular, we can extend the [match] to return %\textit{%#<i>#functions over the free variables whose types we want to refine#</i>#%}%. In the case of [balance1], we only find ourselves wanting to refine the type of one tree variable at a time. We match on one subtree of a node, and we want the type of the other subtree to be refined based on what we learn. We indicate this with a [return] clause starting like [rbtree _ n -> ...], where [n] is bound in an [in] pattern. Such a [match] expression is applied immediately to the "old version" of the variable to be refined, and the type checker is happy.
535
536 After writing this code, even I do not understand the precise details of how balancing works. I consulted Chris Okasaki's paper "Red-Black Trees in a Functional Setting" and transcribed the code to use dependent types. Luckily, the details are not so important here; types alone will tell us that insertion preserves balanced-ness, and we will prove that insertion produces trees containing the right keys.
537
538 Here is the symmetric function [balance2], for cases where the possibly-invalid tree appears on the right rather than on the left. *)
539
469 Definition balance2 n (a : rtree n) (data : nat) c2 := 540 Definition balance2 n (a : rtree n) (data : nat) c2 :=
470 match a in rtree n return rbtree c2 n -> { c : color & rbtree c (S n) } with 541 match a in rtree n return rbtree c2 n -> { c : color & rbtree c (S n) } with
471 | RedNode' _ _ _ t1 z t2 => 542 | RedNode' _ _ _ t1 z t2 =>
472 match t1 in rbtree c n return rbtree _ n -> rbtree c2 n -> { c : color & rbtree c (S n) } with 543 match t1 in rbtree c n return rbtree _ n -> rbtree c2 n
473 | RedNode _ b y c => fun d a => {<RedNode (BlackNode a data b) y (BlackNode c z d)>} 544 -> { c : color & rbtree c (S n) } with
545 | RedNode _ b y c => fun d a =>
546 {<RedNode (BlackNode a data b) y (BlackNode c z d)>}
474 | t1' => fun t2 => 547 | t1' => fun t2 =>
475 match t2 in rbtree c n return rbtree _ n -> rbtree c2 n -> { c : color & rbtree c (S n) } with 548 match t2 in rbtree c n return rbtree _ n -> rbtree c2 n
476 | RedNode _ c z' d => fun b a => {<RedNode (BlackNode a data b) z (BlackNode c z' d)>} 549 -> { c : color & rbtree c (S n) } with
550 | RedNode _ c z' d => fun b a =>
551 {<RedNode (BlackNode a data b) z (BlackNode c z' d)>}
477 | b => fun a t => {<BlackNode t data (RedNode a z b)>} 552 | b => fun a t => {<BlackNode t data (RedNode a z b)>}
478 end t1' 553 end t1'
479 end t2 554 end t2
480 end. 555 end.
481 556
557 (** Now we are almost ready to get down to the business of writing an [insert] function. First, we enter a section that declares a variable [x], for the key we want to insert. *)
558
482 Section insert. 559 Section insert.
483 Variable x : nat. 560 Variable x : nat.
561
562 (** Most of the work of insertion is done by a helper function [ins], whose return types are expressed using a type-level function [insResult]. *)
484 563
485 Definition insResult c n := 564 Definition insResult c n :=
486 match c with 565 match c with
487 | Red => rtree n 566 | Red => rtree n
488 | Black => { c' : color & rbtree c' n } 567 | Black => { c' : color & rbtree c' n }
489 end. 568 end.
490 569
491 Fixpoint ins c n (t : rbtree c n) {struct t} : insResult c n := 570 (** That is, inserting into a tree with root color [c] and black depth [n], the variety of tree we get out depends on [c]. If we started with a red root, then we get back a possibly-invalid tree of depth [n]. If we started with a black root, we get back a valid tree of depth [n] with a root node of an arbitary color.
492 match t in (rbtree c n) return (insResult c n) with 571
572 Here is the definition of [ins]. Again, we do not want to dwell on the functional details. *)
573
574 Fixpoint ins c n (t : rbtree c n) : insResult c n :=
575 match t with
493 | Leaf => {< RedNode Leaf x Leaf >} 576 | Leaf => {< RedNode Leaf x Leaf >}
494 | RedNode _ a y b => 577 | RedNode _ a y b =>
495 if le_lt_dec x y 578 if le_lt_dec x y
496 then RedNode' (projT2 (ins a)) y b 579 then RedNode' (projT2 (ins a)) y b
497 else RedNode' a y (projT2 (ins b)) 580 else RedNode' a y (projT2 (ins b))
507 | Red => fun ins_b => balance2 ins_b y a 590 | Red => fun ins_b => balance2 ins_b y a
508 | _ => fun ins_b => {< BlackNode a y (projT2 ins_b) >} 591 | _ => fun ins_b => {< BlackNode a y (projT2 ins_b) >}
509 end (ins b) 592 end (ins b)
510 end. 593 end.
511 594
595 (** The one new trick is a variation of the convoy pattern. In each of the last two pattern matches, we want to take advantage of the typing connection between the trees [a] and [b]. We might naively apply the convoy pattern directly on [a] in the first [match] and on [b] in the second. This satisifies the type checker per se, but it does not satisfy the termination checker. Inside each [match], we would be calling [ins] recursively on a locally-bound variable. The termination checker is not smart enough to trace the dataflow into that variable, so the checker does not know that this recursive argument is smaller than the original argument. We make this fact clearer by applying the convoy pattern on %\textit{%#<i>#the result of a recursive call#</i>#%}%, rather than just on that call's argument.
596
597 Finally, we are in the home stretch of our effort to define [insert]. We just need a few more definitions of non-recursive functions. First, we need to give the final characterization of [insert]'s return type. Inserting into a red-rooted tree gives a black-rooted tree where black depth has increased, and inserting into a black-rooted tree gives a tree where black depth has stayed the same and where the root is an arbitrary color. *)
598
512 Definition insertResult c n := 599 Definition insertResult c n :=
513 match c with 600 match c with
514 | Red => rbtree Black (S n) 601 | Red => rbtree Black (S n)
515 | Black => { c' : color & rbtree c' n } 602 | Black => { c' : color & rbtree c' n }
516 end. 603 end.
517 604
605 (** A simple clean-up procedure translates [insResult]s into [insertResult]s. *)
606
518 Definition makeRbtree c n : insResult c n -> insertResult c n := 607 Definition makeRbtree c n : insResult c n -> insertResult c n :=
519 match c return insResult c n -> insertResult c n with 608 match c with
520 | Red => fun r => 609 | Red => fun r =>
521 match r in rtree n return insertResult Red n with 610 match r with
522 | RedNode' _ _ _ a x b => BlackNode a x b 611 | RedNode' _ _ _ a x b => BlackNode a x b
523 end 612 end
524 | Black => fun r => r 613 | Black => fun r => r
525 end. 614 end.
526 615
616 (** We modify Coq's default choice of implicit arguments for [makeRbtree], so that we do not need to specify the [c] and [n] arguments explicitly in later calls. *)
617
527 Implicit Arguments makeRbtree [c n]. 618 Implicit Arguments makeRbtree [c n].
619
620 (** Finally, we define [insert] as a simple composition of [ins] and [makeRbtree]. *)
528 621
529 Definition insert c n (t : rbtree c n) : insertResult c n := 622 Definition insert c n (t : rbtree c n) : insertResult c n :=
530 makeRbtree (ins t). 623 makeRbtree (ins t).
531 624
625 (** As we noted earlier, the type of [insert] guarantees that it outputs balanced trees whose depths have not increased too much. We also want to know that [insert] operates correctly on trees interpreted as finite sets, so we finish this section with a proof of that fact. *)
626
532 Section present. 627 Section present.
533 Variable z : nat. 628 Variable z : nat.
629
630 (** The variable [z] stands for an arbitrary key. We will reason about [z]'s presence in particular trees. As usual, outside the section the theorems we prove will quantify over all possible keys, giving us the facts we wanted.
631
632 We start by proving the correctness of the balance operations. It is useful to define a custom tactic [present_balance] that encapsulates the reasoning common to the two proofs. We use the keyword [Ltac] to assign a name to a proof script. This particular script just iterates between [crush] and identification of a tree that is being pattern-matched on and should be destructed. *)
534 633
535 Ltac present_balance := 634 Ltac present_balance :=
536 crush; 635 crush;
537 repeat (match goal with 636 repeat (match goal with
538 | [ H : context[match ?T with 637 | [ H : context[match ?T with
545 | RedNode _ _ _ _ => _ 644 | RedNode _ _ _ _ => _
546 | BlackNode _ _ _ _ _ _ => _ 645 | BlackNode _ _ _ _ _ _ => _
547 end] ] => dep_destruct T 646 end] ] => dep_destruct T
548 end; crush). 647 end; crush).
549 648
649 (** The balance correctness theorems are simple first-order logic equivalences, where we use the function [projT2] to project the payload of a [sigT] value. *)
650
550 Lemma present_balance1 : forall n (a : rtree n) (y : nat) c2 (b : rbtree c2 n) , 651 Lemma present_balance1 : forall n (a : rtree n) (y : nat) c2 (b : rbtree c2 n) ,
551 present z (projT2 (balance1 a y b)) 652 present z (projT2 (balance1 a y b))
552 <-> rpresent z a \/ z = y \/ present z b. 653 <-> rpresent z a \/ z = y \/ present z b.
553 destruct a; present_balance. 654 destruct a; present_balance.
554 Qed. 655 Qed.
557 present z (projT2 (balance2 a y b)) 658 present z (projT2 (balance2 a y b))
558 <-> rpresent z a \/ z = y \/ present z b. 659 <-> rpresent z a \/ z = y \/ present z b.
559 destruct a; present_balance. 660 destruct a; present_balance.
560 Qed. 661 Qed.
561 662
663 (** To state the theorem for [ins], it is useful to define a new type-level function, since [ins] returns different result types based on the type indices passed to it. Recall that [x] is the section variable standing for the key we are inserting. *)
664
562 Definition present_insResult c n := 665 Definition present_insResult c n :=
563 match c return (rbtree c n -> insResult c n -> Prop) with 666 match c return (rbtree c n -> insResult c n -> Prop) with
564 | Red => fun t r => rpresent z r <-> z = x \/ present z t 667 | Red => fun t r => rpresent z r <-> z = x \/ present z t
565 | Black => fun t r => present z (projT2 r) <-> z = x \/ present z t 668 | Black => fun t r => present z (projT2 r) <-> z = x \/ present z t
566 end. 669 end.
670
671 (** Now the statement and proof of the [ins] correctness theorem are straightforward, if verbose. We proceed by induction on the structure of a tree, followed by finding case analysis opportunities on expressions we see being analyzed in [if] or [match] expressions. After that, we pattern-match to find opportunities to use the theorems we proved about balancing. Finally, we identify two variables that are asserted by some hypothesis to be equal, and we use that hypothesis to replace one variable with the other everywhere. *)
672
673 (** printing * $*$ *)
567 674
568 Theorem present_ins : forall c n (t : rbtree c n), 675 Theorem present_ins : forall c n (t : rbtree c n),
569 present_insResult t (ins t). 676 present_insResult t (ins t).
570 induction t; crush; 677 induction t; crush;
571 repeat (match goal with 678 repeat (match goal with
572 | [ H : context[if ?E then _ else _] |- _ ] => destruct E 679 | [ H : context[if ?E then _ else _] |- _ ] => destruct E
573 | [ |- context[if ?E then _ else _] ] => destruct E 680 | [ |- context[if ?E then _ else _] ] => destruct E
574 | [ H : context[match ?C with Red => _ | Black => _ end] |- _ ] => destruct C 681 | [ H : context[match ?C with Red => _ | Black => _ end]
682 |- _ ] => destruct C
575 end; crush); 683 end; crush);
576 try match goal with 684 try match goal with
577 | [ H : context[balance1 ?A ?B ?C] |- _ ] => 685 | [ H : context[balance1 ?A ?B ?C] |- _ ] =>
578 generalize (present_balance1 A B C) 686 generalize (present_balance1 A B C)
579 end; 687 end;
587 end; 695 end;
588 try match goal with 696 try match goal with
589 | [ |- context[balance2 ?A ?B ?C] ] => 697 | [ |- context[balance2 ?A ?B ?C] ] =>
590 generalize (present_balance2 A B C) 698 generalize (present_balance2 A B C)
591 end; 699 end;
592 intuition; 700 crush;
593 match goal with 701 match goal with
594 | [ z : nat, x : nat |- _ ] => 702 | [ z : nat, x : nat |- _ ] =>
595 match goal with 703 match goal with
596 | [ H : z = x |- _ ] => rewrite H in *; clear H 704 | [ H : z = x |- _ ] => rewrite H in *; clear H
597 end 705 end
598 end; 706 end;
599 tauto. 707 tauto.
600 Qed. 708 Qed.
601 709
710 (** printing * $\times$ *)
711
712 (** The hard work is done. The most readable way to state correctness of [insert] involves splitting the property into two color-specific theorems. We write a tactic to encapsulate the reasoning steps that work to establish both facts. *)
713
602 Ltac present_insert := 714 Ltac present_insert :=
603 unfold insert; intros n t; inversion t; 715 unfold insert; intros n t; inversion t;
604 generalize (present_ins t); simpl; 716 generalize (present_ins t); simpl;
605 dep_destruct (ins t); tauto. 717 dep_destruct (ins t); tauto.
606 718
646 | Char : forall ch : ascii, 758 | Char : forall ch : ascii,
647 regexp (fun s => s = String ch "") 759 regexp (fun s => s = String ch "")
648 | Concat : forall (P1 P2 : string -> Prop) (r1 : regexp P1) (r2 : regexp P2), 760 | Concat : forall (P1 P2 : string -> Prop) (r1 : regexp P1) (r2 : regexp P2),
649 regexp (fun s => exists s1, exists s2, s = s1 ++ s2 /\ P1 s1 /\ P2 s2). 761 regexp (fun s => exists s1, exists s2, s = s1 ++ s2 /\ P1 s1 /\ P2 s2).
650 762
651 [[
652 User error: Large non-propositional inductive types must be in Type 763 User error: Large non-propositional inductive types must be in Type
764
653 ]] 765 ]]
654 766
655 What is a large inductive type? In Coq, it is an inductive type that has a constructor which quantifies over some type of type [Type]. We have not worked with [Type] very much to this point. Every term of CIC has a type, including [Set] and [Prop], which are assigned type [Type]. The type [string -> Prop] from the failed definition also has type [Type]. 767 What is a large inductive type? In Coq, it is an inductive type that has a constructor which quantifies over some type of type [Type]. We have not worked with [Type] very much to this point. Every term of CIC has a type, including [Set] and [Prop], which are assigned type [Type]. The type [string -> Prop] from the failed definition also has type [Type].
656 768
657 It turns out that allowing large inductive types in [Set] leads to contradictions when combined with certain kinds of classical logic reasoning. Thus, by default, such types are ruled out. There is a simple fix for our [regexp] definition, which is to place our new type in [Type]. While fixing the problem, we also expand the list of constructors to cover the remaining regular expression operators. *) 769 It turns out that allowing large inductive types in [Set] leads to contradictions when combined with certain kinds of classical logic reasoning. Thus, by default, such types are ruled out. There is a simple fix for our [regexp] definition, which is to place our new type in [Type]. While fixing the problem, we also expand the list of constructors to cover the remaining regular expression operators. *)
726 Hint Rewrite <- minus_n_O : cpdt. 838 Hint Rewrite <- minus_n_O : cpdt.
727 839
728 induction s1; crush. 840 induction s1; crush.
729 Qed. 841 Qed.
730 842
731 Hint Rewrite substring_app_fst substring_app_snd using (trivial; fail) : cpdt. 843 Hint Rewrite substring_app_fst substring_app_snd using solve [trivial] : cpdt.
732 (* end hide *) 844 (* end hide *)
733 845
734 (** A few auxiliary functions help us in our final matcher definition. The function [split] will be used to implement the regexp concatenation case. *) 846 (** A few auxiliary functions help us in our final matcher definition. The function [split] will be used to implement the regexp concatenation case. *)
735 847
736 Section split. 848 Section split.
737 Variables P1 P2 : string -> Prop. 849 Variables P1 P2 : string -> Prop.
738 Variable P1_dec : forall s, {P1 s} + { ~P1 s}. 850 Variable P1_dec : forall s, {P1 s} + {~ P1 s}.
739 Variable P2_dec : forall s, {P2 s} + { ~P2 s}. 851 Variable P2_dec : forall s, {P2 s} + {~ P2 s}.
740 (** We require a choice of two arbitrary string predicates and functions for deciding them. *) 852 (** We require a choice of two arbitrary string predicates and functions for deciding them. *)
741 853
742 Variable s : string. 854 Variable s : string.
743 (** Our computation will take place relative to a single fixed string, so it is easiest to make it a [Variable], rather than an explicit argument to our functions. *) 855 (** Our computation will take place relative to a single fixed string, so it is easiest to make it a [Variable], rather than an explicit argument to our functions. *)
744 856
745 (** [split'] is the workhorse behind [split]. It searches through the possible ways of splitting [s] into two pieces, checking the two predicates against each such pair. [split'] progresses right-to-left, from splitting all of [s] into the first piece to splitting all of [s] into the second piece. It takes an extra argument, [n], which specifies how far along we are in this search process. *) 857 (** [split'] is the workhorse behind [split]. It searches through the possible ways of splitting [s] into two pieces, checking the two predicates against each such pair. [split'] progresses right-to-left, from splitting all of [s] into the first piece to splitting all of [s] into the second piece. It takes an extra argument, [n], which specifies how far along we are in this search process. *)
746 858
747 Definition split' (n : nat) : n <= length s 859 Definition split' (n : nat) : n <= length s
748 -> {exists s1, exists s2, length s1 <= n /\ s1 ++ s2 = s /\ P1 s1 /\ P2 s2} 860 -> {exists s1, exists s2, length s1 <= n /\ s1 ++ s2 = s /\ P1 s1 /\ P2 s2}
749 + {forall s1 s2, length s1 <= n -> s1 ++ s2 = s -> ~P1 s1 \/ ~P2 s2}. 861 + {forall s1 s2, length s1 <= n -> s1 ++ s2 = s -> ~ P1 s1 \/ ~ P2 s2}.
750 refine (fix F (n : nat) : n <= length s 862 refine (fix F (n : nat) : n <= length s
751 -> {exists s1, exists s2, length s1 <= n /\ s1 ++ s2 = s /\ P1 s1 /\ P2 s2} 863 -> {exists s1, exists s2, length s1 <= n /\ s1 ++ s2 = s /\ P1 s1 /\ P2 s2}
752 + {forall s1 s2, length s1 <= n -> s1 ++ s2 = s -> ~P1 s1 \/ ~P2 s2} := 864 + {forall s1 s2, length s1 <= n -> s1 ++ s2 = s -> ~ P1 s1 \/ ~ P2 s2} :=
753 match n return n <= length s 865 match n with
754 -> {exists s1, exists s2, length s1 <= n /\ s1 ++ s2 = s /\ P1 s1 /\ P2 s2}
755 + {forall s1 s2, length s1 <= n -> s1 ++ s2 = s -> ~P1 s1 \/ ~P2 s2} with
756 | O => fun _ => Reduce (P1_dec "" && P2_dec s) 866 | O => fun _ => Reduce (P1_dec "" && P2_dec s)
757 | S n' => fun _ => (P1_dec (substring 0 (S n') s) 867 | S n' => fun _ => (P1_dec (substring 0 (S n') s)
758 && P2_dec (substring (S n') (length s - S n') s)) 868 && P2_dec (substring (S n') (length s - S n') s))
759 || F n' _ 869 || F n' _
760 end); clear F; crush; eauto 7; 870 end); clear F; crush; eauto 7;
766 Defined. 876 Defined.
767 877
768 (** There is one subtle point in the [split'] code that is worth mentioning. The main body of the function is a [match] on [n]. In the case where [n] is known to be [S n'], we write [S n'] in several places where we might be tempted to write [n]. However, without further work to craft proper [match] annotations, the type-checker does not use the equality between [n] and [S n']. Thus, it is common to see patterns repeated in [match] case bodies in dependently-typed Coq code. We can at least use a [let] expression to avoid copying the pattern more than once, replacing the first case body with: 878 (** There is one subtle point in the [split'] code that is worth mentioning. The main body of the function is a [match] on [n]. In the case where [n] is known to be [S n'], we write [S n'] in several places where we might be tempted to write [n]. However, without further work to craft proper [match] annotations, the type-checker does not use the equality between [n] and [S n']. Thus, it is common to see patterns repeated in [match] case bodies in dependently-typed Coq code. We can at least use a [let] expression to avoid copying the pattern more than once, replacing the first case body with:
769 879
770 [[ 880 [[
771
772 | S n' => fun _ => let n := S n' in 881 | S n' => fun _ => let n := S n' in
773 (P1_dec (substring 0 n s) 882 (P1_dec (substring 0 n s)
774 && P2_dec (substring n (length s - n) s)) 883 && P2_dec (substring n (length s - n) s))
775 || F n' _ 884 || F n' _
885
776 ]] 886 ]]
777 887
778 [split] itself is trivial to implement in terms of [split']. We just ask [split'] to begin its search with [n = length s]. *) 888 [split] itself is trivial to implement in terms of [split']. We just ask [split'] to begin its search with [n = length s]. *)
779 889
780 Definition split : {exists s1, exists s2, s = s1 ++ s2 /\ P1 s1 /\ P2 s2} 890 Definition split : {exists s1, exists s2, s = s1 ++ s2 /\ P1 s1 /\ P2 s2}
781 + {forall s1 s2, s = s1 ++ s2 -> ~P1 s1 \/ ~P2 s2}. 891 + {forall s1 s2, s = s1 ++ s2 -> ~ P1 s1 \/ ~ P2 s2}.
782 refine (Reduce (split' (n := length s) _)); crush; eauto. 892 refine (Reduce (split' (n := length s) _)); crush; eauto.
783 Defined. 893 Defined.
784 End split. 894 End split.
785 895
786 Implicit Arguments split [P1 P2]. 896 Implicit Arguments split [P1 P2].
881 991
882 (** One more helper function will come in handy: [dec_star], for implementing another linear search through ways of splitting a string, this time for implementing the Kleene star. *) 992 (** One more helper function will come in handy: [dec_star], for implementing another linear search through ways of splitting a string, this time for implementing the Kleene star. *)
883 993
884 Section dec_star. 994 Section dec_star.
885 Variable P : string -> Prop. 995 Variable P : string -> Prop.
886 Variable P_dec : forall s, {P s} + { ~P s}. 996 Variable P_dec : forall s, {P s} + {~ P s}.
887 997
888 (** Some new lemmas and hints about the [star] type family are useful here. We omit them here; they are included in the book source at this point. *) 998 (** Some new lemmas and hints about the [star] type family are useful here. We omit them here; they are included in the book source at this point. *)
889 999
890 (* begin hide *) 1000 (* begin hide *)
891 Hint Constructors star. 1001 Hint Constructors star.
958 (** [n] is the length of the prefix of [s] that we have already processed. *) 1068 (** [n] is the length of the prefix of [s] that we have already processed. *)
959 1069
960 Variable P' : string -> Prop. 1070 Variable P' : string -> Prop.
961 Variable P'_dec : forall n' : nat, n' > n 1071 Variable P'_dec : forall n' : nat, n' > n
962 -> {P' (substring n' (length s - n') s)} 1072 -> {P' (substring n' (length s - n') s)}
963 + { ~P' (substring n' (length s - n') s)}. 1073 + {~ P' (substring n' (length s - n') s)}.
964 (** When we use [dec_star''], we will instantiate [P'_dec] with a function for continuing the search for more instances of [P] in [s]. *) 1074 (** When we use [dec_star''], we will instantiate [P'_dec] with a function for continuing the search for more instances of [P] in [s]. *)
965 1075
966 (** Now we come to [dec_star''] itself. It takes as an input a natural [l] that records how much of the string has been searched so far, as we did for [split']. The return type expresses that [dec_star''] is looking for an index into [s] that splits [s] into a nonempty prefix and a suffix, such that the prefix satisfies [P] and the suffix satisfies [P']. *) 1076 (** Now we come to [dec_star''] itself. It takes as an input a natural [l] that records how much of the string has been searched so far, as we did for [split']. The return type expresses that [dec_star''] is looking for an index into [s] that splits [s] into a nonempty prefix and a suffix, such that the prefix satisfies [P] and the suffix satisfies [P']. *)
967 1077
968 Definition dec_star'' (l : nat) 1078 Definition dec_star'' (l : nat)
969 : {exists l', S l' <= l 1079 : {exists l', S l' <= l
970 /\ P (substring n (S l') s) /\ P' (substring (n + S l') (length s - (n + S l')) s)} 1080 /\ P (substring n (S l') s) /\ P' (substring (n + S l') (length s - (n + S l')) s)}
971 + {forall l', S l' <= l 1081 + {forall l', S l' <= l
972 -> ~P (substring n (S l') s) 1082 -> ~ P (substring n (S l') s)
973 \/ ~P' (substring (n + S l') (length s - (n + S l')) s)}. 1083 \/ ~ P' (substring (n + S l') (length s - (n + S l')) s)}.
974 refine (fix F (l : nat) : {exists l', S l' <= l 1084 refine (fix F (l : nat) : {exists l', S l' <= l
975 /\ P (substring n (S l') s) /\ P' (substring (n + S l') (length s - (n + S l')) s)} 1085 /\ P (substring n (S l') s) /\ P' (substring (n + S l') (length s - (n + S l')) s)}
976 + {forall l', S l' <= l 1086 + {forall l', S l' <= l
977 -> ~P (substring n (S l') s) 1087 -> ~ P (substring n (S l') s)
978 \/ ~P' (substring (n + S l') (length s - (n + S l')) s)} := 1088 \/ ~ P' (substring (n + S l') (length s - (n + S l')) s)} :=
979 match l return {exists l', S l' <= l 1089 match l with
980 /\ P (substring n (S l') s) /\ P' (substring (n + S l') (length s - (n + S l')) s)}
981 + {forall l', S l' <= l ->
982 ~P (substring n (S l') s)
983 \/ ~P' (substring (n + S l') (length s - (n + S l')) s)} with
984 | O => _ 1090 | O => _
985 | S l' => 1091 | S l' =>
986 (P_dec (substring n (S l') s) && P'_dec (n' := n + S l') _) 1092 (P_dec (substring n (S l') s) && P'_dec (n' := n + S l') _)
987 || F l' 1093 || F l'
988 end); clear F; crush; eauto 7; 1094 end); clear F; crush; eauto 7;
1012 1118
1013 (** The work of [dec_star''] is nested inside another linear search by [dec_star'], which provides the final functionality we need, but for arbitrary suffixes of [s], rather than just for [s] overall. *) 1119 (** The work of [dec_star''] is nested inside another linear search by [dec_star'], which provides the final functionality we need, but for arbitrary suffixes of [s], rather than just for [s] overall. *)
1014 1120
1015 Definition dec_star' (n n' : nat) : length s - n' <= n 1121 Definition dec_star' (n n' : nat) : length s - n' <= n
1016 -> {star P (substring n' (length s - n') s)} 1122 -> {star P (substring n' (length s - n') s)}
1017 + { ~star P (substring n' (length s - n') s)}. 1123 + {~ star P (substring n' (length s - n') s)}.
1018 refine (fix F (n n' : nat) {struct n} : length s - n' <= n 1124 refine (fix F (n n' : nat) : length s - n' <= n
1019 -> {star P (substring n' (length s - n') s)} 1125 -> {star P (substring n' (length s - n') s)}
1020 + { ~star P (substring n' (length s - n') s)} := 1126 + {~ star P (substring n' (length s - n') s)} :=
1021 match n return length s - n' <= n 1127 match n with
1022 -> {star P (substring n' (length s - n') s)}
1023 + { ~star P (substring n' (length s - n') s)} with
1024 | O => fun _ => Yes 1128 | O => fun _ => Yes
1025 | S n'' => fun _ => 1129 | S n'' => fun _ =>
1026 le_gt_dec (length s) n' 1130 le_gt_dec (length s) n'
1027 || dec_star'' (n := n') (star P) (fun n0 _ => Reduce (F n'' n0 _)) (length s - n') 1131 || dec_star'' (n := n') (star P) (fun n0 _ => Reduce (F n'' n0 _)) (length s - n')
1028 end); clear F; crush; eauto; 1132 end); clear F; crush; eauto;
1035 end. 1139 end.
1036 Defined. 1140 Defined.
1037 1141
1038 (** Finally, we have [dec_star]. It has a straightforward implementation. We introduce a spurious match on [s] so that [simpl] will know to reduce calls to [dec_star]. The heuristic that [simpl] uses is only to unfold identifier definitions when doing so would simplify some [match] expression. *) 1142 (** Finally, we have [dec_star]. It has a straightforward implementation. We introduce a spurious match on [s] so that [simpl] will know to reduce calls to [dec_star]. The heuristic that [simpl] uses is only to unfold identifier definitions when doing so would simplify some [match] expression. *)
1039 1143
1040 Definition dec_star : {star P s} + { ~star P s}. 1144 Definition dec_star : {star P s} + {~ star P s}.
1041 refine (match s return _ with 1145 refine (match s return _ with
1042 | "" => Reduce (dec_star' (n := length s) 0 _) 1146 | "" => Reduce (dec_star' (n := length s) 0 _)
1043 | _ => Reduce (dec_star' (n := length s) 0 _) 1147 | _ => Reduce (dec_star' (n := length s) 0 _)
1044 end); crush. 1148 end); crush.
1045 Defined. 1149 Defined.
1056 Hint Resolve app_cong. 1160 Hint Resolve app_cong.
1057 (* end hide *) 1161 (* end hide *)
1058 1162
1059 (** With these helper functions completed, the implementation of our [matches] function is refreshingly straightforward. We only need one small piece of specific tactic work beyond what [crush] does for us. *) 1163 (** With these helper functions completed, the implementation of our [matches] function is refreshingly straightforward. We only need one small piece of specific tactic work beyond what [crush] does for us. *)
1060 1164
1061 Definition matches P (r : regexp P) s : {P s} + { ~P s}. 1165 Definition matches P (r : regexp P) s : {P s} + {~ P s}.
1062 refine (fix F P (r : regexp P) s : {P s} + { ~P s} := 1166 refine (fix F P (r : regexp P) s : {P s} + {~ P s} :=
1063 match r with 1167 match r with
1064 | Char ch => string_dec s (String ch "") 1168 | Char ch => string_dec s (String ch "")
1065 | Concat _ _ r1 r2 => Reduce (split (F _ r1) (F _ r2) s) 1169 | Concat _ _ r1 r2 => Reduce (split (F _ r1) (F _ r2) s)
1066 | Or _ _ r1 r2 => F _ r1 s || F _ r2 s 1170 | Or _ _ r1 r2 => F _ r1 s || F _ r2 s
1067 | Star _ r => dec_star _ _ _ 1171 | Star _ r => dec_star _ _ _