Mercurial > cpdt > repo
comparison src/MoreDep.v @ 93:a08e82c646a5
Comment regexp matcher
author | Adam Chlipala <adamc@hcoop.net> |
---|---|
date | Tue, 07 Oct 2008 16:50:46 -0400 |
parents | 41392e5acbf5 |
children | affdf00759d8 |
comparison
equal
deleted
inserted
replaced
92:41392e5acbf5 | 93:a08e82c646a5 |
---|---|
344 Qed. | 344 Qed. |
345 | 345 |
346 | 346 |
347 (** * A Certified Regular Expression Matcher *) | 347 (** * A Certified Regular Expression Matcher *) |
348 | 348 |
349 (** Another interesting example is regular expressions with dependent types that express which predicates over strings particular regexps implement. We can then assign a dependent type to a regular expression matching function, guaranteeing that it always decides the string property that we expect it to decide. | |
350 | |
351 Before defining the syntax of expressions, it is helpful to define an inductive type capturing the meaning of the Kleene star. We use Coq's string support, which comes through a combination of the [Strings] library and some parsing notations built into Coq. Operators like [++] and functions like [length] that we know from lists are defined again for strings. Notation scopes help us control which versions we want to use in particular contexts. *) | |
352 | |
349 Require Import Ascii String. | 353 Require Import Ascii String. |
350 Open Scope string_scope. | 354 Open Scope string_scope. |
351 | 355 |
352 Section star. | 356 Section star. |
353 Variable P : string -> Prop. | 357 Variable P : string -> Prop. |
357 | Iter : forall s1 s2, | 361 | Iter : forall s1 s2, |
358 P s1 | 362 P s1 |
359 -> star s2 | 363 -> star s2 |
360 -> star (s1 ++ s2). | 364 -> star (s1 ++ s2). |
361 End star. | 365 End star. |
366 | |
367 (** Now we can make our first attempt at defining a [regexp] type that is indexed by predicates on strings. Here is a reasonable-looking definition that is restricted to constant characters and concatenation. | |
368 | |
369 [[ | |
370 Inductive regexp : (string -> Prop) -> Set := | |
371 | Char : forall ch : ascii, | |
372 regexp (fun s => s = String ch "") | |
373 | Concat : forall (P1 P2 : string -> Prop) (r1 : regexp P1) (r2 : regexp P2), | |
374 regexp (fun s => exists s1, exists s2, s = s1 ++ s2 /\ P1 s1 /\ P2 s2). | |
375 | |
376 [[ | |
377 User error: Large non-propositional inductive types must be in Type | |
378 ]] | |
379 | |
380 What is a large inductive type? In Coq, it is an inductive type that has a constructor which quantifies over some type of type [Type]. We have not worked with [Type] very much to this point. Every term of CIC has a type, including [Set] and [Prop], which are assigned type [Type]. The type [string -> Prop] from the failed definition also has type [Type]. | |
381 | |
382 It turns out that allowing large inductive types in [Set] leads to contradictions when combined with certain kinds of classical logic reasoning. Thus, by default, such types are ruled out. There is a simple fix for our [regexp] definition, which is to place our new type in [Type]. While fixing the problem, we also expand the list of constructors to cover the remaining regular expression operators. *) | |
362 | 383 |
363 Inductive regexp : (string -> Prop) -> Type := | 384 Inductive regexp : (string -> Prop) -> Type := |
364 | Char : forall ch : ascii, | 385 | Char : forall ch : ascii, |
365 regexp (fun s => s = String ch "") | 386 regexp (fun s => s = String ch "") |
366 | Concat : forall P1 P2 (r1 : regexp P1) (r2 : regexp P2), | 387 | Concat : forall P1 P2 (r1 : regexp P1) (r2 : regexp P2), |
368 | Or : forall P1 P2 (r1 : regexp P1) (r2 : regexp P2), | 389 | Or : forall P1 P2 (r1 : regexp P1) (r2 : regexp P2), |
369 regexp (fun s => P1 s \/ P2 s) | 390 regexp (fun s => P1 s \/ P2 s) |
370 | Star : forall P (r : regexp P), | 391 | Star : forall P (r : regexp P), |
371 regexp (star P). | 392 regexp (star P). |
372 | 393 |
394 (** Many theorems about strings are useful for implementing a certified regexp matcher, and few of them are in the [Strings] library. The book source includes statements, proofs, and hint commands for a handful of such omittted theorems. Since they are orthogonal to our use of dependent types, we hide them in the rendered versions of this book. *) | |
395 | |
396 (* begin hide *) | |
373 Open Scope specif_scope. | 397 Open Scope specif_scope. |
374 | 398 |
375 Lemma length_emp : length "" <= 0. | 399 Lemma length_emp : length "" <= 0. |
376 crush. | 400 crush. |
377 Qed. | 401 Qed. |
395 substring 0 (length s) s = s. | 419 substring 0 (length s) s = s. |
396 induction s; substring. | 420 induction s; substring. |
397 Qed. | 421 Qed. |
398 | 422 |
399 Lemma substring_none : forall s n, | 423 Lemma substring_none : forall s n, |
400 substring n 0 s = EmptyString. | 424 substring n 0 s = "". |
401 induction s; substring. | 425 induction s; substring. |
402 Qed. | 426 Qed. |
403 | 427 |
404 Hint Rewrite substring_all substring_none : cpdt. | 428 Hint Rewrite substring_all substring_none : cpdt. |
405 | 429 |
428 | 452 |
429 induction s1; crush. | 453 induction s1; crush. |
430 Qed. | 454 Qed. |
431 | 455 |
432 Hint Rewrite substring_app_fst substring_app_snd using (trivial; fail) : cpdt. | 456 Hint Rewrite substring_app_fst substring_app_snd using (trivial; fail) : cpdt. |
457 (* end hide *) | |
458 | |
459 (** A few auxiliary functions help us in our final matcher definition. The function [split] will be used to implement the regexp concatenation case. *) | |
433 | 460 |
434 Section split. | 461 Section split. |
435 Variables P1 P2 : string -> Prop. | 462 Variables P1 P2 : string -> Prop. |
436 Variable P1_dec : forall s, {P1 s} + { ~P1 s}. | 463 Variable P1_dec : forall s, {P1 s} + { ~P1 s}. |
437 Variable P2_dec : forall s, {P2 s} + { ~P2 s}. | 464 Variable P2_dec : forall s, {P2 s} + { ~P2 s}. |
465 (** We require a choice of two arbitrary string predicates and functions for deciding them. *) | |
438 | 466 |
439 Variable s : string. | 467 Variable s : string. |
468 (** Our computation will take place relative to a single fixed string, so it is easiest to make it a [Variable], rather than an explicit argument to our functions. *) | |
469 | |
470 (** [split'] is the workhorse behind [split]. It searches through the possible ways of splitting [s] into two pieces, checking the two predicates against each such pair. [split'] progresses right-to-left, from splitting all of [s] into the first piece to splitting all of [s] into the second piece. It takes an extra argument, [n], which specifies how far along we are in this search process. *) | |
440 | 471 |
441 Definition split' (n : nat) : n <= length s | 472 Definition split' (n : nat) : n <= length s |
442 -> {exists s1, exists s2, length s1 <= n /\ s1 ++ s2 = s /\ P1 s1 /\ P2 s2} | 473 -> {exists s1, exists s2, length s1 <= n /\ s1 ++ s2 = s /\ P1 s1 /\ P2 s2} |
443 + {forall s1 s2, length s1 <= n -> s1 ++ s2 = s -> ~P1 s1 \/ ~P2 s2}. | 474 + {forall s1 s2, length s1 <= n -> s1 ++ s2 = s -> ~P1 s1 \/ ~P2 s2}. |
444 refine (fix F (n : nat) : n <= length s | 475 refine (fix F (n : nat) : n <= length s |
446 + {forall s1 s2, length s1 <= n -> s1 ++ s2 = s -> ~P1 s1 \/ ~P2 s2} := | 477 + {forall s1 s2, length s1 <= n -> s1 ++ s2 = s -> ~P1 s1 \/ ~P2 s2} := |
447 match n return n <= length s | 478 match n return n <= length s |
448 -> {exists s1, exists s2, length s1 <= n /\ s1 ++ s2 = s /\ P1 s1 /\ P2 s2} | 479 -> {exists s1, exists s2, length s1 <= n /\ s1 ++ s2 = s /\ P1 s1 /\ P2 s2} |
449 + {forall s1 s2, length s1 <= n -> s1 ++ s2 = s -> ~P1 s1 \/ ~P2 s2} with | 480 + {forall s1 s2, length s1 <= n -> s1 ++ s2 = s -> ~P1 s1 \/ ~P2 s2} with |
450 | O => fun _ => Reduce (P1_dec "" && P2_dec s) | 481 | O => fun _ => Reduce (P1_dec "" && P2_dec s) |
451 | S n' => fun _ => (P1_dec (substring 0 (S n') s) && P2_dec (substring (S n') (length s - S n') s)) | 482 | S n' => fun _ => (P1_dec (substring 0 (S n') s) |
483 && P2_dec (substring (S n') (length s - S n') s)) | |
452 || F n' _ | 484 || F n' _ |
453 end); clear F; crush; eauto 7; | 485 end); clear F; crush; eauto 7; |
454 match goal with | 486 match goal with |
455 | [ _ : length ?S <= 0 |- _ ] => destruct S | 487 | [ _ : length ?S <= 0 |- _ ] => destruct S |
456 | [ _ : length ?S' <= S ?N |- _ ] => | 488 | [ _ : length ?S' <= S ?N |- _ ] => |
457 generalize (eq_nat_dec (length S') (S N)); destruct 1 | 489 generalize (eq_nat_dec (length S') (S N)); destruct 1 |
458 end; crush. | 490 end; crush. |
459 Defined. | 491 Defined. |
460 | 492 |
493 (** There is one subtle point in the [split'] code that is worth mentioning. The main body of the function is a [match] on [n]. In the case where [n] is known to be [S n'], we write [S n'] in several places where we might be tempted to write [n]. However, without further work to craft proper [match] annotations, the type-checker does not use the equality between [n] and [S n']. Thus, it is common to see patterns repeated in [match] case bodies in dependently-typed Coq code. We can at least use a [let] expression to avoid copying the pattern more than once, replacing the first case body with: | |
494 | |
495 [[ | |
496 | |
497 | S n' => fun _ => let n := S n' in | |
498 (P1_dec (substring 0 n s) | |
499 && P2_dec (substring n (length s - n) s)) | |
500 || F n' _ | |
501 ]] | |
502 | |
503 [split] itself is trivial to implement in terms of [split']. We just ask [split'] to begin its search with [n = length s]. *) | |
504 | |
461 Definition split : {exists s1, exists s2, s = s1 ++ s2 /\ P1 s1 /\ P2 s2} | 505 Definition split : {exists s1, exists s2, s = s1 ++ s2 /\ P1 s1 /\ P2 s2} |
462 + {forall s1 s2, s = s1 ++ s2 -> ~P1 s1 \/ ~P2 s2}. | 506 + {forall s1 s2, s = s1 ++ s2 -> ~P1 s1 \/ ~P2 s2}. |
463 refine (Reduce (split' (n := length s) _)); crush; eauto. | 507 refine (Reduce (split' (n := length s) _)); crush; eauto. |
464 Defined. | 508 Defined. |
465 End split. | 509 End split. |
466 | 510 |
467 Implicit Arguments split [P1 P2]. | 511 Implicit Arguments split [P1 P2]. |
468 | 512 |
513 (* begin hide *) | |
469 Lemma app_empty_end : forall s, s ++ "" = s. | 514 Lemma app_empty_end : forall s, s ++ "" = s. |
470 induction s; crush. | 515 induction s; crush. |
471 Qed. | 516 Qed. |
472 | 517 |
473 Hint Rewrite app_empty_end : cpdt. | 518 Hint Rewrite app_empty_end : cpdt. |
555 Lemma plus_n_Sm' : forall n m : nat, S (n + m) = m + S n. | 600 Lemma plus_n_Sm' : forall n m : nat, S (n + m) = m + S n. |
556 intros; omega. | 601 intros; omega. |
557 Qed. | 602 Qed. |
558 | 603 |
559 Hint Rewrite minus_minus using omega : cpdt. | 604 Hint Rewrite minus_minus using omega : cpdt. |
605 (* end hide *) | |
606 | |
607 (** One more helper function will come in handy: [dec_star], for implementing another linear search through ways of splitting a string, this time for implementing the Kleene star. *) | |
560 | 608 |
561 Section dec_star. | 609 Section dec_star. |
562 Variable P : string -> Prop. | 610 Variable P : string -> Prop. |
563 Variable P_dec : forall s, {P s} + { ~P s}. | 611 Variable P_dec : forall s, {P s} + { ~P s}. |
564 | 612 |
613 (** Some new lemmas and hints about the [star] type family are useful here. We omit them here; they are included in the book source at this point. *) | |
614 | |
615 (* begin hide *) | |
565 Hint Constructors star. | 616 Hint Constructors star. |
566 | 617 |
567 Lemma star_empty : forall s, | 618 Lemma star_empty : forall s, |
568 length s = 0 | 619 length s = 0 |
569 -> star P s. | 620 -> star P s. |
621 intros; | 672 intros; |
622 match goal with | 673 match goal with |
623 | [ H : star _ _ |- _ ] => generalize (star_inv H); do 3 crush; eauto | 674 | [ H : star _ _ |- _ ] => generalize (star_inv H); do 3 crush; eauto |
624 end. | 675 end. |
625 Qed. | 676 Qed. |
677 (* end hide *) | |
678 | |
679 (** The function [dec_star''] implements a single iteration of the star. That is, it tries to find a string prefix matching [P], and it calls a parameter function on the remainder of the string. *) | |
626 | 680 |
627 Section dec_star''. | 681 Section dec_star''. |
628 Variable n : nat. | 682 Variable n : nat. |
683 (** [n] is the length of the prefix of [s] that we have already processed. *) | |
629 | 684 |
630 Variable P' : string -> Prop. | 685 Variable P' : string -> Prop. |
631 Variable P'_dec : forall n' : nat, n' > n | 686 Variable P'_dec : forall n' : nat, n' > n |
632 -> {P' (substring n' (length s - n') s)} | 687 -> {P' (substring n' (length s - n') s)} |
633 + { ~P' (substring n' (length s - n') s)}. | 688 + { ~P' (substring n' (length s - n') s)}. |
689 (** When we use [dec_star''], we will instantiate [P'_dec] with a function for continuing the search for more instances of [P] in [s]. *) | |
690 | |
691 (** Now we come to [dec_star''] itself. It takes as an input a natural [l] that records how much of the string has been searched so far, as we did for [split']. The return type expresses that [dec_star''] is looking for an index into [s] that splits [s] into a nonempty prefix and a suffix, such that the prefix satisfies [P] and the suffix satisfies [P']. *) | |
634 | 692 |
635 Definition dec_star'' (l : nat) | 693 Definition dec_star'' (l : nat) |
636 : {exists l', S l' <= l | 694 : {exists l', S l' <= l |
637 /\ P (substring n (S l') s) /\ P' (substring (n + S l') (length s - (n + S l')) s)} | 695 /\ P (substring n (S l') s) /\ P' (substring (n + S l') (length s - (n + S l')) s)} |
638 + {forall l', S l' <= l | 696 + {forall l', S l' <= l |
639 -> ~P (substring n (S l') s) \/ ~P' (substring (n + S l') (length s - (n + S l')) s)}. | 697 -> ~P (substring n (S l') s) |
698 \/ ~P' (substring (n + S l') (length s - (n + S l')) s)}. | |
640 refine (fix F (l : nat) : {exists l', S l' <= l | 699 refine (fix F (l : nat) : {exists l', S l' <= l |
641 /\ P (substring n (S l') s) /\ P' (substring (n + S l') (length s - (n + S l')) s)} | 700 /\ P (substring n (S l') s) /\ P' (substring (n + S l') (length s - (n + S l')) s)} |
642 + {forall l', S l' <= l | 701 + {forall l', S l' <= l |
643 -> ~P (substring n (S l') s) \/ ~P' (substring (n + S l') (length s - (n + S l')) s)} := | 702 -> ~P (substring n (S l') s) |
703 \/ ~P' (substring (n + S l') (length s - (n + S l')) s)} := | |
644 match l return {exists l', S l' <= l | 704 match l return {exists l', S l' <= l |
645 /\ P (substring n (S l') s) /\ P' (substring (n + S l') (length s - (n + S l')) s)} | 705 /\ P (substring n (S l') s) /\ P' (substring (n + S l') (length s - (n + S l')) s)} |
646 + {forall l', S l' <= l -> | 706 + {forall l', S l' <= l -> |
647 ~P (substring n (S l') s) \/ ~P' (substring (n + S l') (length s - (n + S l')) s)} with | 707 ~P (substring n (S l') s) |
708 \/ ~P' (substring (n + S l') (length s - (n + S l')) s)} with | |
648 | O => _ | 709 | O => _ |
649 | S l' => | 710 | S l' => |
650 (P_dec (substring n (S l') s) && P'_dec (n' := n + S l') _) | 711 (P_dec (substring n (S l') s) && P'_dec (n' := n + S l') _) |
651 || F l' | 712 || F l' |
652 end); clear F; crush; eauto 7; | 713 end); clear F; crush; eauto 7; |
654 | [ H : ?X <= S ?Y |- _ ] => destruct (eq_nat_dec X (S Y)); crush | 715 | [ H : ?X <= S ?Y |- _ ] => destruct (eq_nat_dec X (S Y)); crush |
655 end. | 716 end. |
656 Defined. | 717 Defined. |
657 End dec_star''. | 718 End dec_star''. |
658 | 719 |
720 (* begin hide *) | |
659 Lemma star_length_contra : forall n, | 721 Lemma star_length_contra : forall n, |
660 length s > n | 722 length s > n |
661 -> n >= length s | 723 -> n >= length s |
662 -> False. | 724 -> False. |
663 crush. | 725 crush. |
669 -> length s - n > 0. | 731 -> length s - n > 0. |
670 crush. | 732 crush. |
671 Qed. | 733 Qed. |
672 | 734 |
673 Hint Resolve star_length_contra star_length_flip substring_suffix_emp. | 735 Hint Resolve star_length_contra star_length_flip substring_suffix_emp. |
674 | 736 (* end hide *) |
737 | |
738 (** The work of [dec_star''] is nested inside another linear search by [dec_star'], which provides the final functionality we need, but for arbitrary suffixes of [s], rather than just for [s] overall. *) | |
739 | |
675 Definition dec_star' (n n' : nat) : length s - n' <= n | 740 Definition dec_star' (n n' : nat) : length s - n' <= n |
676 -> {star P (substring n' (length s - n') s)} | 741 -> {star P (substring n' (length s - n') s)} |
677 + {~star P (substring n' (length s - n') s)}. | 742 + { ~star P (substring n' (length s - n') s)}. |
678 refine (fix F (n n' : nat) {struct n} : length s - n' <= n | 743 refine (fix F (n n' : nat) {struct n} : length s - n' <= n |
679 -> {star P (substring n' (length s - n') s)} | 744 -> {star P (substring n' (length s - n') s)} |
680 + {~star P (substring n' (length s - n') s)} := | 745 + { ~star P (substring n' (length s - n') s)} := |
681 match n return length s - n' <= n | 746 match n return length s - n' <= n |
682 -> {star P (substring n' (length s - n') s)} | 747 -> {star P (substring n' (length s - n') s)} |
683 + {~star P (substring n' (length s - n') s)} with | 748 + { ~star P (substring n' (length s - n') s)} with |
684 | O => fun _ => Yes | 749 | O => fun _ => Yes |
685 | S n'' => fun _ => | 750 | S n'' => fun _ => |
686 le_gt_dec (length s) n' | 751 le_gt_dec (length s) n' |
687 || dec_star'' (n := n') (star P) (fun n0 _ => Reduce (F n'' n0 _)) (length s - n') | 752 || dec_star'' (n := n') (star P) (fun n0 _ => Reduce (F n'' n0 _)) (length s - n') |
688 end); clear F; crush; eauto; | 753 end); clear F; crush; eauto; |
693 | [ H1 : _ < _ - _, H2 : forall l' : nat, _ <= _ - _ -> _ |- _ ] => | 758 | [ H1 : _ < _ - _, H2 : forall l' : nat, _ <= _ - _ -> _ |- _ ] => |
694 generalize (H2 _ (lt_le_S _ _ H1)); tauto | 759 generalize (H2 _ (lt_le_S _ _ H1)); tauto |
695 end. | 760 end. |
696 Defined. | 761 Defined. |
697 | 762 |
763 (** Finally, we have [dec_star]. It has a straightforward implementation. We introduce a spurious match on [s] so that [simpl] will know to reduce calls to [dec_star]. The heuristic that [simpl] uses is only to unfold identifier definitions when doing so would simplify some [match] expression. *) | |
764 | |
698 Definition dec_star : {star P s} + { ~star P s}. | 765 Definition dec_star : {star P s} + { ~star P s}. |
699 refine (match s with | 766 refine (match s with |
700 | "" => Reduce (dec_star' (n := length s) 0 _) | 767 | "" => Reduce (dec_star' (n := length s) 0 _) |
701 | _ => Reduce (dec_star' (n := length s) 0 _) | 768 | _ => Reduce (dec_star' (n := length s) 0 _) |
702 end); crush. | 769 end); crush. |
703 Defined. | 770 Defined. |
704 End dec_star. | 771 End dec_star. |
705 | 772 |
773 (* begin hide *) | |
706 Lemma app_cong : forall x1 y1 x2 y2, | 774 Lemma app_cong : forall x1 y1 x2 y2, |
707 x1 = x2 | 775 x1 = x2 |
708 -> y1 = y2 | 776 -> y1 = y2 |
709 -> x1 ++ y1 = x2 ++ y2. | 777 -> x1 ++ y1 = x2 ++ y2. |
710 congruence. | 778 congruence. |
711 Qed. | 779 Qed. |
712 | 780 |
713 Hint Resolve app_cong. | 781 Hint Resolve app_cong. |
782 (* end hide *) | |
783 | |
784 (** With these helper functions completed, the implementation of our [matches] function is refreshingly straightforward. We only need one small piece of specific tactic work beyond what [crush] does for us. *) | |
714 | 785 |
715 Definition matches P (r : regexp P) s : {P s} + { ~P s}. | 786 Definition matches P (r : regexp P) s : {P s} + { ~P s}. |
716 refine (fix F P (r : regexp P) s : {P s} + { ~P s} := | 787 refine (fix F P (r : regexp P) s : {P s} + { ~P s} := |
717 match r with | 788 match r with |
718 | Char ch => string_dec s (String ch "") | 789 | Char ch => string_dec s (String ch "") |
720 | Or _ _ r1 r2 => F _ r1 s || F _ r2 s | 791 | Or _ _ r1 r2 => F _ r1 s || F _ r2 s |
721 | Star _ r => dec_star _ _ _ | 792 | Star _ r => dec_star _ _ _ |
722 end); crush; | 793 end); crush; |
723 match goal with | 794 match goal with |
724 | [ H : _ |- _ ] => generalize (H _ _ (refl_equal _)) | 795 | [ H : _ |- _ ] => generalize (H _ _ (refl_equal _)) |
725 end; | 796 end; tauto. |
726 tauto. | |
727 Defined. | 797 Defined. |
728 | 798 |
799 (* begin hide *) | |
729 Example hi := Concat (Char "h"%char) (Char "i"%char). | 800 Example hi := Concat (Char "h"%char) (Char "i"%char). |
730 Eval simpl in matches hi "hi". | 801 Eval simpl in matches hi "hi". |
731 Eval simpl in matches hi "bye". | 802 Eval simpl in matches hi "bye". |
732 | 803 |
733 Example a_b := Or (Char "a"%char) (Char "b"%char). | 804 Example a_b := Or (Char "a"%char) (Char "b"%char). |
739 Example a_star := Star (Char "a"%char). | 810 Example a_star := Star (Char "a"%char). |
740 Eval simpl in matches a_star "". | 811 Eval simpl in matches a_star "". |
741 Eval simpl in matches a_star "a". | 812 Eval simpl in matches a_star "a". |
742 Eval simpl in matches a_star "b". | 813 Eval simpl in matches a_star "b". |
743 Eval simpl in matches a_star "aa". | 814 Eval simpl in matches a_star "aa". |
815 (* end hide *) |