Commit 240aaf57 authored by Adam Chlipala's avatar Adam Chlipala

Prose for old part of Firstorder

parent e9410295
...@@ -20,16 +20,24 @@ Set Implicit Arguments. ...@@ -20,16 +20,24 @@ Set Implicit Arguments.
\chapter{First-Order Abstract Syntax}% *) \chapter{First-Order Abstract Syntax}% *)
(** TODO: Prose for this chapter *) (** Many people interested in interactive theorem-proving want to prove theorems about programming languages. That domain also provides a good setting for demonstrating how to apply the ideas from the earlier parts of this book. This part introduces some techniques for encoding the syntax and semantics of programming languages, along with some example proofs designed to be as practical as possible, rather than to illustrate basic Coq technique.
To prove anything about a language, we must first formalize the language's syntax. We have a broad design space to choose from, and it makes sense to start with the simplest options, so-called %\textit{%#<i>#first-order#</i>#%}% syntax encodings that do not use dependent types. These encodings are first-order because they do not use Coq function types in a critical way. In this chapter, we consider the most popular first-order encodings, using each to prove a basic type soundness theorem. *)
(** * Concrete Binding *) (** * Concrete Binding *)
(** The most obvious encoding of the syntax of programming languages follows usual context-free grammars literally. We represent variables as strings and include a variable in our AST definition wherever a variable appears in the informal grammar. Concrete binding turns out to involve a surprisingly large amount of menial bookkeeping, especially when we encode higher-order languages with nested binder scopes. This section's example should give a flavor of what is required. *)
Module Concrete. Module Concrete.
(** We need our variable type and its decidable equality operation. *)
Definition var := string. Definition var := string.
Definition var_eq := string_dec. Definition var_eq := string_dec.
(** We will formalize basic simply-typed lambda calculus. The syntax of expressions and types follows what we would write in a context-free grammar. *)
Inductive exp : Set := Inductive exp : Set :=
| Const : bool -> exp | Const : bool -> exp
| Var : var -> exp | Var : var -> exp
...@@ -40,12 +48,20 @@ Module Concrete. ...@@ -40,12 +48,20 @@ Module Concrete.
| Bool : type | Bool : type
| Arrow : type -> type -> type. | Arrow : type -> type -> type.
(** It is useful to define a syntax extension that lets us write function types in more standard notation. *)
Infix "-->" := Arrow (right associativity, at level 60). Infix "-->" := Arrow (right associativity, at level 60).
(** Now we turn to a typing judgment. We will need to define it in terms of typing contexts, which we represent as lists of pairs of variables and types. *)
Definition ctx := list (var * type). Definition ctx := list (var * type).
(** The definitions of our judgments will be prettier if we write them using mixfix syntax. To define a judgment for looking up the type of a variable in a context, we first %\textit{%#</i>#reserve#</i>#%}% a notation for the judgment. Reserved notations enable mutually-recursive definition of a judgment and its notation; in this sense, the reservation is like a forward declaration in C. *)
Reserved Notation "G |-v x : t" (no associativity, at level 90, x at next level). Reserved Notation "G |-v x : t" (no associativity, at level 90, x at next level).
(** Now we define the judgment itself, using a [where] clause to associate a notation definition. *)
Inductive lookup : ctx -> var -> type -> Prop := Inductive lookup : ctx -> var -> type -> Prop :=
| First : forall x t G, | First : forall x t G,
(x, t) :: G |-v x : t (x, t) :: G |-v x : t
...@@ -58,6 +74,8 @@ Module Concrete. ...@@ -58,6 +74,8 @@ Module Concrete.
Hint Constructors lookup. Hint Constructors lookup.
(** The same technique applies to defining the main typing judgment. We use an [at next level] clause to cause the argument [e] of the notation to be parsed at a low enough precedence level. *)
Reserved Notation "G |-e e : t" (no associativity, at level 90, e at next level). Reserved Notation "G |-e e : t" (no associativity, at level 90, e at next level).
Inductive hasType : ctx -> exp -> type -> Prop := Inductive hasType : ctx -> exp -> type -> Prop :=
...@@ -78,10 +96,12 @@ Module Concrete. ...@@ -78,10 +96,12 @@ Module Concrete.
Hint Constructors hasType. Hint Constructors hasType.
(** It is useful to know that variable lookup results are unchanged by adding extra bindings to the end of a context. *)
Lemma weaken_lookup : forall x t G' G1, Lemma weaken_lookup : forall x t G' G1,
G1 |-v x : t G1 |-v x : t
-> G1 ++ G' |-v x : t. -> G1 ++ G' |-v x : t.
induction G1 as [ | [x' t'] tl ]; crush; induction G1 as [ | [? ?] ? ]; crush;
match goal with match goal with
| [ H : _ |-v _ : _ |- _ ] => inversion H; crush | [ H : _ |-v _ : _ |- _ ] => inversion H; crush
end. end.
...@@ -89,6 +109,8 @@ Module Concrete. ...@@ -89,6 +109,8 @@ Module Concrete.
Hint Resolve weaken_lookup. Hint Resolve weaken_lookup.
(** The same property extends to the full typing judgment. *)
Theorem weaken_hasType' : forall G' G e t, Theorem weaken_hasType' : forall G' G e t,
G |-e e : t G |-e e : t
-> G ++ G' |-e e : t. -> G ++ G' |-e e : t.
...@@ -104,10 +126,14 @@ Module Concrete. ...@@ -104,10 +126,14 @@ Module Concrete.
Hint Resolve weaken_hasType. Hint Resolve weaken_hasType.
(** Much of the inconvenience of first-order encodings comes from the need to treat capture-avoiding substitution explicitly. We must start by defining a substitution function. *)
Section subst. Section subst.
Variable x : var. Variable x : var.
Variable e1 : exp. Variable e1 : exp.
(** We are substituting expression [e1] for every free occurrence of [x]. Note that this definition is specialized to the case where [e1] is closed; substitution is substantially more complicated otherwise, potentially involving explicit alpha-variation. Luckily, our example of type safety for a call-by-value semantics only requires this restricted variety of substitution. *)
Fixpoint subst (e2 : exp) : exp := Fixpoint subst (e2 : exp) : exp :=
match e2 with match e2 with
| Const b => Const b | Const b => Const b
...@@ -122,16 +148,22 @@ Module Concrete. ...@@ -122,16 +148,22 @@ Module Concrete.
else subst e') else subst e')
end. end.
(** We can prove a few theorems about substitution in well-typed terms, where we assume that [e1] is closed and has type [xt]. *)
Variable xt : type. Variable xt : type.
Hypothesis Ht' : nil |-e e1 : xt. Hypothesis Ht' : nil |-e e1 : xt.
(** It is helpful to establish a notation asserting the freshness of a particular variable in a context. *)
Notation "x # G" := (forall t' : type, In (x, t') G -> False) (no associativity, at level 90). Notation "x # G" := (forall t' : type, In (x, t') G -> False) (no associativity, at level 90).
(** To prove type preservation, we will need lemmas proving consequences of variable lookup proofs. *)
Lemma subst_lookup' : forall x' t, Lemma subst_lookup' : forall x' t,
x <> x' x <> x'
-> forall G1, G1 ++ (x, xt) :: nil |-v x' : t -> forall G1, G1 ++ (x, xt) :: nil |-v x' : t
-> G1 |-v x' : t. -> G1 |-v x' : t.
induction G1 as [ | [x'' t'] tl ]; crush; induction G1 as [ | [? ?] ? ]; crush;
match goal with match goal with
| [ H : _ |-v _ : _ |- _ ] => inversion H | [ H : _ |-v _ : _ |- _ ] => inversion H
end; crush. end; crush.
...@@ -143,7 +175,7 @@ Module Concrete. ...@@ -143,7 +175,7 @@ Module Concrete.
x' # G1 x' # G1
-> G1 ++ (x, xt) :: nil |-v x' : t -> G1 ++ (x, xt) :: nil |-v x' : t
-> t = xt. -> t = xt.
induction G1 as [ | [x'' t'] tl ]; crush; eauto; induction G1 as [ | [? ?] ? ]; crush; eauto;
match goal with match goal with
| [ H : _ |-v _ : _ |- _ ] => inversion H | [ H : _ |-v _ : _ |- _ ] => inversion H
end; crush; (elimtype False; eauto; end; crush; (elimtype False; eauto;
...@@ -157,11 +189,13 @@ Module Concrete. ...@@ -157,11 +189,13 @@ Module Concrete.
Implicit Arguments subst_lookup [x' t G1]. Implicit Arguments subst_lookup [x' t G1].
(** Another set of lemmas allows us to remove provably unused variables from the ends of typing contexts. *)
Lemma shadow_lookup : forall v t t' G1, Lemma shadow_lookup : forall v t t' G1,
G1 |-v x : t' G1 |-v x : t'
-> G1 ++ (x, xt) :: nil |-v v : t -> G1 ++ (x, xt) :: nil |-v v : t
-> G1 |-v v : t. -> G1 |-v v : t.
induction G1 as [ | [x'' t''] tl ]; crush; induction G1 as [ | [? ?] ? ]; crush;
match goal with match goal with
| [ H : nil |-v _ : _ |- _ ] => inversion H | [ H : nil |-v _ : _ |- _ ] => inversion H
| [ H1 : _ |-v _ : _, H2 : _ |-v _ : _ |- _ ] => | [ H1 : _ |-v _ : _, H2 : _ |-v _ : _ |- _ ] =>
...@@ -178,7 +212,7 @@ Module Concrete. ...@@ -178,7 +212,7 @@ Module Concrete.
induction 1; crush; eauto; induction 1; crush; eauto;
match goal with match goal with
| [ H : (?x0, _) :: _ ++ (x, _) :: _ |-e _ : _ |- _ ] => | [ H : (?x0, _) :: _ ++ (?x, _) :: _ |-e _ : _ |- _ ] =>
destruct (var_eq x0 x); subst; eauto destruct (var_eq x0 x); subst; eauto
end. end.
Qed. Qed.
...@@ -192,6 +226,8 @@ Module Concrete. ...@@ -192,6 +226,8 @@ Module Concrete.
Hint Resolve shadow_hasType. Hint Resolve shadow_hasType.
(** Disjointness facts may be extended to larger contexts when the appropriate obligations are met. *)
Lemma disjoint_cons : forall x x' t (G : ctx), Lemma disjoint_cons : forall x x' t (G : ctx),
x # G x # G
-> x' <> x -> x' <> x
...@@ -204,6 +240,8 @@ Module Concrete. ...@@ -204,6 +240,8 @@ Module Concrete.
Hint Resolve disjoint_cons. Hint Resolve disjoint_cons.
(** Finally, we arrive at the main theorem about substitution: it preserves typing. *)
Theorem subst_hasType : forall G e2 t, Theorem subst_hasType : forall G e2 t,
G |-e e2 : t G |-e e2 : t
-> forall G1, G = G1 ++ (x, xt) :: nil -> forall G1, G = G1 ++ (x, xt) :: nil
...@@ -219,6 +257,8 @@ Module Concrete. ...@@ -219,6 +257,8 @@ Module Concrete.
end; crush. end; crush.
Qed. Qed.
(** We wrap the last theorem into an easier-to-apply form specialized to closed expressions. *)
Theorem subst_hasType_closed : forall e2 t, Theorem subst_hasType_closed : forall e2 t,
(x, xt) :: nil |-e e2 : t (x, xt) :: nil |-e e2 : t
-> nil |-e subst e2 : t. -> nil |-e subst e2 : t.
...@@ -228,14 +268,20 @@ Module Concrete. ...@@ -228,14 +268,20 @@ Module Concrete.
Hint Resolve subst_hasType_closed. Hint Resolve subst_hasType_closed.
(** A notation for substitution will make the operational semantics easier to read. *)
Notation "[ x ~> e1 ] e2" := (subst x e1 e2) (no associativity, at level 80). Notation "[ x ~> e1 ] e2" := (subst x e1 e2) (no associativity, at level 80).
(** To define a call-by-value small-step semantics, we rely on a standard judgment characterizing which expressions are values. *)
Inductive val : exp -> Prop := Inductive val : exp -> Prop :=
| VConst : forall b, val (Const b) | VConst : forall b, val (Const b)
| VAbs : forall x e, val (Abs x e). | VAbs : forall x e, val (Abs x e).
Hint Constructors val. Hint Constructors val.
(** Now the step relation is easy to define. *)
Reserved Notation "e1 ==> e2" (no associativity, at level 90). Reserved Notation "e1 ==> e2" (no associativity, at level 90).
Inductive step : exp -> exp -> Prop := Inductive step : exp -> exp -> Prop :=
...@@ -254,6 +300,8 @@ Module Concrete. ...@@ -254,6 +300,8 @@ Module Concrete.
Hint Constructors step. Hint Constructors step.
(** The progress theorem says that any well-typed expression can take a step. To deal with limitations of the [induction] tactic, we put most of the proof in a lemma whose statement uses the usual trick of introducing extra equality hypotheses. *)
Lemma progress' : forall G e t, G |-e e : t Lemma progress' : forall G e t, G |-e e : t
-> G = nil -> G = nil
-> val e \/ exists e', e ==> e'. -> val e \/ exists e', e ==> e'.
...@@ -261,7 +309,7 @@ Module Concrete. ...@@ -261,7 +309,7 @@ Module Concrete.
try match goal with try match goal with
| [ H : _ |-e _ : _ --> _ |- _ ] => inversion H | [ H : _ |-e _ : _ --> _ |- _ ] => inversion H
end; end;
repeat match goal with match goal with
| [ H : _ |- _ ] => solve [ inversion H; crush; eauto ] | [ H : _ |- _ ] => solve [ inversion H; crush; eauto ]
end. end.
Qed. Qed.
...@@ -271,6 +319,8 @@ Module Concrete. ...@@ -271,6 +319,8 @@ Module Concrete.
intros; eapply progress'; eauto. intros; eapply progress'; eauto.
Qed. Qed.
(** A similar pattern works for the preservation theorem, which says that any step of execution preserves an expression's type. *)
Lemma preservation' : forall G e t, G |-e e : t Lemma preservation' : forall G e t, G |-e e : t
-> G = nil -> G = nil
-> forall e', e ==> e' -> forall e', e ==> e'
...@@ -289,9 +339,13 @@ Module Concrete. ...@@ -289,9 +339,13 @@ Module Concrete.
End Concrete. End Concrete.
(** This was a relatively simple example, giving only a taste of the proof burden associated with concrete syntax. We were helped by the fact that, with call-by-value semantics, we only need to reason about substitution in closed expressions. There was also no need to alpha-vary an expression. *)
(** * De Bruijn Indices *) (** * De Bruijn Indices *)
(** De Bruijn indices are much more popular than concrete syntax. This technique provides a %\textit{%#<i>#canonical#</i>#%}% representation of syntax, where any two alpha-equivalent expressions have syntactically equal encodings, removing the need for explicit reasoning about alpha conversion. Variables are represented as natural numbers, where variable [n] denotes a reference to the [n]th closest enclosing binder. Because variable references in effect point to binders, there is no need to label binders, such as function abstraction, with variables. *)
Module DeBruijn. Module DeBruijn.
Definition var := nat. Definition var := nat.
...@@ -309,6 +363,8 @@ Module DeBruijn. ...@@ -309,6 +363,8 @@ Module DeBruijn.
Infix "-->" := Arrow (right associativity, at level 60). Infix "-->" := Arrow (right associativity, at level 60).
(** The definition of typing proceeds much the same as in the last section. Since variables are numbers, contexts can be simple lists of types. This makes it possible to write the lookup judgment without mentioning inequality of variables. *)
Definition ctx := list type. Definition ctx := list type.
Reserved Notation "G |-v x : t" (no associativity, at level 90, x at next level). Reserved Notation "G |-v x : t" (no associativity, at level 90, x at next level).
...@@ -342,8 +398,12 @@ Module DeBruijn. ...@@ -342,8 +398,12 @@ Module DeBruijn.
where "G |-e e : t" := (hasType G e t). where "G |-e e : t" := (hasType G e t).
(** In the [hasType] case for function abstraction, there is no need to choose a variable name. We simply push the function domain type onto the context [G]. *)
Hint Constructors hasType. Hint Constructors hasType.
(** We prove roughly the same weakening theorems as before. *)
Lemma weaken_lookup : forall G' v t G, Lemma weaken_lookup : forall G' v t G,
G |-v v : t G |-v v : t
-> G ++ G' |-v v : t. -> G ++ G' |-v v : t.
...@@ -370,6 +430,8 @@ Module DeBruijn. ...@@ -370,6 +430,8 @@ Module DeBruijn.
Section subst. Section subst.
Variable e1 : exp. Variable e1 : exp.
(** Substitution is easier to define than with concrete syntax. While our old definition needed to use two comparisons for equality of variables, the de Bruijn substitution only needs one comparison. *)
Fixpoint subst (x : var) (e2 : exp) : exp := Fixpoint subst (x : var) (e2 : exp) : exp :=
match e2 with match e2 with
| Const b => Const b | Const b => Const b
...@@ -383,6 +445,8 @@ Module DeBruijn. ...@@ -383,6 +445,8 @@ Module DeBruijn.
Variable xt : type. Variable xt : type.
(** We prove similar theorems about inversion of variable lookup. *)
Lemma subst_eq : forall t G1, Lemma subst_eq : forall t G1,
G1 ++ xt :: nil |-v length G1 : t G1 ++ xt :: nil |-v length G1 : t
-> t = xt. -> t = xt.
...@@ -414,6 +478,8 @@ Module DeBruijn. ...@@ -414,6 +478,8 @@ Module DeBruijn.
Hypothesis Ht' : nil |-e e1 : xt. Hypothesis Ht' : nil |-e e1 : xt.
(** The next lemma is included solely to guide [eauto], which will not apply computational equivalences automatically. *)
Lemma hasType_push : forall dom G1 e' ran, Lemma hasType_push : forall dom G1 e' ran,
dom :: G1 |-e subst (length (dom :: G1)) e' : ran dom :: G1 |-e subst (length (dom :: G1)) e' : ran
-> dom :: G1 |-e subst (S (length G1)) e' : ran. -> dom :: G1 |-e subst (S (length G1)) e' : ran.
...@@ -422,6 +488,8 @@ Module DeBruijn. ...@@ -422,6 +488,8 @@ Module DeBruijn.
Hint Resolve hasType_push. Hint Resolve hasType_push.
(** Finally, we are ready for the main theorem about substitution and typing. *)
Theorem subst_hasType : forall G e2 t, Theorem subst_hasType : forall G e2 t,
G |-e e2 : t G |-e e2 : t
-> forall G1, G = G1 ++ xt :: nil -> forall G1, G = G1 ++ xt :: nil
...@@ -445,6 +513,8 @@ Module DeBruijn. ...@@ -445,6 +513,8 @@ Module DeBruijn.
Hint Resolve subst_hasType_closed. Hint Resolve subst_hasType_closed.
(** We define the operational semantics much as before. *)
Notation "[ x ~> e1 ] e2" := (subst e1 x e2) (no associativity, at level 80). Notation "[ x ~> e1 ] e2" := (subst e1 x e2) (no associativity, at level 80).
Inductive val : exp -> Prop := Inductive val : exp -> Prop :=
...@@ -471,6 +541,8 @@ Module DeBruijn. ...@@ -471,6 +541,8 @@ Module DeBruijn.
Hint Constructors step. Hint Constructors step.
(** Since we have added the right hints, the progress and preservation theorem statements and proofs are exactly the same as in the concrete encoding example. *)
Lemma progress' : forall G e t, G |-e e : t Lemma progress' : forall G e t, G |-e e : t
-> G = nil -> G = nil
-> val e \/ exists e', e ==> e'. -> val e \/ exists e', e ==> e'.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment