#lang racket ; file: sexpParser.rkt ; author: Robert Keller ; purpose: Illustrate recursive-descent parsing for S-expressions ; This is a simple parser for S-expressions based on recursive descent. ; The grammar, with start symbol S, is: ; ; S -> W '(' L W ')' W | W A W S-expressions ; L -> SL | empty List contents ; A -> CA | C Atoms ; C -> 'a' | 'b' | . . . 'z' Characters ; W -> optional whitespace chars, as defined by char-whitespace? (require htdp/testing) (require (lib "trace.ss")) ; Classify terminal alphabet (define left-paren #\( ) (define right-paren #\) ) (define non-parens (string->list "abcdefghijklmnopqrstuvwxyz")) ; Discriminators for characters ; True iff c is a left parenthesis (define (left-paren? c) (char=? c left-paren)) ; True iff c is a right parenthesis (define (right-paren? c) (char=? c right-paren)) ; True iff c is not either parenthesis (define (non-paren? c) (member c non-parens)) ; Parse function success and failure abstractions: ; ; RUI stands for "Remaining Unparsed Input". ; It will be a list of characters. ; ; The rule argument is used for instrumentation purposes in tracing, ; so that we can see which parse functions succeed or fail. ; Constant symbols for tracing purposes (define success 'success) (define failure 'failure) ; Call succeed to return in the case a parse function succeeds ; rule is a symbol representing the function. ; newRUI is the new value of remaining unparsed input. (define (succeed rule newRUI) (list success rule newRUI)) ; Call fail to return in the case a parse function succeeds ; rule is a symbol representing the function. ; newRUI is the new value of remaining unparsed input. (define (fail rule RUI) (list failure rule RUI)) ; Call success? to determine whether a parse function result succeeded. (define (success? result) (equal? success (first result))) ; Call failure? to determine whether a parse function result failed. (define (failure? result) (equal? failure (first result))) ; Call residual to get the remaining unparsed input from a result. (define (residual result) (third result)) ; The top-level parse function. ; Given a string, attempts to parse the string. ; Returns one of these, as a string: ; ; parse of ... succeeded ; parse of ... succeeded but with residual ___ ; parse of ... failed ; ; The string is first converted to a list of characters, ; which is used by the parse functions as their RUI argument. (define (parse string) (let ( (result (parse-S (string->list string))) ) (if (success? result) (if (null? (residual result)) (string-append "parse of " string " succeeded") (string-append "parse of " string " succeeded, but with residual " (list->string (residual result)))) (string-append "parse of " string " failed")))) ; Individual parse functions, one for each non-terminal: ; Productions for "Character": C -> a|b|c|...|z (define (parse-C RUI) (cond ((null? RUI) (fail 'C RUI)) ; no more input ((non-paren? (first RUI)) (succeed 'C (rest RUI))) ; one of the desired chars (else (fail 'C RUI)))) ; anything else (check-expect (parse-C (string->list "a" )) (succeed 'C (string->list "" ))) (check-expect (parse-C (string->list "ab")) (succeed 'C (string->list "b" ))) (check-expect (parse-C (string->list "(" )) (fail 'C (string->list "(" ))) (check-expect (parse-C (string->list "" )) (fail 'C (string->list "" ))) ; Productions for "Atom": A -> CA | C (define (parse-A RUI) (if (null? RUI) (fail 'A RUI) ; no more input (let ( (C-result (parse-C RUI)) ; try a char ) (if (success? C-result) ; have a char (let ( (A-result (parse-A (residual C-result))) ; recurse ) (if (success? A-result) (succeed 'A (residual A-result)) ; case CA (succeed 'A (residual C-result)))) ; case C (fail 'A RUI))))) ; anything else (check-expect (parse-A (string->list "a" )) (succeed 'A (string->list "" ))) (check-expect (parse-A (string->list "ab" )) (succeed 'A (string->list "" ))) (check-expect (parse-A (string->list "abc" )) (succeed 'A (string->list "" ))) (check-expect (parse-A (string->list "a)" )) (succeed 'A (string->list ")" ))) (check-expect (parse-A (string->list "a(" )) (succeed 'A (string->list "(" ))) (check-expect (parse-A (string->list "(" )) (fail 'A (string->list "(" ))) (check-expect (parse-A (string->list ")" )) (fail 'A (string->list ")" ))) (check-expect (parse-A (string->list "" )) (fail 'A (string->list "" ))) ; Productions for "List Content": L -> SL | empty (define (parse-L RUI) (if (null? RUI) (succeed 'L RUI) ; no more input, empty case (let ( (S-result (parse-S RUI)) ; try an S ) (if (success? S-result) (parse-L (residual S-result)) ; case SL (succeed 'L RUI))))) ; empty case (check-expect (parse-L (string->list "" )) (succeed 'L (string->list "" ))) (check-expect (parse-L (string->list ")" )) (succeed 'L (string->list ")" ))) (check-expect (parse-L (string->list "(" )) (succeed 'L (string->list "(" ))) ; The following tests use parse-S implicitly, due to mutual recursion. (check-expect (parse-L (string->list "a" )) (succeed 'L (string->list "" ))) (check-expect (parse-L (string->list "abc" )) (succeed 'L (string->list "" ))) (check-expect (parse-L (string->list "ab(cd)" )) (succeed 'L (string->list "" ))) (check-expect (parse-L (string->list "(ab)cd" )) (succeed 'L (string->list "" ))) (check-expect (parse-L (string->list "()()()" )) (succeed 'L (string->list "" ))) (check-expect (parse-L (string->list "()())" )) (succeed 'L (string->list ")" ))) (check-expect (parse-L (string->list "(ab)cd" )) (succeed 'L (string->list "" ))) ; The following tests rely on skipping whitespace. (check-expect (parse-L (string->list "(ab) cd" )) (succeed 'L (string->list "" ))) (check-expect (parse-L (string->list "(a b) cd" )) (succeed 'L (string->list "" ))) (check-expect (parse-L (string->list "(a b) c d" )) (succeed 'L (string->list "" ))) ; Productions for "S Expression": S -> W ( L W ) W | W A W ; where W represents optional whitespace (define (parse-S RUI) (let ((RUI (skip-white RUI))) (cond ((null? RUI) (fail 'S RUI)) ; no more input, fail ((left-paren? (first RUI)) (let* ( (L-result (parse-L (skip-white (rest RUI)))) ; have (, try L (residue (skip-white (residual L-result))) ) (if (success? L-result) (if (and (not (null? residue)) (right-paren? (first residue))) ; have (L, try ')' (succeed 'S (skip-white (rest residue))) ; case (L) (fail 'S RUI)) ; (L, but no ')', fail (fail 'S RUI)))) ; ( but no L, fail (else (let ( (A-result (parse-A RUI)) ; no (, try A ) (if (success? A-result) (succeed 'S (skip-white (residual A-result))) ; have A, success (fail 'S RUI))))))) ; no A, fail ; optional-whitespace parser (define (skip-white RUI) (cond ((null? RUI) '()) ((char-whitespace? (first RUI)) (skip-white (rest RUI))) (else RUI))) (check-expect (parse-S (string->list "a" )) (succeed 'S (string->list "" ))) (check-expect (parse-S (string->list "abc" )) (succeed 'S (string->list "" ))) (check-expect (parse-S (string->list "" )) (fail 'S (string->list "" ))) (check-expect (parse-S (string->list ")" )) (fail 'S (string->list ")" ))) (check-expect (parse-S (string->list "(" )) (fail 'S (string->list "(" ))) ; The following tests use parse-L implicitly, due to mutual recursion. (check-expect (parse-S (string->list "(a)" )) (succeed 'S (string->list "" ))) (check-expect (parse-S (string->list "(ab)" )) (succeed 'S (string->list "" ))) (check-expect (parse-S (string->list "()" )) (succeed 'S (string->list "" ))) (check-expect (parse-S (string->list "()()" )) (succeed 'S (string->list "()" ))) (check-expect (parse-S (string->list "(())" )) (succeed 'S (string->list "" ))) (check-expect (parse-S (string->list "(()a)" )) (succeed 'S (string->list "" ))) (check-expect (parse-S (string->list "(a())" )) (succeed 'S (string->list "" ))) (check-expect (parse-S (string->list "((a))" )) (succeed 'S (string->list "" ))) (check-expect (parse-S (string->list "((a)(b))")) (succeed 'S (string->list "" ))) (check-expect (parse-S (string->list "((ab)cd(b)ef)"))(succeed 'S (string->list "" ))) ; The following tests rely on skipping whitespace. (check-expect (parse-S (string->list "((ab) cd(b)ef)"))(succeed 'S (string->list "" ))) (check-expect (parse-S (string->list "((ab) cd (b)ef)"))(succeed 'S (string->list "" ))) (check-expect (parse-S (string->list "( (ab) cd (b)ef)"))(succeed 'S (string->list "" ))) (check-expect (parse-S (string->list " ((ab) cd (b)ef)"))(succeed 'S (string->list "" ))) (check-expect (parse-S (string->list "((a b) cd(b)ef)"))(succeed 'S (string->list "" ))) (check-expect (parse-S (string->list "((a b) cd (b)ef)"))(succeed 'S (string->list "" ))) (check-expect (parse-S (string->list "((a b) cd (b) ef)"))(succeed 'S (string->list "" ))) (check-expect (parse-S (string->list "((a b) cd (b) ef) "))(succeed 'S (string->list "" ))) (check-expect (parse-S (string->list "((a b) c d (b ) e f) "))(succeed 'S (string->list "" ))) (check-expect (parse-S (string->list " ( (a b) c d ( b ) e f) "))(succeed 'S (string->list "" ))) (trace parse-C parse-A parse-L parse-S parse) (generate-report)