#lang racket ; file: sexpTranslator.rkt ; author: Robert Keller ; purpose: Illustrate recursive-descent parsing for S-expressions ; with additional translation to list structures for use within Racket. ; This simulate a process similar to what Racket does when it reads ; input. Input comes in as characters, and Rackets interprets the ; punctuation of parentheses and whitespace to create list structures. ; Atoms are translated into symbols. ; For example, the parser reading string ; " ((ab) cd (b)ef)" ; creates the equivalent of ; '((ab)cd(b)ef) )) ; in Racket. ; ; Example of (repl) parsing, one line at a time: ; S exp > ((ab) cd (b)ef) ; parse of " ((ab) cd (b)ef)" succeeded with ((ab) cd (b) ef) ; This is a simple parser for S-expressions based on recursive descent. ; The grammar, with start symbol S, is: ; ; S -> W '(' L W ')' W | W A W S-expressions ; L -> SL | empty List contents ; A -> CA | C Atoms ; C -> 'a' | 'b' | . . . 'z' Characters ; W -> optional whitespace chars, as defined by char-whitespace? (require htdp/testing) (require (lib "trace.ss")) ; Classify terminal alphabet (define left-paren #\( ) (define right-paren #\) ) (define non-parens (string->list "abcdefghijklmnopqrstuvwxyz")) ; Read-Eval-Print loop that calls the top-level parser. (define (repl) (begin (prompt) (let ( (string (read-line)) ; reads one line as a string ) (if (eof-object? string) string (begin (parse string) ; call top-level parse with string (repl) )) ) )) ;; Prompt the user to enter an expression. (define (prompt) (begin (newline) (display "S exp > "))) ; Discriminators for characters ; True iff c is a left parenthesis (define (left-paren? c) (char=? c left-paren)) ; True iff c is a right parenthesis (define (right-paren? c) (char=? c right-paren)) ; True iff c is not either parenthesis (define (non-paren? c) (member c non-parens)) ; Parse function success and failure abstractions: ; ; RUI stands for "Remaining Unparsed Input". ; It will be a list of characters. ; ; The rule argument is used for instrumentation purposes in tracing, ; so that we can see which parse functions succeed or fail. ; Constant symbols for tracing purposes (define success 'success) (define failure 'failure) ; Call succeed to return in the case a parse function succeeds ; rule is a symbol representing the function. ; newRUI is the new value of remaining unparsed input. (define (succeed rule newRUI result) (list success rule newRUI result)) ; Call fail to return in the case a parse function succeeds ; rule is a symbol representing the function. ; newRUI is the new value of remaining unparsed input. (define (fail rule RUI) (list failure rule RUI)) ; Call success? to determine whether a parse function result succeeded. (define (success? result) (equal? success (first result))) ; Call failure? to determine whether a parse function result failed. (define (failure? result) (equal? failure (first result))) ; Call residual to get the remaining unparsed input from a result. (define (residual result) (third result)) ; Call result to get the result, in the case of success (define (result arg) (fourth arg)) ; The top-level parse function. ; Given a string, attempts to parse the string. ; Returns one of these, as a string: ; ; parse of ... succeeded ; parse of ... succeeded but with residual ___ ; parse of ... failed ; ; The string is first converted to a list of characters, ; which is used by the parse functions as their RUI argument. (define (parse string) (let ( (S-result (parse-S (string->list string))) ) (if (success? S-result) (begin (display (string-append "parse of \"" string "\" succeeded with ")) (display (result S-result)) (if (null? (residual S-result)) '() (begin (display " but with residual ") (display (list->string (residual S-result))))) (newline)) (display (string-append "parse of " string " failed"))))) ; Individual parse functions, one for each non-terminal: ; Productions for "Character": C -> a|b|c|...|z (define (parse-C RUI) (cond ((null? RUI) (fail 'C RUI)) ; no more input ((non-paren? (first RUI)) (succeed 'C (rest RUI) (first RUI))) ; one of the desired chars (else (fail 'C RUI)))) ; anything else (check-expect (parse-C (string->list "a" )) (succeed 'C (string->list "" ) #\a)) (check-expect (parse-C (string->list "ab")) (succeed 'C (string->list "b" ) #\a)) (check-expect (parse-C (string->list "(" )) (fail 'C (string->list "(" ))) (check-expect (parse-C (string->list "" )) (fail 'C (string->list "" ))) ; Productions for "Atom": A -> CA | C (define (parse-A RUI) (if (null? RUI) (fail 'A RUI) ; no more input (let ( (C-result (parse-C RUI)) ; try a char ) (if (success? C-result) ; have a char (let* ( (char (result C-result)) (C-residue (residual C-result)) (A-result (parse-A C-residue)) ; recurse ) (if (success? A-result) (succeed 'A (residual A-result) ; case CA (cons char (result A-result))) ; accumulate list (succeed 'A C-residue (list char)))) ; case C (fail 'A RUI))))) ; anything else (check-expect (parse-A (string->list "a" )) (succeed 'A (string->list "" ) '(#\a))) (check-expect (parse-A (string->list "ab" )) (succeed 'A (string->list "" ) '(#\a #\b))) (check-expect (parse-A (string->list "abc" )) (succeed 'A (string->list "" ) '(#\a #\b #\c))) (check-expect (parse-A (string->list "a)" )) (succeed 'A (string->list ")") '(#\a))) (check-expect (parse-A (string->list "a(" )) (succeed 'A (string->list "(") '(#\a))) (check-expect (parse-A (string->list "(" )) (fail 'A (string->list "(" ))) (check-expect (parse-A (string->list ")" )) (fail 'A (string->list ")" ))) (check-expect (parse-A (string->list "" )) (fail 'A (string->list "" ))) ; Productions for "List Content": L -> SL | empty (define (parse-L RUI) (if (null? RUI) (succeed 'L RUI '()) ; no more input, empty case (let ( (S-result (parse-S RUI)) ; try an S ) (if (success? S-result) (let ( (L-result (parse-L (residual S-result))) ) (succeed 'L (residual L-result) (cons (result S-result) (result L-result)))) ; case SL (succeed 'L RUI '()))))) ; empty case (check-expect (parse-L (string->list "" )) (succeed 'L (string->list "" ) '())) (check-expect (parse-L (string->list ")" )) (succeed 'L (string->list ")") '())) (check-expect (parse-L (string->list "(" )) (succeed 'L (string->list "(") '())) ;; The following tests use parse-S implicitly, due to mutual recursion. (check-expect (parse-L (string->list "a" )) (succeed 'L (string->list "" ) '(a))) (check-expect (parse-L (string->list "abc" )) (succeed 'L (string->list "" ) '(abc))) (check-expect (parse-L (string->list "ab(cd)" )) (succeed 'L (string->list "" ) '(ab (cd)))) (check-expect (parse-L (string->list "(ab)cd" )) (succeed 'L (string->list "" ) '((ab) cd))) (check-expect (parse-L (string->list "()()()" )) (succeed 'L (string->list "" ) '(()()()))) (check-expect (parse-L (string->list "()())" )) (succeed 'L (string->list ")" ) '(()()))) (check-expect (parse-L (string->list "(ab)cd" )) (succeed 'L (string->list "" ) '((ab) cd))) ;; The following tests rely on skipping whitespace. (check-expect (parse-L (string->list "(ab) cd" )) (succeed 'L (string->list "" ) '((ab) cd))) (check-expect (parse-L (string->list "(a b) cd" )) (succeed 'L (string->list "" ) '((a b) cd))) (check-expect (parse-L (string->list "(a b) c d" )) (succeed 'L (string->list "" ) '((a b) c d))) ; Productions for "S Expression": S -> W ( L W ) W | W A W ; where W represents optional whitespace (define (parse-S RUI) (let ((RUI (skip-white RUI))) (cond ((null? RUI) (fail 'S RUI)) ; no more input, fail ((left-paren? (first RUI)) (let* ( (L-result (parse-L (skip-white (rest RUI)))) ; have (, try L (residue (skip-white (residual L-result))) ) (if (success? L-result) (if (and (not (null? residue)) (right-paren? (first residue))) ; have (L, try ')' (succeed 'S ; case (L) (skip-white (rest residue)) (result L-result)) (fail 'S RUI)) ; (L, but no ')', fail (fail 'S RUI)))) ; ( but no L, fail (else (let ( (A-result (parse-A RUI)) ; no (, try A ) (if (success? A-result) (succeed 'S (skip-white (residual A-result)) (list->symbol (result A-result))) ; have A, success (fail 'S RUI))))))) ; no A, fail (define (list->symbol L) (string->symbol (list->string L))) ; optional-whitespace parser (define (skip-white RUI) (cond ((null? RUI) '()) ((char-whitespace? (first RUI)) (skip-white (rest RUI))) (else RUI))) (check-expect (parse-S (string->list "a" )) (succeed 'S (string->list "" ) 'a)) (check-expect (parse-S (string->list "abc" )) (succeed 'S (string->list "" ) 'abc)) (check-expect (parse-S (string->list "" )) (fail 'S (string->list "" ))) (check-expect (parse-S (string->list ")" )) (fail 'S (string->list ")"))) (check-expect (parse-S (string->list "(" )) (fail 'S (string->list "("))) ;; The following tests use parse-L implicitly, due to mutual recursion. (check-expect (parse-S (string->list "(a)" )) (succeed 'S (string->list "" ) '(a) )) (check-expect (parse-S (string->list "(ab)" )) (succeed 'S (string->list "" ) '(ab) )) (check-expect (parse-S (string->list "()" )) (succeed 'S (string->list "" ) '() )) (check-expect (parse-S (string->list "(()())" )) (succeed 'S (string->list "" ) '(()()) )) (check-expect (parse-S (string->list "(())" )) (succeed 'S (string->list "" ) '(()) )) (check-expect (parse-S (string->list "(()a)" )) (succeed 'S (string->list "" ) '(()a) )) (check-expect (parse-S (string->list "(a())" )) (succeed 'S (string->list "" ) '(a()) )) (check-expect (parse-S (string->list "((a))" )) (succeed 'S (string->list "" ) '((a)) )) (check-expect (parse-S (string->list "((a)(b))")) (succeed 'S (string->list "" ) '((a)(b)))) (check-expect (parse-S (string->list "((ab)cd(b)ef)"))(succeed 'S (string->list "") '((ab)cd(b)ef))) ;; The following tests rely on skipping whitespace. (check-expect (parse-S (string->list "((ab) cd(b)ef)"))(succeed 'S (string->list "" ) '((ab)cd(b)ef) )) (check-expect (parse-S (string->list "((ab) cd (b)ef)"))(succeed 'S (string->list "" ) '((ab)cd(b)ef) )) (check-expect (parse-S (string->list "( (ab) cd (b)ef)"))(succeed 'S (string->list "" ) '((ab)cd(b)ef) )) (check-expect (parse-S (string->list " ((ab) cd (b)ef)"))(succeed 'S (string->list "") '((ab)cd(b)ef) )) (check-expect (parse-S (string->list "((a b) cd(b)ef)"))(succeed 'S (string->list "" ) '((a b)cd(b)ef) )) (check-expect (parse-S (string->list "((a b) cd (b)ef)"))(succeed 'S (string->list "") '((a b)cd(b)ef) )) (check-expect (parse-S (string->list "((a b) cd (b) ef)"))(succeed 'S (string->list "") '((a b)cd(b)ef) )) (check-expect (parse-S (string->list "((a b) cd (b) ef) "))(succeed 'S (string->list "") '((a b)cd(b)ef) )) (check-expect (parse-S (string->list "((a b) c d (b ) e f) "))(succeed 'S (string->list "") '((a b)c d(b)e f) )) (check-expect (parse-S (string->list " ( (a b) c d ( b ) e f) "))(succeed 'S (string->list "") '((a b)c d(b)e f) )) ; (trace parse-C parse-A parse-L parse-S parse) (generate-report)