; A Scheme function that derives a DFA from a Regular Expression
; based on the derivatives method, originally describe in the paper
; Derivatives of Regular Expressions, Janusz A. Brzozowski, 
; Journal of the ACM, Volume 11, Issue 4, 1964. 

; Program author: Robert M. Keller
; Thanks to Stuart Pernsteiner for finding a bug in derivConct-helper (emp was originally lam).
;
; The syntax of regular expressions is here represented by S expressions, as follows:
;
; A single letter is represented by a symbol which is the letter itself. Below,
; the allowable alphabet is specified, although that aspect can be modified.
;
; The empty-set is represented by 'empty.
;
; The set consisting of the empty string is represented by 'lambda.
;
; The union of two (or more) sets is represented by (+ R S ...) where R and S ... are regular expressions.
;
; The concatenation of two (or more) sets is represented by (^ R S ...) where R and S ... are regular expressions.
;
; The star of a set is represented by (* R) where R is a regular expression.
;
; Examples, one per line:
;    a
;    b
;    empty
;    lambda
;    (+ a b)
;    (^ a b)
;    (* a)
;    (* (+ a b))
;    (+ (* a) (^ b c))
;
; We don't provide much error checking, just to keep the exposition simple.

; This is the assumed alphabet for letters in the regular expressions:

(define alphabet '(0 1 2 3 4 5 6 7 8 8 9 a b c d e f g h i j k l m n o p q r s t u v w x y z))
  
(define emp 'empty)     ; Avoid magic strings in the code

(define lam 'lambda)      ; lambda is reserved, so can't be an identifier

(define union-symbol '+)  ; Define the three operator symbols

(define concat-symbol '^)

(define star-symbol '*)

; In the algorithm, the original regular expression and its derivative become the states of the DFA.
; The initial state is the original regular expression.
; Accepting states are those states for which lambda (the empty sequence) is an element of the
; corresponding regular expression. The key parts of the conversion are determining derivatives,
; which are simplified to keep the resulting state set small, checking whether the set represented by
; an expression contains lambda, the empty string, and controlling the whole process, which is done
; by creating new regular expressions depth-first from existing ones. Here we need to check whether
; an expression was already created, and avoid processing it again, to avoid infinite loops.

(load "tester.scm")

; deriv computes the derivative of any regular expression regex
; with respect to a letter.

(define (deriv letter regex)
  (cond
    ((isEmpty  regex) emp)
    ((isLambda regex) emp)
    ((isLetter regex) (derivLetter letter regex))
    ((isUnion  regex) (derivUnion  letter regex))
    ((isConcat regex) (derivConcat letter regex))
    ((isStar   regex) (derivStar   letter regex))
    (else (((error "invalid regular expression " regex))))))

; union forms the union of two sets represented as lists
; Neither list should have any duplicates, and the result is then guaranteed not to.

(define (union x y)
  (if (null? x) 
      y
      (if (member (first x) y)
          (union (rest x) y)
          (cons (first x) (union (rest x) y)))))

; union-all returns the union of a list of lists, provided that none of the
; inner lists has duplicates.

(define (union-all L)
  (foldr union () L))


; The following are discriminator functions used in dispatching the type of expression
; They are not completely thorough in checking the form of the expression.

(define (isEmpty regex)
  (equal? regex emp))

(define (isLambda regex)
  (equal? regex lam))

(define (isLetter regex)
  (member regex alphabet))

(define (isUnion regex)
  (and
   (list? regex)
   (not (null? regex))
   (equal? (first regex) union-symbol)))

(define (isConcat regex)
  (and
   (list? regex)
   (not (null? regex))
   (equal? (first regex) concat-symbol)))

(define (isStar regex)
  (and
   (list? regex)
   (not (null? regex))
   (equal? (first regex) star-symbol)
   (not (null? (rest regex)))
   (null? (rest (rest regex)))))


; The following extract derivatives for various cases

; Derivative of a single letter expression
; If letter is the same as regex, then the derivative is lambda.
; Otherwise the derivative is the empty set.

(define (derivLetter letter regex)
  (if (equal? regex letter)
      lam
      emp))


; Derivative of a union expression
; Map deriv over the arguments, then simplify.

(define (derivUnion letter regex)
  (simplifyUnion (map (lambda(x) (deriv letter x)) (rest regex))))


; simplifyUnion simplifies a union of multiple regular expressions
; The argument is a list of regular expressions of a union, but
; the union symbol itself is assumed not present.
;
; The union is first flattened, so that any sub-unions appear at
; the top level.
; Any empty sets in the union are dropped.
; Any duplicates are dropped.
; If there are any star sub-expressions, then lambdas are dropped.
; If any of the elements of the union is itself a union, then the
; elements of the inner union are moved to the outer level

(define (simplifyUnion L)
  (let* (
        (flat (flattenUnion L))
        (clean (drop-instances emp flat))
        (cleaner (drop-successive-equals (sort clean re<?)))
        (cleaned (if (any-stars clean) (drop-instances lam cleaner) cleaner))
        )
    (if (null? cleaned)
        emp
        (if (null? (rest cleaned))
            (first cleaned)
            (cons union-symbol cleaned)))))

; re<? is used to establish an ordering on regular expressions for
; purpose of sorting
; The ordering is: empty, lambda, letters, unions, concatenation, star
; In the latter three cases, lexicographic ordering is applied to 
; substructure.

(define (re<? R S)
  (let (
        (rr (rank R))
        (ss (rank S))
        )
    (if (< rr ss)
        #t
        (if (< ss rr)
            #f
            (if (isLetter R)
                (atom<? R S)
                (if (or (isUnion R) (isConcat R) (isStar R))
                    (lex<? (rest R) (rest S))
                    #f))))))

(define (rank R)
    (cond
    ((isEmpty  R) 0)
    ((isLambda R) 1)
    ((isLetter R) 2)
    ((isUnion  R) 3)
    ((isConcat R) 4)
    ((isStar   R) 5)
    (else ((error "invalid regular expression " R)))))

(define (lex<? L M)
  (if (null? L) #t
      (if (null? M)
          #f
          (and (re<? (first L) (first M))
               (lex<? (rest L) (rest M))))))

(define (atom<? R S)
  (if (number? R)
      (if (number? S) (< R S) #t)
      (if (number? S)
          #f
          (string<? (symbol->string R) (symbol->string S)))))
     

; Flatten a union so that all sub-unions are at the same level
(define (flattenUnion L)
  (if (null? L)
      ()
      (if (isUnion (first L))
          (flattenUnion (append (rest (first L)) (rest L)))
          (cons (first L) (flattenUnion (rest L))))))

(test (flattenUnion '(+ a (+ b c) (+ (+ d e) f))) '(+ a b c d e f))


; Determine whether or not a list has any star sub-expressions.
; If it does, and this is a list of elements of a union, then
; any lambdas in the union can be eliminated.

(define (any-stars L)
  (if (null? L)
      #f
      (if (isStar (first L))
          #t
          (any-stars (rest L)))))

(test (any-stars '(a b (* c))) #t)
(test (any-stars '(a b c)) #f)


; Drop instances of an element from a list

(define (drop-instances x L)
  (if (null? L)
      ()
      (if (equal? x (first L))
          (drop-instances x (rest L))
          (cons (first L) (drop-instances x (rest L))))))

(test (drop-instances 'c '(a b c d c e f c g)) '(a b d e f g))


; Drop any duplicates that occur one right after the other
; The first argument represents a dummy previous element that
; should be some element not in the list.

(define (drop-successive-equals L)
  (if (null? L)
      ()
      (cons (first L) (drop-successive-equals-helper (first L) (rest L)))))

(define (drop-successive-equals-helper previous L)
  (if (null? L)
      ()
      (if (equal? (first L) previous)
          (drop-successive-equals-helper previous (rest L))
          (cons (first L) (drop-successive-equals-helper (first L) (rest L))))))
          
(test (drop-successive-equals '(a b b c d c e e e)) '(a b c d c e))

(test (derivUnion 1 '(+ 0 1)) lam)


; Derivative of a concatenation expression
; This is the most complex, because we have to distinguish
; between whether the first element "contains lambda" or not.

(define (derivConcat letter regex)
  (derivConcat-helper letter (rest regex))
  )

(define (derivConcat-helper letter subexps)
  (if (null? subexps)
      emp
      (let* (
             (first-deriv (deriv letter (first subexps)))
             (partial-result (simplifyConcat (cons first-deriv (rest subexps))))
             )
        (if (contains-lambda (first subexps))
            (simplifyUnion
             (list 
              (derivConcat-helper letter (rest subexps))
              partial-result
              ))
            partial-result))))

  
; Simplify a concatenation by
; simplifyConcat simplifies a union of concatenation regular expressions
; The argument is a list of regular expressions of a concatenation, but
; the concatenation symbol itself is assumed not present.
; It works by:
;   flattening out any sub-concatenations
;   dropping any lambda's
;   dropping any star sub-expressions that are equal

(define (simplifyConcat L)
  (let ((flat (flattenConcat L)))
    (if (has-instance emp flat)
        emp
        (let* (
               (clean (drop-instances lam flat))
               (cleaned (drop-successive-equal-stars () clean))
               )
          (if (null? cleaned)
              lam
              (if (null? (rest cleaned))
                  (first cleaned)
                  (cons concat-symbol cleaned)))))))


; Flatten a concatenation so that all sub-concatenations are at the same level
(define (flattenConcat L)
  (if (null? L)
      ()
      (if (isConcat (first L))
          (flattenConcat (append (rest (first L)) (rest L)))
          (cons (first L) (flattenConcat (rest L))))))

(test (flattenConcat '(^ a (^ b c) (^ (^ d e) f))) '(^ a b c d e f))


; Determine whether a list has an instance of a specified element

(define (has-instance x L)
  (if (null? L)
      #f
      (if (equal? x (first L))
          #t
          (has-instance x (rest L)))))


; Drop any consecutive star sub-expressions that are equal.

(define (drop-successive-equal-stars previous L)
  (if (null? L)
      ()
      (if (and (isStar (first L)) (equal? (first L) previous))
          (drop-successive-equal-stars previous (rest L))
          (cons (first L) (drop-successive-equal-stars (first L) (rest L))))))
          

; Derivative of a star expression

(define (derivStar letter regex)
  (simplifyConcat (map simplifyStar (list (deriv letter (second regex))  regex))))

; simplifyStar replaces (* (* R)) with (* R), recursively

(define (simplifyStar L)
   (if (isStar L)
       (if (isStar (second L))
           (simplifyStar (second L))
          L)
      L))

; Determine by syntactic analysis whether or not the argument regular expression
; represents a set with lambda as an element

(define (contains-lambda regex)
    (cond
    ((isEmpty  regex) #f)
    ((isLambda regex) #t)
    ((isLetter regex) #f)
    ((isUnion  regex) (foldr (lambda (x y) (or  x y))  #f (map contains-lambda (rest regex))))
    ((isConcat regex) (foldr (lambda (x y) (and x y))  #t (map contains-lambda (rest regex))))
    ((isStar   regex) #t)
    (else ((error "invalid regular expression " regex)))))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Make a DFA for a regular expression.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; This uses the derivatives method.
; The states of the generated dfa are regular expressions.
; The initial state is the given regular expression.
; The accepting states are those containing lambda.
; There is a transition from state R to state S via letter whenever
; S is (deriv letter R)
;
; In order for this to function to terminate, simplifications of states need to be made
; in such a way that checking equivalence of the generated regular expressions can be
; done syntactically. Brzozowski's original paper proves this is possible. The simplifications
; of regular expressios described earlier are my attempt to do the necessary syntactic checks.
; I haven't proved that my implementation is complete, so it is possible that this procedure
; will fail to terminate for some regular expressions. For this reason, if absolute safety
; is required, it might be better to use a different method for creating the dfa, such as
; by creating an nfa and converting it to a dfa. It should be easier to establish the
; correctness of the alternate implementation.

(define (re2dfa alphabet regex)
  (re2dfa-helper alphabet (list regex) () ()))


; Make a DFA by creating states and transitions for a backlog of regular expressions.
; accum is the accumulated list of states and transitions. Accumulation is done
; by stacking new rows atop old, so the final result has to be reversed, so that
; the initial state is first. accum is also used to check whether a state has been
; seen before, and if so, avoid reprocessing it.

(define (re2dfa-helper alphabet backlog seen accum)
  (if (null? backlog)
      (reverse accum)                     ; backlog eliminated, return accumulation
      
      (let ((regex (first backlog)))
 
        (if (member regex seen)
            (re2dfa-helper alphabet (rest backlog) seen accum) ; already processed
            
            (let* (                                      ; not processed before
                  (transitions (makeTransitions alphabet regex))
                  (newRow (makeRow regex transitions))
                  (moreStates (filter (lambda (state) (not (member state seen))) (map second transitions)))
                  )
            (re2dfa-helper                                ; process resulting backlog
             alphabet 
             (append (rest backlog) moreStates)
             (cons regex seen)
             (cons newRow accum)))))))


; Make the transitions from a given state, a regular expression,
; by taking derivatives with respect to each letter in the alphabet.

(define (makeTransitions alphabet regex)
  (if (null? alphabet)
      ()
      (let* (
            (letter (first alphabet))
            (deriv1 (deriv letter regex))
            )
        (cons 
             (list letter deriv1)
             (makeTransitions (rest alphabet) regex)))))


; Make a row of the state-transition table, adding an output value with the state
; and including the transitions. The output is 1 (accepting) if the set represented by
; the regular expression corresponding to the state has lambda as an element.

(define (makeRow regex transitions)
  (let ((output (if (contains-lambda regex) 1 0)))
  (cons regex (cons output transitions))))


; Prettify the DFA by replacing regular expression states with successive numbers

(define (prettify dfa)
  (rename-from dfa -1 ()))

; Rename the states of a dfa to be numbers, starting with 1 greater than the given number.

(define (rename-from dfa number alist)
  (if (null? dfa)
      ()
      (let* ((row (first dfa))
             (new-alist-and-number
              (add-to-alist number alist (cons (first row) (map second (rest (rest row)))))) ; augment symbol table
             (new-number (first new-alist-and-number))
             (new-alist (second new-alist-and-number))
             (new-row                                                       ; reconstruct row using augmented table
              (cons
               (second (assoc (first row) new-alist))
               (cons
                (second row)
                (map (lambda(x) (list (first x) (second (assoc (second x) new-alist))))
                     (rest (rest row))))))
             )
        (cons new-row (rename-from (rest dfa) new-number new-alist)))))


; Add a new association (between a state and a number) to an association list.

(define (add-to-alist number alist states)
  (if (null? states)
      (list number alist)
      (let* (
             (state (first states))
             (found (assoc state alist)))
        (if found
            (add-to-alist number alist (rest states))
            (let* (
                   (new-number (+ 1 number))
                   (new-alist (cons (list state new-number) alist))
                   )
            (add-to-alist new-number new-alist (rest states)))))))

; Get the row of a dfa, which contains the output and transitions, given the state name.

(define (get-row state-name dfa)
  (let ((found (assoc state-name dfa)))
    (if found
        found
        (error "named state not found in machine: " state-name dfa))))


; Remove all duplicates from a list of regular expressions

(define (remove-duplicates L)
  (define (remove-duplicates-helper L previous)
    (if (null? L)
        ()
        (if (equal? (first L) previous)
            (remove-duplicates-helper (rest L) previous)
            (cons (first L) (remove-duplicates-helper (rest L) (first L))))))
  (remove-duplicates-helper (sort L re<?) ()))


; Determine whether or not two dfa's are equivalent.
; This is done by determining whether or not their respective state
; are "indistinguishable". Two states are indistinguishable if, for each input sequence,
; they lead to states with the same output. First a composite ("direct union") machine
; is formed, by renaming the states of both machines so as not to conflict. Then a
; distinguishability test is applied to the renamed initial states.

(define (dfa-equivalent dfa1 dfa2)
  (let* (
        (renamed-dfa1 (rename-from dfa1 -1 ()))
        (initial1 (first (first renamed-dfa1)))
        (renamed-dfa2 (rename-from dfa2 (- (length renamed-dfa1) 1) ()))
        (initial2 (first (first renamed-dfa2)))
        (composite (append renamed-dfa1 renamed-dfa2))
        )
    (not (distinguishable initial1 initial2 composite))))
    

; Determine whether two states of a dfa are distinguishable.
; This is done by first partitioning the states into equivalence classes.
; Any pair of states in the same class are indistinguishable.

(define (distinguishable state1 state2 dfa)
 (not (in-same-block state1 state2 (ultimate-refinement dfa))))


; The ultimate partitioning of states into equivalence classes is done by an
; iterative process. First the initial partition is formed, wherein states with
; the same output are in the same block of a partition (so that the number of blocks
; is equal to the number of distinct output values).
   
(define (initial-partition dfa)
  (let (
         (possible-outputs (remove-duplicates (map second dfa)))
         )
       (map (lambda (output) 
              (map first 
                   (filter (lambda (row) (equal? output (second row)))
                           dfa)))
            possible-outputs)
         ))


; A partition of the states of a dfa is refined as follows:
; A necessary condition for two states to be in the same block of a refined partition
; is that the be in the same block of the original partition. Then they are also
; in the same block of the refined partition provided that, for each input symbol,
; their respective next states are in the same block of the original partition.
; Each block of the original partition thus yields one or more blocks of the
; refined partition. The function being mapped below yields the new blocks from
; a given block. The results from each block are appended together to form the new
; partition.

(define (refine partition dfa)
  (mappend (lambda (stateset) 
             (refine-block stateset partition dfa)) 
           partition))


; refine-block refines a single block of a partition into one or more blocks
; by gathering the next states that are in the same block of the original partition.

(define (refine-block block partition dfa)
  (if (null? block)
      ()
      (let* (
            (state (first block))
            (gathered (gather state (rest block) partition dfa))
            )
         (cons 
          (cons state gathered)   ;; need to remove gathered states
          (refine-block (remove-all gathered (rest block)) partition dfa)))))


; Remove elements of L from M
; The elements must occur in the same order

(define (remove-all L M)
  (if (null? L)
      M
      (if (equal? (first L) (first M))
          (remove-all (rest L) (rest M))
          (cons (first M) (remove-all L (rest M))))))


; Gather the states from a block
  
(define (gather state1 block partition dfa)
  (if (null? block)
      ()
      (let* ((state2 (first block))
             (gathered-rest (gather state1 (rest block) partition dfa)))
        (if (next-states-equivalent state1 state2 partition dfa)
            (cons state2 gathered-rest)
            gathered-rest))))
  

; Determine whether or not two states have equivalent next states pairwise,
; for each possible input letter.

(define (next-states-equivalent state1 state2 partition dfa)
  (let (
        (state1-transitions (rest (rest (get-row state1 dfa))))
        (state2-transitions (rest (rest (get-row state2 dfa))))
        )
    (next-states-equivalent-helper state1-transitions state2-transitions partition dfa)))


; Helper function for next-states-equivalent

(define (next-states-equivalent-helper transitions1 transitions2 partition dfa)
  (if (null? transitions1)
      (if (null? transitions2)
          #t
          (error "mism-matched transitions lists" transitions1 transitions2))
      (if (null? transitions2)
          (error "mism-matched transitions lists" transitions1 transitions2)
         (let (
               (transition1 (first transitions1))
               (transition2 (first transitions2))
               )
           (if (equal? (first transition1) (first transition2))
               (and 
                 (in-same-block (second transition1)
                                (second transition2)  partition)
                 (next-states-equivalent-helper (rest transitions1) (rest transitions2) partition dfa))
              (error "mism-matched transitions lists" transitions1 transitions2))))))


; Determine whether or not two states are in the same block of a partition.
; (The reason for the 'if' is that member generally returns a list (of from where the member
; starts to the end of the list), but we only want a truth value.

(define (in-same-block state1 state2 partition)
  (if (member state2 (find-block state1 partition))
      #t
      #f))
  

; Find the block of a partition containing a specified state.

(define (find-block state partition)
  (if (null? partition)
      (error "state not found in partition" state partition)
      (if (member state (first partition))
          (first partition)
          (find-block state (rest partition)))))


; Map a function returning a list over a list, then append the results together.

(define (mappend f L)
  (foldr append () (map f L)))


; Find the ultimate refinement of a given partition of states in a dfa,
; given an initial partition.

(define (ultimate-refinement dfa)
  (ultimate-refinement-helper (initial-partition dfa) dfa))

(define (ultimate-refinement-helper partition dfa)
  (let ((new-partition (refine partition dfa)))
    (if (equal? partition new-partition)
        partition
        (ultimate-refinement-helper new-partition dfa))))


; Determine whether or not two regular expressions are equivalent
; This is done by converting each to a dfa, then testing 
; equivalence of the resulting dfa's.

(define (re-equivalent re1 re2)
  (let ((letters (union (get-letters re1) (get-letters re2))))
    (dfa-equivalent (re2dfa letters re1) (re2dfa letters re2))))


; get the letters used in a regular expression

(define (get-letters regex)
  (get-letters-helper regex ()))

(define (get-letters-helper regex accum)
  (cond
    ((or
      (isEmpty  regex)
      (isLambda regex)) ())
    ((isLetter regex) (if (member regex accum) accum (cons regex accum)))
    ((or
      (isUnion  regex) 
      (isConcat regex)
      (isStar   regex)) (union-all (map get-letters (rest regex))))
    (else (((error "invalid regular expression " regex))))))

(test (get-letters '(+ (* (+ 0 1)) (^ 2 3 0))) '(1 2 3 0))


; Examples just testing derivatives

(test (deriv 0 0) lam)
(test (deriv 0 1) emp)

(test (deriv 0 '(+ 0 1)) lam)
(test (deriv 0 '(+ 1 2)) emp)

(test (deriv 0 '(^ 0 1)) 1)
(test (deriv 1 '(^ 0 1)) emp)

(test (deriv 0 '(^ 0 1 0)) '(^ 1 0))
(test (deriv 1 '(^ 1 1 0 1)) '(^ 1 0 1))

(test (deriv 0 '(+ 0 (^ 0 0) (^ 0 0 0))) '(+ lambda 0 (^ 0 0)))

(test (deriv 1 '(+ 0 (^ 1 0) (^ 1 0 0))) '(+ 0 (^ 0 0)))

(test (deriv 0 '(^ 0 (+ 0 1))) '(+ 0 1))
(test (deriv 0 '(^ 1 (+ 0 1))) emp)
(test (deriv 0 '(^ (+ 0 1) 0)) 0)

(test (deriv 0 '(^ (+ 0 1) (+ 0 1))) '(+ 0 1))

(test (deriv 0 '(* 0)) '(* 0))
(test (deriv 0 '(* (+ 0 1))) '(* (+ 0 1)))

(test (deriv 0 '(* (+ 0 1))) '(* (+ 0 1)))
(test (deriv 1 '(* (+ 0 1))) '(* (+ 0 1)))

(test (deriv 0 '(^ 1 (* (+ 0 1)))) emp)
(test (deriv 1 '(^ 1 (* (+ 0 1)))) '(* (+ 0 1)))

(test (deriv 0 '(^ (* 1) 0)) lam)
(test (deriv 0 '(^ (* 1) 0 0)) 0)

(test (deriv 0 '(* (+ 0 1))) '(* (+ 0 1)))
(test (deriv 1 '(* (+ 0 1))) '(* (+ 0 1)))

(test (deriv 0 '(* (^ 0 1))) '(^ 1 (* (^ 0 1))))
(test (deriv 1 '(* (^ 0 1))) emp)

(test (deriv 0 '(* (^ 0 (+ 0 1)))) '(^ (+ 0 1) (* (^ 0 (+ 0 1)))))

(test (deriv 0 '(* (^ (+ 0 1)(+ 0 1)))) '(^ (+ 0 1) (* (^ (+ 0 1)(+ 0 1)))))
(test (deriv 1 '(* (^ (+ 0 1)(+ 0 1)))) '(^ (+ 0 1) (* (^ (+ 0 1)(+ 0 1)))))


; Examples showing conversion to DFA:

(test (re2dfa '(0 1) '(+ 0 1))               ; (+ 0 1) is the regular expression
      
      '(((+ 0 1) 0 (0 lambda) (1 lambda))   ; There are three states, each a regular expression
        (lambda  1 (0 empty)  (1 empty))    ; Each state has an output (the second element)
        (empty   0 (0 empty)  (1 empty))))  ; and two transitions.

(test (re2dfa '(0 1) '(^ 0 1))
      
      '(((^ 0 1) 0 (0 1)     (1 empty))
        (1       0 (0 empty) (1 lambda))
        (empty   0 (0 empty) (1 empty))
        (lambda  1 (0 empty) (1 empty))))

(test (re2dfa '(0 1) '(* 0))
      
      '(((* 0)   1 (0 (* 0)) (1 empty)) 
        (empty   0 (0 empty) (1 empty))))


; Because the result of a regular-expression to dfa conversion is not necessarily unique,
; we test equivalence in the more complex cases, rather than requiring identity.

(test (dfa-equivalent
      (re2dfa '(0 1) '(^ 0 (* (+ 0 1)) 1))    ; Here we see states other than the initial one
                                              ; being non-trivial regular expressions.
      '(((^ 0 (* (+ 0 1)) 1) 0               ; There are four states in this example.
         (0 (^ (* (+ 0 1)) 1)) 
         (1 empty))
        
        ((^ (* (+ 0 1)) 1)   0 
         (0 (^ (* (+ 0 1)) 1)) 
         (1 (+ (^ (* (+ 0 1)) 1) lambda)))
        
        (empty 0 
         (0 empty)
         (1 empty))
        
        ((+ (^ (* (+ 0 1)) 1) lambda) 1 
         (0 (^ (* (+ 0 1)) 1)) 
         (1 (+ (^ (* (+ 0 1)) 1) lambda)))))
      #t)


(test (dfa-equivalent
      (re2dfa '(0 1) '(^ (+ 0 1) (* (+ 0 (^ 1 1)))))
      
      '(((^ (+ 0 1) (* (+ 0 (^ 1 1)))) 0
         (0 (* (+ 0 (^ 1 1))))
         (1 (* (+ 0 (^ 1 1)))))
        
        ((* (+ 0 (^ 1 1))) 1 
         (0 (* (+ 0 (^ 1 1)))) 
         (1 (^ 1 (* (+ 0 (^ 1 1))))))
        
        ((^ 1 (* (+ 0 (^ 1 1)))) 0 
         (0 empty) 
         (1 (* (+ 0 (^ 1 1)))))
        
        (empty 0 
         (0 empty) 
         (1 empty))))
      #t)

(test (dfa-equivalent
     (re2dfa '(0 1) '(^ 1 (* (+ (^ 0 1) (^ 1 0))) 0 1 0))
      
    '(((^ 1 (* (+ (^ 0 1) (^ 1 0))) 0 1 0) 0
      (0 empty)
      (1 (^ (* (+ (^ 0 1) (^ 1 0))) 0 1 0)))
      
     (empty 0 
      (0 empty) 
      (1 empty))
     
     ((^ (* (+ (^ 0 1) (^ 1 0))) 0 1 0) 0 
       (0 (+ (^ (^ 1 (* (+ (^ 0 1) (^ 1 0)))) 0 1 0) (^ 1 0)))
       (1 (^ (^ 0 (* (+ (^ 0 1) (^ 1 0)))) 0 1 0)))
     
     ((+ (^ (^ 1 (* (+ (^ 0 1) (^ 1 0)))) 0 1 0) (^ 1 0)) 0
      (0 empty) 
      (1 (+ (^ (* (+ (^ 0 1) (^ 1 0))) 0 1 0) 0)))
     
     ((^ (^ 0 (* (+ (^ 0 1) (^ 1 0)))) 0 1 0) 0
      (0 (^ (* (+ (^ 0 1) (^ 1 0))) 0 1 0)) 
      (1 empty))
     
     ((+ (^ (* (+ (^ 0 1) (^ 1 0))) 0 1 0) 0) 0
      (0 (+ (+ (^ (^ 1 (* (+ (^ 0 1) (^ 1 0)))) 0 1 0) (^ 1 0)) lambda))
      (1 (^ (^ 0 (* (+ (^ 0 1) (^ 1 0)))) 0 1 0)))
     
     ((+ (+ (^ (^ 1 (* (+ (^ 0 1) (^ 1 0)))) 0 1 0) (^ 1 0)) lambda) 1
      (0 empty) 
      (1 (+ (^ (* (+ (^ 0 1) (^ 1 0))) 0 1 0) 0)))))
      #t)

(test (re2dfa '(1) '(* (* 1)))
      '(((* (* 1)) 1 (1 (* 1))) 
        ((* 1) 1 (1 (* 1)))))


(define dfa1
      '((0 0 (0 1) (1 1)) 
        (1 1 (0 2) (1 2))
        (2 0 (0 2) (1 2))))

(test (prettify (re2dfa '(0 1) '(+ 0 1))) dfa1)


(define dfa2
      '((0 0 (0 1) (1 2)) 
        (1 0 (0 2) (1 3)) 
        (2 0 (0 2) (1 2)) 
        (3 1 (0 2) (1 2))))
  
(test (prettify (re2dfa '(0 1) '(^ 0 1))) dfa2)


(define dfa3
      '((0 1 (0 0) (1 1))
        (1 0 (0 1) (1 1))))

(test (prettify (re2dfa '(0 1) '(* 0))) dfa3)


(define dfa4
        '((0 0 (0 1) (1 2)) 
         (1 0 (0 1) (1 3))
         (2 0 (0 2) (1 2)) 
         (3 1 (0 1) (1 3))))

(test (prettify (re2dfa '(0 1) '(^ 0 (* (+ 0 1)) 1))) dfa4)


(define dfa5
       '((0 0 (0 1) (1 1)) 
        (1 1 (0 1) (1 2))
        (2 0 (0 3) (1 1)) 
        (3 0 (0 3) (1 3))))

(test (prettify (re2dfa '(0 1) '(^ (+ 0 1) (* (+ 0 (^ 1 1)))))) dfa5)


(define dfa6
      '((0 0 (0 1) (1 2)) 
        (1 0 (0 1) (1 1))
        (2 0 (0 3) (1 4)) 
        (3 0 (0 1) (1 5)) 
        (4 0 (0 2) (1 1)) 
        (5 0 (0 6) (1 4))
        (6 1 (0 1) (1 5))))

(test (prettify (re2dfa '(0 1) '(^ 1 (* (+ (^ 0 1) (^ 1 0))) 0 1 0))) dfa6)


(define dfa7
      '((0 0 (0 2) (1 1)) 
        (1 1 (0 4) (1 2))
        (2 0 (0 3) (1 1)) 
        (3 0 (0 3) (1 3))
        (4 1 (0 1) (1 2))))

(define dfa8
      '((0 0 (0 2) (1 1)) 
        (1 1 (0 1) (1 2))
        (2 0 (0 3) (1 1)) 
        (3 0 (0 3) (1 3))))


(define p1 (initial-partition dfa6))

(test p1 '((0 1 2 3 4 5) (6)))

(test (in-same-block 2 4 p1) #t)
(test (in-same-block 2 6 p1) #f)

(test (refine p1 dfa6) '((0 1 2 3 4) (5) (6)))

(test (ultimate-refinement dfa6) '((0) (1) (4) (2) (3) (5) (6)))

(test (ultimate-refinement dfa7) '((0) (2) (3) (1 4)))

(test (distinguishable 2 4 dfa1) #t)

(test (distinguishable 1 4 dfa7) #f)

(test (dfa-equivalent dfa1 dfa1) #t)

(test (dfa-equivalent dfa1 dfa2) #f)

(test (dfa-equivalent dfa1 dfa3) #f)

(test (dfa-equivalent dfa7 dfa8) #t)

; Tests for regular-expression equivalence

(test (re-equivalent '(* (+ 0 1)) '(* (+ 1 0))) #t)

(test (re-equivalent '(* (^ 0 1)) '(^ (+ 1 0))) #f)

(test (re-equivalent '(* (^ 0 1)) '(* (^ 1 0))) #f)

(test (re-equivalent '(+ lambda (* 1)) '(* 1)) #t)

(test (re-equivalent '(+ 1 (* 1)) '(* 1)) #t)

(test (re-equivalent '(^ 1 (* 1)) '(* 1)) #f)

(test (re-equivalent '(^ 1 (* 1)) '(^ (* 1) 1)) #t)

(test (re-equivalent '(^ empty 1) 'empty) #t)

(test (re-equivalent 'empty '(^ empty 1)) #t)

(test (re-equivalent '(* (^ (+ 0 1) (+ 0 1))) '(* (+ (^ 0 0) (^ 0 1) (^ 1 0) (^ 1 1)))) #t)

(test (re-equivalent '(* (+ 0 1)) '(* (+ 0 1))) #t)

(test (re-equivalent '(* (* 1)) '(* 1)) #t)

(test (re-equivalent '(^ (* 1) (* 1)) '(* 1)) #t)

(test (re-equivalent '(* (^ (* 1) (* 1))) '(* 1)) #t)

(test (re-equivalent '(* (^ (* 0) (* 1))) '(* (+ 0 1))) #t)

(test (re-equivalent '(* (+ lambda 1)) '(* 1)) #t)

(test (dfa-equivalent
       (re2dfa '(0 1) '(^ (* (+ 0 1)) (+ 1 0)))

      '(((^ (* (+ 0 1)) (+ 1 0)) 0
         (0 (+ lambda (^ (* (+ 0 1)) (+ 1 0)))) 
         (1 (+ lambda (^ (* (+ 0 1)) (+ 1 0)))))
        
        ((+ lambda (^ (* (+ 0 1)) (+ 1 0))) 1 
         (0 (+ lambda (^ (* (+ 0 1)) (+ 1 0)))) 
         (1 (+ lambda (^ (* (+ 0 1)) (+ 1 0)))))))
      
      #t)

(test (dfa-equivalent
       (re2dfa '(0 1) '(^ (* (+ 0 1)) (* (+ 1 0))))
       '(((^ (* (+ 0 1)) (* (+ 1 0))) 1 
          (0 (+ (^ (* (+ 0 1)) (* (+ 1 0))) (* (+ 1 0)))) 
          (1 (+ (^ (* (+ 0 1)) (* (+ 1 0))) (* (+ 1 0)))))
         
        ((+ (^ (* (+ 0 1)) (* (+ 1 0))) (* (+ 1 0))) 1 
          (0 (+ (^ (* (+ 0 1)) (* (+ 1 0))) (* (+ 1 0)))) 
          (1 (+ (^ (* (+ 0 1)) (* (+ 1 0))) (* (+ 1 0)))))))
      #t)

(test (dfa-equivalent
       (re2dfa '(0 1) '(* (^ (* (+ 0 1)) (* (+ 1 0)))))

       '(((* (^ (* (+ 0 1)) (* (+ 1 0)))) 1
          (0 (^ (+ (^ (* (+ 0 1)) (* (+ 1 0))) (* (+ 1 0))) (* (^ (* (+ 0 1)) (* (+ 1 0)))))) 
          (1 (^ (+ (^ (* (+ 0 1)) (* (+ 1 0))) (* (+ 1 0))) (* (^ (* (+ 0 1)) (* (+ 1 0)))))))
         
        ((^ (+ (^ (* (+ 0 1)) (* (+ 1 0))) (* (+ 1 0))) (* (^ (* (+ 0 1)) (* (+ 1 0))))) 1
          (0 (+ lambda (^ (+ (^ (* (+ 0 1)) (* (+ 1 0))) (* (+ 1 0))) (* (^ (* (+ 0 1)) (* (+ 1 0)))))))
          (1 (+ lambda (^ (+ (^ (* (+ 0 1)) (* (+ 1 0))) (* (+ 1 0))) (* (^ (* (+ 0 1)) (* (+ 1 0))))))))
        
        ((+ lambda (^ (+ (^ (* (+ 0 1)) (* (+ 1 0))) (* (+ 1 0))) (* (^ (* (+ 0 1)) (* (+ 1 0)))))) 1
          (0 (+ lambda (^ (+ (^ (* (+ 0 1)) (* (+ 1 0))) (* (+ 1 0))) (* (^ (* (+ 0 1)) (* (+ 1 0)))))))
          (1 (+ lambda (^ (+ (^ (* (+ 0 1)) (* (+ 1 0))) (* (+ 1 0))) (* (^ (* (+ 0 1)) (* (+ 1 0))))))))))
      #t)

(test (re-equivalent '(* (^ (* (+ 0 1)) (* (+ 1 0)))) '(* (+ 0 1))) #t)

(test (re-equivalent '(* (+ 0 1)) '(* (+ 1 0))) #t)

(test (re-equivalent '(+ (* 0) (^ (* 0) (* (^ 1 (* 0)))))'(* (+ 0 1))) #t)


; some test cases for dfa2re

(define (dfa2re dfa) emp) ; dummy

(test (dfa2re dfa1) '(+ 0 1))

(test (dfa2re dfa2) '(^ 0 1))

(test (dfa2re dfa3) '(* 0))

; on some of these, dfa2re gives a more complicated result

(test (re-equivalent (dfa2re dfa4) '(^ 0 (* (+ 0 1)) 1)) #t)

(test (re-equivalent (dfa2re dfa5) '(^ (+ 0 1) (* (+ 0 (^ 1 1))))) #t)

(test (re-equivalent (dfa2re dfa6) '(^ 1 (* (+ (^ 0 1) (^ 1 0))) 0 1 0)) #t)

(define multiple-of-3
       '((0 1 (0 0) (1 1))
         (1 0 (0 2) (1 0))
         (2 0 (0 1) (1 2))))
  
(test (re-equivalent (dfa2re multiple-of-3) '(* (+ 0 (^ 1 (* (^ 0 (* 1) 0)) 1)))) #t)

(tester 'show)