2017-08-18 16:57:17 +05:30
|
|
|
(library
|
|
|
|
(regex)
|
2017-08-18 17:51:17 +05:30
|
|
|
(export lit
|
2017-08-18 20:32:43 +05:30
|
|
|
seq
|
2017-08-18 17:51:17 +05:30
|
|
|
alt
|
|
|
|
opt
|
|
|
|
star
|
|
|
|
plus
|
2017-08-19 01:54:38 +05:30
|
|
|
get-thread-count
|
2017-08-18 23:58:07 +05:30
|
|
|
compile-rx)
|
2017-08-18 16:57:17 +05:30
|
|
|
(import (chezscheme)
|
2017-08-18 20:32:43 +05:30
|
|
|
(fmt fmt)
|
2017-08-18 17:51:17 +05:30
|
|
|
(loops)
|
2017-08-18 16:57:17 +05:30
|
|
|
(matchable))
|
|
|
|
|
|
|
|
;; Simple regex library, because it's friday and I'm bored.
|
|
|
|
;; Playing with the ideas in: https://swtch.com/~rsc/regexp/regexp2.html
|
|
|
|
;; which reminded me of reading through the source code to Sam in '93.
|
|
|
|
|
|
|
|
;; Rather than parsing a string we'll use expressions.
|
|
|
|
;; (lit <string>)
|
2017-08-18 20:32:43 +05:30
|
|
|
;; (seq rx1 rx2)
|
2017-08-18 16:57:17 +05:30
|
|
|
;; (alt rx1 rx2)
|
|
|
|
;; (opt rx)
|
|
|
|
;; (star rx)
|
|
|
|
;; (plus rx)
|
|
|
|
;;
|
2017-08-18 17:51:17 +05:30
|
|
|
;; The expressions get compiled into a vector of vm instructions.
|
2017-08-18 16:57:17 +05:30
|
|
|
;; (char c)
|
|
|
|
;; (match)
|
|
|
|
;; (jmp x)
|
|
|
|
;; (split x y)
|
|
|
|
|
|
|
|
;; instructions are closures that manipulate the thread
|
|
|
|
|
|
|
|
;; FIXME: slow
|
2017-08-18 17:51:17 +05:30
|
|
|
(define (append-instr code . i) (append code i))
|
|
|
|
(define (label-instr l) `(label ,l))
|
|
|
|
(define (jmp-instr l) `(jmp ,l))
|
|
|
|
(define (char-instr c) `(char ,c))
|
|
|
|
(define (split-instr l1 l2) `(split ,l1 ,l2))
|
|
|
|
(define (match-instr) '(match))
|
|
|
|
(define (match-instr? instr) (equal? '(match) instr))
|
2017-08-18 16:57:17 +05:30
|
|
|
|
|
|
|
(define (label-code label code)
|
|
|
|
(cons (label-instr label) code))
|
|
|
|
|
|
|
|
;; Compiles to a list of labelled instructions that can later be flattened
|
|
|
|
;; into a linear sequence.
|
2017-08-18 17:51:17 +05:30
|
|
|
(define (lit str)
|
|
|
|
(map char-instr (string->list str)))
|
|
|
|
|
2017-08-18 20:32:43 +05:30
|
|
|
(define (seq rx1 rx2)
|
2017-08-18 17:51:17 +05:30
|
|
|
(append rx1 rx2))
|
|
|
|
|
|
|
|
(define (alt rx1 rx2)
|
|
|
|
(let ((label1 (gensym))
|
|
|
|
(label2 (gensym))
|
|
|
|
(tail (gensym)))
|
|
|
|
(let ((c1 (label-code label1
|
|
|
|
(append-instr rx1 (jmp-instr tail))))
|
|
|
|
(c2 (label-code label2 rx2)))
|
|
|
|
(cons (split-instr label1 label2)
|
|
|
|
(append-instr (append c1 c2) (label-instr tail))))))
|
|
|
|
|
|
|
|
(define (opt rx)
|
|
|
|
(let ((head (gensym))
|
|
|
|
(tail (gensym)))
|
|
|
|
(cons (split-instr head tail)
|
|
|
|
(label-code head
|
|
|
|
(append-instr rx (label-instr tail))))))
|
|
|
|
|
|
|
|
(define (star rx)
|
|
|
|
(let ((head (gensym))
|
2017-08-18 22:31:31 +05:30
|
|
|
(body (gensym))
|
2017-08-18 17:51:17 +05:30
|
|
|
(tail (gensym)))
|
2017-08-18 22:31:31 +05:30
|
|
|
(label-code head
|
|
|
|
(cons (split-instr body tail)
|
|
|
|
(label-code body
|
|
|
|
(append-instr rx
|
|
|
|
(jmp-instr head)
|
|
|
|
(label-instr tail)))))))
|
2017-08-18 17:51:17 +05:30
|
|
|
|
|
|
|
(define (plus rx)
|
|
|
|
(let ((head (gensym))
|
|
|
|
(tail (gensym)))
|
|
|
|
(label-code head
|
|
|
|
(append-instr rx
|
|
|
|
(split-instr head tail)
|
|
|
|
(label-instr tail)))))
|
|
|
|
|
|
|
|
(define (label-locations code)
|
|
|
|
(let ((locs (make-eq-hashtable)))
|
|
|
|
(let loop ((pc 0)
|
|
|
|
(code code))
|
|
|
|
(if (null? code)
|
|
|
|
locs
|
|
|
|
(match (car code)
|
|
|
|
(('label l)
|
|
|
|
(begin
|
|
|
|
(hashtable-set! locs l pc)
|
|
|
|
(loop pc (cdr code))))
|
|
|
|
(instr
|
|
|
|
(loop (+ 1 pc) (cdr code))))))))
|
|
|
|
|
|
|
|
(define (remove-labels code locs)
|
|
|
|
(let loop ((pc 0)
|
|
|
|
(code code)
|
|
|
|
(acc '()))
|
|
|
|
(if (null? code)
|
|
|
|
(reverse acc)
|
|
|
|
(match (car code)
|
|
|
|
(('label l)
|
|
|
|
(loop pc (cdr code) acc))
|
|
|
|
|
|
|
|
(('jmp l)
|
|
|
|
(loop (+ 1 pc) (cdr code)
|
|
|
|
(cons `(jmp ,(hashtable-ref locs l #f)) acc)))
|
|
|
|
|
|
|
|
(('split l1 l2)
|
|
|
|
(loop (+ 1 pc) (cdr code)
|
|
|
|
(cons `(split ,(hashtable-ref locs l1 #f)
|
|
|
|
,(hashtable-ref locs l2 #f))
|
|
|
|
acc)))
|
|
|
|
|
|
|
|
(instr (loop (+ 1 pc) (cdr code) (cons instr acc)))))))
|
|
|
|
|
|
|
|
(define (optimise-jumps! code)
|
|
|
|
(upto (n (vector-length code))
|
|
|
|
(match (vector-ref code n)
|
|
|
|
(('jmp l)
|
|
|
|
(when (match-instr? (vector-ref code l))
|
|
|
|
(vector-set! code n (match-instr))))
|
|
|
|
|
|
|
|
(('split l1 l2)
|
|
|
|
(when (or (match-instr? (vector-ref code l1))
|
|
|
|
(match-instr? (vector-ref code l2)))
|
|
|
|
(vector-set! code n (match-instr))))
|
|
|
|
|
|
|
|
(_ _)))
|
|
|
|
code)
|
|
|
|
|
2017-08-18 23:58:07 +05:30
|
|
|
(define (compile-rx% rx)
|
2017-08-18 17:51:17 +05:30
|
|
|
(let ((rx (append-instr rx (match-instr))))
|
|
|
|
(optimise-jumps!
|
|
|
|
(list->vector
|
|
|
|
(remove-labels rx (label-locations rx))))))
|
2017-08-18 16:57:17 +05:30
|
|
|
|
|
|
|
;; A 'thread' consists of an index into the instructions. A 'bundle' holds
|
|
|
|
;; the current threads. Note there cannot be more threads than instructions,
|
|
|
|
;; so a bundle is represented as a bitvector the same length as the
|
|
|
|
;; instructions. Threads are run in lock step, all taking the same input.
|
2017-08-18 20:32:43 +05:30
|
|
|
|
2017-08-19 01:54:38 +05:30
|
|
|
(define-record-type thread-set (fields (mutable size) (mutable stack) (mutable seen)))
|
2017-08-18 20:32:43 +05:30
|
|
|
|
|
|
|
(define (mk-thread-set count)
|
2017-08-19 01:54:38 +05:30
|
|
|
(make-thread-set 0 (make-vector count) (make-vector count #f)))
|
2017-08-18 20:32:43 +05:30
|
|
|
|
2017-08-18 23:02:05 +05:30
|
|
|
(define (clear-thread-set! ts)
|
2017-08-19 01:54:38 +05:30
|
|
|
(thread-set-size-set! ts 0)
|
2017-08-18 23:02:05 +05:30
|
|
|
(vector-fill! (thread-set-seen ts) #f))
|
|
|
|
|
2017-08-19 01:54:38 +05:30
|
|
|
(define thread-count 0)
|
|
|
|
(define (get-thread-count)
|
|
|
|
thread-count)
|
|
|
|
|
2017-08-18 20:32:43 +05:30
|
|
|
(define (add-thread! ts i)
|
|
|
|
(unless (vector-ref (thread-set-seen ts) i)
|
2017-08-19 01:54:38 +05:30
|
|
|
;(fmt #t (dsp "adding thread ") (num i) nl)
|
|
|
|
(set! thread-count (+ 1 thread-count))
|
2017-08-18 20:32:43 +05:30
|
|
|
(vector-set! (thread-set-seen ts) i #t)
|
2017-08-19 01:54:38 +05:30
|
|
|
(vector-set! (thread-set-stack ts) (thread-set-size ts) i)
|
|
|
|
(thread-set-size-set! ts (+ 1 (thread-set-size ts)))))
|
2017-08-18 20:32:43 +05:30
|
|
|
|
|
|
|
(define (pop-thread! ts)
|
2017-08-19 01:54:38 +05:30
|
|
|
(if (zero? (thread-set-size ts))
|
2017-08-18 20:32:43 +05:30
|
|
|
#f
|
2017-08-19 01:54:38 +05:30
|
|
|
(begin
|
|
|
|
(thread-set-size-set! ts (- (thread-set-size ts) 1))
|
|
|
|
(vector-ref (thread-set-stack ts) (thread-set-size ts)))))
|
2017-08-18 20:32:43 +05:30
|
|
|
|
|
|
|
(define (no-threads? ts)
|
2017-08-19 01:54:38 +05:30
|
|
|
(zero? (thread-set-size ts)))
|
2017-08-18 20:32:43 +05:30
|
|
|
|
|
|
|
(define (any-matches? ts code)
|
|
|
|
(call/cc
|
|
|
|
(lambda (k)
|
|
|
|
(while (i (pop-thread! ts))
|
|
|
|
(if (match-instr? (vector-ref code i))
|
|
|
|
(k #t)))
|
|
|
|
#f)))
|
|
|
|
|
2017-08-18 23:05:40 +05:30
|
|
|
(define-syntax swap
|
|
|
|
(syntax-rules ()
|
|
|
|
((_ x y)
|
|
|
|
(let ((tmp x))
|
|
|
|
(set! x y)
|
|
|
|
(set! y tmp)))))
|
|
|
|
|
2017-08-18 23:58:07 +05:30
|
|
|
(define (compile-rx rx)
|
|
|
|
(let ((code (compile-rx% rx)))
|
2017-08-19 01:54:38 +05:30
|
|
|
;(fmt #t (dsp "running ") (pretty code) nl)
|
2017-08-18 23:58:07 +05:30
|
|
|
(let ((code-len (vector-length code)))
|
|
|
|
(let ((threads (mk-thread-set code-len))
|
|
|
|
(next-threads (mk-thread-set code-len)))
|
|
|
|
|
|
|
|
(define (compile-instr instr)
|
|
|
|
(match instr
|
|
|
|
(('match)
|
2017-08-19 01:21:24 +05:30
|
|
|
(lambda (in-c pc) 'match))
|
2017-08-18 23:58:07 +05:30
|
|
|
|
|
|
|
(('char c)
|
2017-08-19 01:21:24 +05:30
|
|
|
(lambda (in-c pc)
|
2017-08-18 23:58:07 +05:30
|
|
|
(when (char=? c in-c)
|
|
|
|
(add-thread! next-threads (+ 1 pc)))))
|
|
|
|
|
|
|
|
(('jmp l)
|
2017-08-19 01:21:24 +05:30
|
|
|
(lambda (in-c pc)
|
2017-08-18 23:58:07 +05:30
|
|
|
(add-thread! threads l)))
|
|
|
|
|
|
|
|
(('split l1 l2)
|
2017-08-19 01:21:24 +05:30
|
|
|
(lambda (in-c pc)
|
2017-08-18 23:58:07 +05:30
|
|
|
(add-thread! threads l1)
|
|
|
|
(add-thread! threads l2)))))
|
|
|
|
|
|
|
|
;; compile to thunks to avoid calling match in the loop.
|
|
|
|
(let ((code (vector-copy code)))
|
2017-08-19 01:21:24 +05:30
|
|
|
|
|
|
|
(define (step in-c)
|
|
|
|
(let loop ((pc (pop-thread! threads)))
|
|
|
|
(if pc
|
|
|
|
(if (eq? 'match ((vector-ref code pc) in-c pc))
|
|
|
|
'match
|
|
|
|
(loop (pop-thread! threads)))
|
|
|
|
#f)))
|
|
|
|
|
2017-08-18 23:58:07 +05:30
|
|
|
(upto (n code-len)
|
|
|
|
(vector-set! code n (compile-instr (vector-ref code n))))
|
|
|
|
|
|
|
|
(lambda (txt)
|
2017-08-19 01:21:24 +05:30
|
|
|
(add-thread! threads 0)
|
|
|
|
(let ((txt-len (string-length txt)))
|
|
|
|
(let c-loop ((c-index 0))
|
|
|
|
(if (< c-index txt-len)
|
|
|
|
(if (eq? 'match (step (string-ref txt c-index)))
|
|
|
|
#t
|
|
|
|
(if (no-threads? next-threads)
|
|
|
|
#f
|
|
|
|
(begin
|
|
|
|
(swap threads next-threads)
|
|
|
|
(clear-thread-set! next-threads)
|
|
|
|
(c-loop (+ 1 c-index)))))
|
|
|
|
(any-matches? threads code))))))))))
|
2017-08-18 20:32:43 +05:30
|
|
|
|
2017-08-18 16:57:17 +05:30
|
|
|
)
|