summaryrefslogtreecommitdiff
path: root/vim/bundle/slimv/slime/nregex.lisp
diff options
context:
space:
mode:
Diffstat (limited to 'vim/bundle/slimv/slime/nregex.lisp')
-rw-r--r--vim/bundle/slimv/slime/nregex.lisp523
1 files changed, 523 insertions, 0 deletions
diff --git a/vim/bundle/slimv/slime/nregex.lisp b/vim/bundle/slimv/slime/nregex.lisp
new file mode 100644
index 0000000..43586ef
--- /dev/null
+++ b/vim/bundle/slimv/slime/nregex.lisp
@@ -0,0 +1,523 @@
+;;;
+;;; This code was written by:
+;;;
+;;; Lawrence E. Freil <lef@freil.com>
+;;; National Science Center Foundation
+;;; Augusta, Georgia 30909
+;;;
+;;; This program was released into the public domain on 2005-08-31.
+;;; (See the slime-devel mailing list archive for details.)
+;;;
+;;; nregex.lisp - My 4/8/92 attempt at a Lisp based regular expression
+;;; parser.
+;;;
+;;; This regular expression parser operates by taking a
+;;; regular expression and breaking it down into a list
+;;; consisting of lisp expressions and flags. The list
+;;; of lisp expressions is then taken in turned into a
+;;; lambda expression that can be later applied to a
+;;; string argument for parsing.
+;;;;
+;;;; Modifications made 6 March 2001 By Chris Double (chris@double.co.nz)
+;;;; to get working with Corman Lisp 1.42, add package statement and export
+;;;; relevant functions.
+;;;;
+
+(in-package :cl-user)
+
+;; Renamed to slime-nregex avoid name clashes with other versions of
+;; this file. -- he
+
+;;;; CND - 6/3/2001
+(defpackage slime-nregex
+ (:use #:common-lisp)
+ (:export
+ #:regex
+ #:regex-compile
+ ))
+
+;;;; CND - 6/3/2001
+(in-package :slime-nregex)
+
+;;;
+;;; First we create a copy of macros to help debug the beast
+(eval-when (:compile-toplevel :load-toplevel :execute)
+(defvar *regex-debug* nil) ; Set to nil for no debugging code
+)
+
+(defmacro info (message &rest args)
+ (if *regex-debug*
+ `(format *standard-output* ,message ,@args)))
+
+;;;
+;;; Declare the global variables for storing the paren index list.
+;;;
+(defvar *regex-groups* (make-array 10))
+(defvar *regex-groupings* 0)
+
+;;;
+;;; Declare a simple interface for testing. You probably wouldn't want
+;;; to use this interface unless you were just calling this once.
+;;;
+(defun regex (expression string)
+ "Usage: (regex <expression> <string)
+ This function will call regex-compile on the expression and then apply
+ the string to the returned lambda list."
+ (let ((findit (cond ((stringp expression)
+ (regex-compile expression))
+ ((listp expression)
+ expression)))
+ (result nil))
+ (if (not (funcall (if (functionp findit)
+ findit
+ (eval `(function ,findit))) string))
+ (return-from regex nil))
+ (if (= *regex-groupings* 0)
+ (return-from regex t))
+ (dotimes (i *regex-groupings*)
+ (push (funcall 'subseq
+ string
+ (car (aref *regex-groups* i))
+ (cadr (aref *regex-groups* i)))
+ result))
+ (reverse result)))
+
+;;;
+;;; Declare some simple macros to make the code more readable.
+;;;
+(defvar *regex-special-chars* "?*+.()[]\\${}")
+
+(defmacro add-exp (list)
+ "Add an item to the end of expression"
+ `(setf expression (append expression ,list)))
+
+;;;
+;;; Define a function that will take a quoted character and return
+;;; what the real character should be plus how much of the source
+;;; string was used. If the result is a set of characters, return an
+;;; array of bits indicating which characters should be set. If the
+;;; expression is one of the sub-group matches return a
+;;; list-expression that will provide the match.
+;;;
+(defun regex-quoted (char-string &optional (invert nil))
+ "Usage: (regex-quoted <char-string> &optional invert)
+ Returns either the quoted character or a simple bit vector of bits set for
+ the matching values"
+ (let ((first (char char-string 0))
+ (result (char char-string 0))
+ (used-length 1))
+ (cond ((eql first #\n)
+ (setf result #\NewLine))
+ ((eql first #\c)
+ (setf result #\Return))
+ ((eql first #\t)
+ (setf result #\Tab))
+ ((eql first #\d)
+ (setf result #*0000000000000000000000000000000000000000000000001111111111000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000))
+ ((eql first #\D)
+ (setf result #*1111111111111111111111111111111111111111111111110000000000111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111))
+ ((eql first #\w)
+ (setf result #*0000000000000000000000000000000000000000000000001111111111000000011111111111111111111111111000010111111111111111111111111110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000))
+ ((eql first #\W)
+ (setf result #*1111111111111111111111111111111111111111111111110000000000111111100000000000000000000000000111101000000000000000000000000001111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111))
+ ((eql first #\b)
+ (setf result #*0000000001000000000000000000000011000000000010100000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000))
+ ((eql first #\B)
+ (setf result #*1111111110111111111111111111111100111111111101011111111111011111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111))
+ ((eql first #\s)
+ (setf result #*0000000001100000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000))
+ ((eql first #\S)
+ (setf result #*1111111110011111111111111111111101111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111))
+ ((and (>= (char-code first) (char-code #\0))
+ (<= (char-code first) (char-code #\9)))
+ (if (and (> (length char-string) 2)
+ (and (>= (char-code (char char-string 1)) (char-code #\0))
+ (<= (char-code (char char-string 1)) (char-code #\9))
+ (>= (char-code (char char-string 2)) (char-code #\0))
+ (<= (char-code (char char-string 2)) (char-code #\9))))
+ ;;
+ ;; It is a single character specified in octal
+ ;;
+ (progn
+ (setf result (do ((x 0 (1+ x))
+ (return 0))
+ ((= x 2) return)
+ (setf return (+ (* return 8)
+ (- (char-code (char char-string x))
+ (char-code #\0))))))
+ (setf used-length 3))
+ ;;
+ ;; We have a group number replacement.
+ ;;
+ (let ((group (- (char-code first) (char-code #\0))))
+ (setf result `((let ((nstring (subseq string (car (aref *regex-groups* ,group))
+ (cadr (aref *regex-groups* ,group)))))
+ (if (< length (+ index (length nstring)))
+ (return-from compare nil))
+ (if (not (string= string nstring
+ :start1 index
+ :end1 (+ index (length nstring))))
+ (return-from compare nil)
+ (incf index (length nstring)))))))))
+ (t
+ (setf result first)))
+ (if (and (vectorp result) invert)
+ (bit-xor result #*1111111110011111111111111111111101111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111 t))
+ (values result used-length)))
+
+;;;
+;;; Now for the main regex compiler routine.
+;;;
+(defun regex-compile (source &key (anchored nil))
+ "Usage: (regex-compile <expression> [ :anchored (t/nil) ])
+ This function take a regular expression (supplied as source) and
+ compiles this into a lambda list that a string argument can then
+ be applied to. It is also possible to compile this lambda list
+ for better performance or to save it as a named function for later
+ use"
+ (info "Now entering regex-compile with \"~A\"~%" source)
+ ;;
+ ;; This routine works in two parts.
+ ;; The first pass take the regular expression and produces a list of
+ ;; operators and lisp expressions for the entire regular expression.
+ ;; The second pass takes this list and produces the lambda expression.
+ (let ((expression '()) ; holder for expressions
+ (group 1) ; Current group index
+ (group-stack nil) ; Stack of current group endings
+ (result nil) ; holder for built expression.
+ (fast-first nil)) ; holder for quick unanchored scan
+ ;;
+ ;; If the expression was an empty string then it alway
+ ;; matches (so lets leave early)
+ ;;
+ (if (= (length source) 0)
+ (return-from regex-compile
+ '(lambda (&rest args)
+ (declare (ignore args))
+ t)))
+ ;;
+ ;; If the first character is a caret then set the anchored
+ ;; flags and remove if from the expression string.
+ ;;
+ (cond ((eql (char source 0) #\^)
+ (setf source (subseq source 1))
+ (setf anchored t)))
+ ;;
+ ;; If the first sequence is .* then also set the anchored flags.
+ ;; (This is purely for optimization, it will work without this).
+ ;;
+ (if (>= (length source) 2)
+ (if (string= source ".*" :start1 0 :end1 2)
+ (setf anchored t)))
+ ;;
+ ;; Also, If this is not an anchored search and the first character is
+ ;; a literal, then do a quick scan to see if it is even in the string.
+ ;; If not then we can issue a quick nil,
+ ;; otherwise we can start the search at the matching character to skip
+ ;; the checks of the non-matching characters anyway.
+ ;;
+ ;; If I really wanted to speed up this section of code it would be
+ ;; easy to recognize the case of a fairly long multi-character literal
+ ;; and generate a Boyer-Moore search for the entire literal.
+ ;;
+ ;; I generate the code to do a loop because on CMU Lisp this is about
+ ;; twice as fast a calling position.
+ ;;
+ (if (and (not anchored)
+ (not (position (char source 0) *regex-special-chars*))
+ (not (and (> (length source) 1)
+ (position (char source 1) *regex-special-chars*))))
+ (setf fast-first `((if (not (dotimes (i length nil)
+ (if (eql (char string i)
+ ,(char source 0))
+ (return (setf start i)))))
+ (return-from final-return nil)))))
+ ;;
+ ;; Generate the very first expression to save the starting index
+ ;; so that group 0 will be the entire string matched always
+ ;;
+ (add-exp '((setf (aref *regex-groups* 0)
+ (list index nil))))
+ ;;
+ ;; Loop over each character in the regular expression building the
+ ;; expression list as we go.
+ ;;
+ (do ((eindex 0 (1+ eindex)))
+ ((= eindex (length source)))
+ (let ((current (char source eindex)))
+ (info "Now processing character ~A index = ~A~%" current eindex)
+ (case current
+ ((#\.)
+ ;;
+ ;; Generate code for a single wild character
+ ;;
+ (add-exp '((if (>= index length)
+ (return-from compare nil)
+ (incf index)))))
+ ((#\$)
+ ;;
+ ;; If this is the last character of the expression then
+ ;; anchor the end of the expression, otherwise let it slide
+ ;; as a standard character (even though it should be quoted).
+ ;;
+ (if (= eindex (1- (length source)))
+ (add-exp '((if (not (= index length))
+ (return-from compare nil))))
+ (add-exp '((if (not (and (< index length)
+ (eql (char string index) #\$)))
+ (return-from compare nil)
+ (incf index))))))
+ ((#\*)
+ (add-exp '(ASTRISK)))
+
+ ((#\+)
+ (add-exp '(PLUS)))
+
+ ((#\?)
+ (add-exp '(QUESTION)))
+
+ ((#\()
+ ;;
+ ;; Start a grouping.
+ ;;
+ (incf group)
+ (push group group-stack)
+ (add-exp `((setf (aref *regex-groups* ,(1- group))
+ (list index nil))))
+ (add-exp `(,group)))
+ ((#\))
+ ;;
+ ;; End a grouping
+ ;;
+ (let ((group (pop group-stack)))
+ (add-exp `((setf (cadr (aref *regex-groups* ,(1- group)))
+ index)))
+ (add-exp `(,(- group)))))
+ ((#\[)
+ ;;
+ ;; Start of a range operation.
+ ;; Generate a bit-vector that has one bit per possible character
+ ;; and then on each character or range, set the possible bits.
+ ;;
+ ;; If the first character is carat then invert the set.
+ (let* ((invert (eql (char source (1+ eindex)) #\^))
+ (bitstring (make-array 256 :element-type 'bit
+ :initial-element
+ (if invert 1 0)))
+ (set-char (if invert 0 1)))
+ (if invert (incf eindex))
+ (do ((x (1+ eindex) (1+ x)))
+ ((eql (char source x) #\]) (setf eindex x))
+ (info "Building range with character ~A~%" (char source x))
+ (cond ((and (eql (char source (1+ x)) #\-)
+ (not (eql (char source (+ x 2)) #\])))
+ (if (>= (char-code (char source x))
+ (char-code (char source (+ 2 x))))
+ (error "Invalid range \"~A-~A\". Ranges must be in acending order"
+ (char source x) (char source (+ 2 x))))
+ (do ((j (char-code (char source x)) (1+ j)))
+ ((> j (char-code (char source (+ 2 x))))
+ (incf x 2))
+ (info "Setting bit for char ~A code ~A~%" (code-char j) j)
+ (setf (sbit bitstring j) set-char)))
+ (t
+ (cond ((not (eql (char source x) #\]))
+ (let ((char (char source x)))
+ ;;
+ ;; If the character is quoted then find out what
+ ;; it should have been
+ ;;
+ (if (eql (char source x) #\\ )
+ (let ((length))
+ (multiple-value-setq (char length)
+ (regex-quoted (subseq source x) invert))
+ (incf x length)))
+ (info "Setting bit for char ~A code ~A~%" char (char-code char))
+ (if (not (vectorp char))
+ (setf (sbit bitstring (char-code (char source x))) set-char)
+ (bit-ior bitstring char t))))))))
+ (add-exp `((let ((range ,bitstring))
+ (if (>= index length)
+ (return-from compare nil))
+ (if (= 1 (sbit range (char-code (char string index))))
+ (incf index)
+ (return-from compare nil)))))))
+ ((#\\ )
+ ;;
+ ;; Intreprete the next character as a special, range, octal, group or
+ ;; just the character itself.
+ ;;
+ (let ((length)
+ (value))
+ (multiple-value-setq (value length)
+ (regex-quoted (subseq source (1+ eindex)) nil))
+ (cond ((listp value)
+ (add-exp value))
+ ((characterp value)
+ (add-exp `((if (not (and (< index length)
+ (eql (char string index)
+ ,value)))
+ (return-from compare nil)
+ (incf index)))))
+ ((vectorp value)
+ (add-exp `((let ((range ,value))
+ (if (>= index length)
+ (return-from compare nil))
+ (if (= 1 (sbit range (char-code (char string index))))
+ (incf index)
+ (return-from compare nil)))))))
+ (incf eindex length)))
+ (t
+ ;;
+ ;; We have a literal character.
+ ;; Scan to see how many we have and if it is more than one
+ ;; generate a string= verses as single eql.
+ ;;
+ (let* ((lit "")
+ (term (dotimes (litindex (- (length source) eindex) nil)
+ (let ((litchar (char source (+ eindex litindex))))
+ (if (position litchar *regex-special-chars*)
+ (return litchar)
+ (progn
+ (info "Now adding ~A index ~A to lit~%" litchar
+ litindex)
+ (setf lit (concatenate 'string lit
+ (string litchar)))))))))
+ (if (= (length lit) 1)
+ (add-exp `((if (not (and (< index length)
+ (eql (char string index) ,current)))
+ (return-from compare nil)
+ (incf index))))
+ ;;
+ ;; If we have a multi-character literal then we must
+ ;; check to see if the next character (if there is one)
+ ;; is an astrisk or a plus or a question mark. If so then we must not use this
+ ;; character in the big literal.
+ (progn
+ (if (or (eql term #\*)
+ (eql term #\+)
+ (eql term #\?))
+ (setf lit (subseq lit 0 (1- (length lit)))))
+ (add-exp `((if (< length (+ index ,(length lit)))
+ (return-from compare nil))
+ (if (not (string= string ,lit :start1 index
+ :end1 (+ index ,(length lit))))
+ (return-from compare nil)
+ (incf index ,(length lit)))))))
+ (incf eindex (1- (length lit))))))))
+ ;;
+ ;; Plug end of list to return t. If we made it this far then
+ ;; We have matched!
+ (add-exp '((setf (cadr (aref *regex-groups* 0))
+ index)))
+ (add-exp '((return-from final-return t)))
+ ;;
+;;; (print expression)
+ ;;
+ ;; Now take the expression list and turn it into a lambda expression
+ ;; replacing the special flags with lisp code.
+ ;; For example: A BEGIN needs to be replace by an expression that
+ ;; saves the current index, then evaluates everything till it gets to
+ ;; the END then save the new index if it didn't fail.
+ ;; On an ASTRISK I need to take the previous expression and wrap
+ ;; it in a do that will evaluate the expression till an error
+ ;; occurs and then another do that encompases the remainder of the
+ ;; regular expression and iterates decrementing the index by one
+ ;; of the matched expression sizes and then returns nil. After
+ ;; the last expression insert a form that does a return t so that
+ ;; if the entire nested sub-expression succeeds then the loop
+ ;; is broken manually.
+ ;;
+ (setf result (copy-tree nil))
+ ;;
+ ;; Reversing the current expression makes building up the
+ ;; lambda list easier due to the nexting of expressions when
+ ;; and astrisk has been encountered.
+ (setf expression (reverse expression))
+ (do ((elt 0 (1+ elt)))
+ ((>= elt (length expression)))
+ (let ((piece (nth elt expression)))
+ ;;
+ ;; Now check for PLUS, if so then ditto the expression and then let the
+ ;; ASTRISK below handle the rest.
+ ;;
+ (cond ((eql piece 'PLUS)
+ (cond ((listp (nth (1+ elt) expression))
+ (setf result (append (list (nth (1+ elt) expression))
+ result)))
+ ;;
+ ;; duplicate the entire group
+ ;; NOTE: This hasn't been implemented yet!!
+ (t
+ (error "GROUP repeat hasn't been implemented yet~%")))))
+ (cond ((listp piece) ;Just append the list
+ (setf result (append (list piece) result)))
+ ((eql piece 'QUESTION) ; Wrap it in a block that won't fail
+ (cond ((listp (nth (1+ elt) expression))
+ (setf result
+ (append `((progn (block compare
+ ,(nth (1+ elt)
+ expression))
+ t))
+ result))
+ (incf elt))
+ ;;
+ ;; This is a QUESTION on an entire group which
+ ;; hasn't been implemented yet!!!
+ ;;
+ (t
+ (error "Optional groups not implemented yet~%"))))
+ ((or (eql piece 'ASTRISK) ; Do the wild thing!
+ (eql piece 'PLUS))
+ (cond ((listp (nth (1+ elt) expression))
+ ;;
+ ;; This is a single character wild card so
+ ;; do the simple form.
+ ;;
+ (setf result
+ `((let ((oindex index))
+ (block compare
+ (do ()
+ (nil)
+ ,(nth (1+ elt) expression)))
+ (do ((start index (1- start)))
+ ((< start oindex) nil)
+ (let ((index start))
+ (block compare
+ ,@result))))))
+ (incf elt))
+ (t
+ ;;
+ ;; This is a subgroup repeated so I must build
+ ;; the loop using several values.
+ ;;
+ ))
+ )
+ (t t)))) ; Just ignore everything else.
+ ;;
+ ;; Now wrap the result in a lambda list that can then be
+ ;; invoked or compiled, however the user wishes.
+ ;;
+ (if anchored
+ (setf result
+ `(lambda (string &key (start 0) (end (length string)))
+ (setf *regex-groupings* ,group)
+ (block final-return
+ (block compare
+ (let ((index start)
+ (length end))
+ ,@result)))))
+ (setf result
+ `(lambda (string &key (start 0) (end (length string)))
+ (setf *regex-groupings* ,group)
+ (block final-return
+ (let ((length end))
+ ,@fast-first
+ (do ((marker start (1+ marker)))
+ ((> marker end) nil)
+ (let ((index marker))
+ (if (block compare
+ ,@result)
+ (return t)))))))))))
+
+;; (provide 'nregex)