Function: rx--generate-alt

rx--generate-alt is a byte-compiled function defined in rx.el.gz.

Signature

(rx--generate-alt NEGATED INTERVALS CLASSES)

Documentation

Generate a character alternative. Return (REGEXP . PRECEDENCE).

If NEGATED is non-nil, negate the result; INTERVALS is a sorted list of disjoint intervals and CLASSES a list of named character classes.

Source Code

;; Defined in /usr/src/emacs/lisp/emacs-lisp/rx.el.gz
(defun rx--generate-alt (negated intervals classes)
  "Generate a character alternative.  Return (REGEXP . PRECEDENCE).
If NEGATED is non-nil, negate the result; INTERVALS is a sorted
list of disjoint intervals and CLASSES a list of named character
classes."
  ;; No, this is not pretty code.  You try doing it in a way that is both
  ;; elegant and efficient.  Or just one of the two.  I dare you.

  ;; Detect whether the interval set is better described in
  ;; complemented form.  This is not just a matter of aesthetics: any
  ;; range that straddles the char-raw boundary will be mutilated by the
  ;; regexp engine.  Ranges from ASCII to raw bytes will exclude the
  ;; all non-ASCII non-raw bytes, and ranges from non-ASCII Unicode
  ;; to raw bytes are ignored.
  (unless (or classes
              ;; Any interval set covering #x3fff7f should be negated.
              (rx--every (lambda (iv) (not (<= (car iv) #x3fff7f (cdr iv))))
                         intervals))
    (setq negated (not negated))
    (setq intervals (rx--interval-set-complement intervals)))
  (cond
   ;; Single character.
   ((and intervals (eq (caar intervals) (cdar intervals))
         (null (cdr intervals))
         (null classes))
    (let ((ch (caar intervals)))
      (if negated
          (if (eq ch ?\n)
              ;; Single negated newline.
              (rx--translate-symbol 'nonl)
            ;; Single negated character (other than newline).
            (cons (list (string ?\[ ?^ ch ?\])) t))
        ;; Single literal character.
        (cons (list (regexp-quote (char-to-string ch))) t))))

   ;; Empty set (or any char).
   ((and (null intervals) (null classes))
    (if negated
        (rx--translate-symbol 'anychar)
      (rx--empty)))

   ;; More than one character, or at least one class.
   (t
    (let ((dash nil) (caret nil))
      ;; Move ] and range ]-x to the start.
      (let ((rbrac-l (assq ?\] intervals)))
        (when rbrac-l
          (setq intervals (cons rbrac-l (remq rbrac-l intervals)))))

      ;; Split x-] and move the lone ] to the start.
      (let ((rbrac-r (rassq ?\] intervals)))
        (when (and rbrac-r (not (eq (car rbrac-r) ?\])))
          (setcdr rbrac-r ?\\)
          (setq intervals (cons '(?\] . ?\]) intervals))))

      ;; Split ,-- (which would end up as ,- otherwise).
      (let ((dash-r (rassq ?- intervals)))
        (when (eq (car dash-r) ?,)
          (setcdr dash-r ?,)
          (setq dash "-")))

      ;; Remove - (lone or at start of interval)
      (let ((dash-l (assq ?- intervals)))
        (when dash-l
          (if (eq (cdr dash-l) ?-)
              (setq intervals (remq dash-l intervals))   ; Remove lone -
            (setcar dash-l ?.))                          ; Reduce --x to .-x
          (setq dash "-")))

      ;; Deal with leading ^ and range ^-x in non-negated set.
      (when (and (eq (caar intervals) ?^)
                 (not negated))
        (if (eq (cdar intervals) ?^)
            ;; single leading ^
            (if (or (cdr intervals) classes)
                ;; something else to put before the ^
                (progn
                  (setq intervals (cdr intervals))   ; remove lone ^
                  (setq caret "^"))                  ; put ^ (almost) last
              ;; nothing else but a lone -
              (setq intervals (cons '(?- . ?-) intervals))  ; move - first
              (setq dash nil))
          ;; split ^-x to _-x^
          (setq intervals `((?_ . ,(cdar intervals)) (?^ . ?^)
                            . ,(cdr intervals)))))

      (cons
       (list
        (concat
         "["
         (and negated "^")
         (mapconcat (lambda (iv)
                      (cond ((eq (car iv) (cdr iv))
                             (char-to-string (car iv)))
                            ((eq (1+ (car iv)) (cdr iv))
                             (string (car iv) (cdr iv)))
                            ;; Ranges that go between normal chars and raw bytes
                            ;; must be split to avoid being mutilated
                            ;; by Emacs's regexp parser.
                            ((<= (car iv) #x3fff7f (cdr iv))
                             (string (car iv) ?- #x3fff7f
                                     #x3fff80 ?- (cdr iv)))
                            (t
                             (string (car iv) ?- (cdr iv)))))
                    intervals)
         (mapconcat (lambda (cls) (format "[:%s:]" cls)) classes)
         caret                          ; ^ or nothing
         dash                           ; - or nothing
         "]"))
       t)))))