Function: mail-extr-voodoo
mail-extr-voodoo is a byte-compiled function defined in
mail-extr.el.gz.
Signature
(mail-extr-voodoo MBOX-BEG MBOX-END CANONICALIZATION-BUFFER)
Source Code
;; Defined in /usr/src/emacs/lisp/mail/mail-extr.el.gz
(defun mail-extr-voodoo (mbox-beg mbox-end canonicalization-buffer)
(unless (and mail-extr-disable-voodoo
(or (not (stringp mail-extr-disable-voodoo))
(progn
(goto-char (point-min))
(re-search-forward mail-extr-disable-voodoo nil t))))
(let ((word-count 0)
(case-fold-search nil)
mixed-case-flag lower-case-flag ;;upper-case-flag
suffix-flag last-name-comma-flag
initial
begin-again-flag
drop-this-word-if-trailing-flag
drop-last-word-if-trailing-flag
word-found-flag
this-word-beg last-word-beg
name-beg name-end
name-done-flag
)
(save-excursion
(set-syntax-table mail-extr-address-text-syntax-table)
;; Get rid of comments.
(goto-char (point-min))
(while (not (eobp))
;; Initialize for this iteration of the loop.
(skip-chars-forward "^({[\"'`")
(let ((cbeg (point)))
(set-syntax-table mail-extr-address-text-comment-syntax-table)
(if (memq (following-char) '(?\' ?\`))
(search-forward "'" nil 'move
(if (eq ?\' (following-char)) 2 1))
(or (mail-extr-safe-move-sexp 1)
(goto-char (point-max))))
(set-syntax-table mail-extr-address-text-syntax-table)
(when (eq (char-after cbeg) ?\()
;; Delete the comment itself.
(delete-region cbeg (point))
;; Canonicalize whitespace where the comment was.
(skip-chars-backward " \t")
(if (looking-at "\\([ \t]+$\\|[ \t]+,\\)")
(replace-match "")
(setq cbeg (point))
(skip-chars-forward " \t")
(if (bobp)
(delete-region (point) cbeg)
(just-one-space))))))
;; This was moved above.
;; Fix . used as space
;; But it belongs here because it occurs not only as
;; rypens@reks.uia.ac.be (Piet.Rypens)
;; but also as
;; "Piet.Rypens" <rypens@reks.uia.ac.be>
;;(goto-char (point-min))
;;(while (re-search-forward mail-extr-bad-dot-pattern nil t)
;; (replace-match "\\1 \\2" t))
(unless (search-forward " " nil t)
(goto-char (point-min))
(cond ((search-forward "_" nil t)
;; Handle the *idiotic* use of underlines as spaces.
;; Example: fml@foo.bar.dom (First_M._Last)
(goto-char (point-min))
(while (search-forward "_" nil t)
(replace-match " " t)))
((search-forward "." nil t)
;; Fix . used as space
;; Example: danj1@cb.att.com (daniel.jacobson)
(goto-char (point-min))
(while (re-search-forward mail-extr-bad-dot-pattern nil t)
(replace-match "\\1 \\2" t)))))
;; Loop over the words (and other junk) in the name.
(goto-char (point-min))
(while (not name-done-flag)
(when word-found-flag
;; Last time through this loop we skipped over a word.
(setq last-word-beg this-word-beg)
(setq drop-last-word-if-trailing-flag
drop-this-word-if-trailing-flag)
(setq word-found-flag nil))
(when begin-again-flag
;; Last time through the loop we found something that
;; indicates we should pretend we are beginning again from
;; the start.
(setq word-count 0)
(setq last-word-beg nil)
(setq drop-last-word-if-trailing-flag nil)
(setq mixed-case-flag nil)
(setq lower-case-flag nil)
;; (setq upper-case-flag nil)
(setq begin-again-flag nil))
;; Initialize for this iteration of the loop.
(mail-extr-skip-whitespace-forward)
(if (eq word-count 0) (narrow-to-region (point) (point-max)))
(setq this-word-beg (point))
(setq drop-this-word-if-trailing-flag nil)
;; Decide what to do based on what we are looking at.
(cond
;; Delete title
((and (eq word-count 0)
(looking-at mail-extr-full-name-prefixes))
(goto-char (match-end 0))
(narrow-to-region (point) (point-max)))
;; Stop after name suffix
((and (>= word-count 2)
(looking-at mail-extr-full-name-suffix-pattern))
(mail-extr-skip-whitespace-backward)
(setq suffix-flag (point))
(if (eq ?, (following-char))
(forward-char 1)
(insert ?,))
;; Enforce at least one space after comma
(or (eq ?\ (following-char))
(insert ?\ ))
(mail-extr-skip-whitespace-forward)
(cond ((memq (following-char) '(?j ?J ?s ?S))
(capitalize-word 1)
(if (eq (following-char) ?.)
(forward-char 1)
(insert ?.)))
(t
(upcase-word 1)))
(setq word-found-flag t)
(setq name-done-flag t))
;; Handle SCA names
((looking-at "MKA \\(.+\\)") ; "Mundanely Known As"
(goto-char (match-beginning 1))
(narrow-to-region (point) (point-max))
(setq begin-again-flag t))
;; Check for initial last name followed by comma
((and (eq ?, (following-char))
(eq word-count 1))
(forward-char 1)
(setq last-name-comma-flag t)
(or (eq ?\ (following-char))
(insert ?\ )))
;; Stop before trailing comma-separated comment
;; THIS CASE MUST BE AFTER THE PRECEDING CASES.
;; *** This case is redundant???
;;((eq ?, (following-char))
;; (setq name-done-flag t))
;; Delete parenthesized/quoted comment/nickname
((memq (following-char) '(?\( ?\{ ?\[ ?\" ?\' ?\`))
(setq mailextr-cbeg (point))
(set-syntax-table mail-extr-address-text-comment-syntax-table)
(cond ((memq (following-char) '(?\' ?\`))
(or (search-forward "'" nil t
(if (eq ?\' (following-char)) 2 1))
(delete-char 1)))
(t
(or (mail-extr-safe-move-sexp 1)
(goto-char (point-max)))))
(set-syntax-table mail-extr-address-text-syntax-table)
(setq mailextr-cend (point))
(cond
;; Handle case of entire name being quoted
((and (eq word-count 0)
(looking-at " *\\'")
(>= (- mailextr-cend mailextr-cbeg) 2))
(narrow-to-region (1+ mailextr-cbeg) (1- mailextr-cend))
(goto-char (point-min)))
(t
;; Handle case of quoted initial
(if (and (or (= 3 (- mailextr-cend mailextr-cbeg))
(and (= 4 (- mailextr-cend mailextr-cbeg))
(eq ?. (char-after (+ 2 mailextr-cbeg)))))
(not (looking-at " *\\'")))
(setq initial (char-after (1+ mailextr-cbeg)))
(setq initial nil))
(delete-region mailextr-cbeg mailextr-cend)
(if initial
(insert initial ". ")))))
;; Handle *Stupid* VMS date stamps
((looking-at mail-extr-stupid-vms-date-stamp-pattern)
(replace-match "" t))
;; Handle Chinese characters.
((looking-at mail-extr-hz-embedded-gb-encoded-chinese-pattern)
(goto-char (match-end 0))
(setq word-found-flag t))
;; Skip initial garbage characters.
;; THIS CASE MUST BE AFTER THE PRECEDING CASES.
((and (eq word-count 0)
(looking-at mail-extr-leading-garbage))
(goto-char (match-end 0))
;; *** Skip backward over these???
;; (skip-chars-backward "& \"")
(narrow-to-region (point) (point-max)))
;; Various stopping points
((or
;; Stop before ALL CAPS acronyms, if preceded by mixed-case
;; words. Example: XT-DEM.
(and (>= word-count 2)
mixed-case-flag
(looking-at mail-extr-weird-acronym-pattern)
(not (looking-at mail-extr-roman-numeral-pattern)))
;; Stop before trailing alternative address
(looking-at mail-extr-alternative-address-pattern)
;; Stop before trailing comment not introduced by comma
;; THIS CASE MUST BE AFTER AN EARLIER CASE.
(looking-at mail-extr-trailing-comment-start-pattern)
;; Stop before telephone numbers
(and (>= word-count 1)
(looking-at mail-extr-telephone-extension-pattern)))
(setq name-done-flag t))
;; Delete ham radio call signs
((looking-at mail-extr-ham-call-sign-pattern)
(delete-region (match-beginning 0) (match-end 0)))
;; Fixup initials
((looking-at mail-extr-initial-pattern)
(or (eq (following-char) (upcase (following-char)))
(setq lower-case-flag t))
(forward-char 1)
(if (eq ?. (following-char))
(forward-char 1)
(insert ?.))
(or (eq ?\ (following-char))
(insert ?\ ))
(setq word-found-flag t))
;; Handle BITNET LISTSERV list names.
((and (eq word-count 0)
(looking-at mail-extr-listserv-list-name-pattern))
(narrow-to-region (match-beginning 1) (match-end 1))
(setq word-found-flag t)
(setq name-done-flag t))
;; Handle & substitution, when & is last and is not first.
((and (> word-count 0)
(eq ?\ (preceding-char))
(eq (following-char) ?&)
(eq (1+ (point)) (point-max)))
(delete-char 1)
(capitalize-region
(point)
(progn
(insert-buffer-substring canonicalization-buffer
mbox-beg mbox-end)
(point)))
(setq disable-initial-guessing-flag t)
(setq word-found-flag t))
;; Handle & between names, as in "Bob & Susie".
((and (> word-count 0) (eq (following-char) ?\&))
(setq name-beg (point))
(setq name-end (1+ name-beg))
(setq word-found-flag t)
(goto-char name-end))
;; Regular name words
((looking-at mail-extr-name-pattern)
(setq name-beg (point))
(setq name-end (match-end 0))
;; Certain words will be dropped if they are at the end.
(and (>= word-count 2)
(not lower-case-flag)
(or
;; Trailing 4-or-more letter lowercase words preceded by
;; mixed case or uppercase words will be dropped.
(looking-at "[[:lower:]]\\{4,\\}[ \t]*\\'")
;; Drop a trailing word which is terminated with a period.
(eq ?. (char-after (1- name-end))))
(setq drop-this-word-if-trailing-flag t))
;; Set the flags that indicate whether we have seen a lowercase
;; word, a mixed case word, and an uppercase word.
(if (re-search-forward "[[:lower:]]" name-end t)
(if (progn
(goto-char name-beg)
(re-search-forward "[[:upper:]]" name-end t))
(setq mixed-case-flag t)
(setq lower-case-flag t))
;; (setq upper-case-flag t)
)
(goto-char name-end)
(setq word-found-flag t))
;; Allow a number as a word, if it doesn't mean anything else.
((looking-at "[0-9]+\\>")
(setq name-beg (point))
(setq name-end (match-end 0))
(goto-char name-end)
(setq word-found-flag t))
(t
(setq name-done-flag t)
))
;; Count any word that we skipped over.
(if word-found-flag
(setq word-count (1+ word-count))))
;; If the last thing in the name is 2 or more periods, or one or more
;; other sentence terminators (but not a single period) then keep them
;; and the preceding word. This is for the benefit of whole sentences
;; in the name field: it's better behavior than dropping the last word
;; of the sentence...
(if (and (not suffix-flag)
(looking-at "\\(\\.+\\|[?!;:.][?!;:.]+\\|[?!;:][?!;:.]*\\)\\'"))
(goto-char (setq suffix-flag (point-max))))
;; Drop everything after point and certain trailing words.
(narrow-to-region (point-min)
(or (and drop-last-word-if-trailing-flag
last-word-beg)
(point)))
;; Xerox's mailers SUCK!!!!!!
;; We simply refuse to believe that any last name is PARC or ADOC.
;; If it looks like that is the last name, that there is no meaningful
;; here at all. Actually I guess it would be best to map patterns
;; like foo.hoser@xerox.com into foo@hoser.xerox.com, but I don't
;; actually know that that is what's going on.
(unless suffix-flag
(goto-char (point-min))
(let ((case-fold-search t))
(if (looking-at "[-A-Za-z_]+[. ]\\(PARC\\|ADOC\\)\\'")
(erase-buffer))))
;; If last name first put it at end (but before suffix)
(when last-name-comma-flag
(goto-char (point-min))
(search-forward ",")
(setq name-end (1- (point)))
(goto-char (or suffix-flag (point-max)))
(or (eq ?\ (preceding-char))
(insert ?\ ))
(insert-buffer-substring (current-buffer) (point-min) name-end)
(goto-char name-end)
(skip-chars-forward "\t ,")
(narrow-to-region (point) (point-max)))
;; Delete leading and trailing junk characters.
;; *** This is probably completely unneeded now.
;;(goto-char (point-max))
;;(skip-chars-backward mail-extr-non-end-name-chars)
;;(if (eq ?. (following-char))
;; (forward-char 1))
;;(narrow-to-region (point)
;; (progn
;; (goto-char (point-min))
;; (skip-chars-forward mail-extr-non-begin-name-chars)
;; (point)))
;; Compress whitespace
(goto-char (point-min))
(while (re-search-forward "[ \t\n]+" nil t)
(replace-match (if (eobp) "" " ") t))
))))