Function: rfc2047-encode-region
rfc2047-encode-region is a byte-compiled function defined in
rfc2047.el.gz.
Signature
(rfc2047-encode-region B E &optional DONT-FOLD)
Documentation
Encode words in region B to E that need encoding.
By default, the region is treated as containing RFC 822 (or later) addresses.
Dynamically bind rfc2047-encoding-type to change that.
Aliases
mail-encode-encoded-word-region
Source Code
;; Defined in /usr/src/emacs/lisp/mail/rfc2047.el.gz
(defun rfc2047-encode-region (b e &optional dont-fold)
"Encode words in region B to E that need encoding.
By default, the region is treated as containing RFC 822 (or later) addresses.
Dynamically bind `rfc2047-encoding-type' to change that."
(save-restriction
(narrow-to-region b e)
(let ((encodable-regexp (if rfc2047-encode-encoded-words
"[^\000-\177]+\\|=\\?"
"[^\000-\177]+"))
start ; start of current token
end begin csyntax
;; Whether there's an encoded word before the current token,
;; either immediately or separated by space.
last-encoded
(orig-text (buffer-substring-no-properties b e)))
(if (eq 'mime rfc2047-encoding-type)
;; Simple case. Continuous words in which all those contain
;; non-ASCII characters are encoded collectively. Encoding
;; ASCII words, including `Re:' used in Subject headers, is
;; avoided for interoperability with non-MIME clients and
;; for making it easy to find keywords.
(progn
(goto-char (point-min))
(while (progn (skip-chars-forward " \t\n")
(not (eobp)))
(setq start (point))
(while (and (looking-at "[ \t\n]*\\([^ \t\n]+\\)")
(progn
(setq end (match-end 0))
(re-search-forward encodable-regexp end t)))
(goto-char end))
(if (> (point) start)
(rfc2047-encode start (point))
(goto-char end))))
(with-syntax-table rfc2047-syntax-table
(goto-char (point-min))
(condition-case err ; in case of unbalanced quotes
;; Look for RFC 822 (or later) style: sequences of atoms, quoted
;; strings, specials, whitespace. (Specials mustn't be
;; encoded.)
(while (not (eobp))
;; Skip whitespace.
(skip-chars-forward " \t\n")
(setq start (point))
(cond
((not (char-after))) ; eob
;; else token start
((eq ?\" (setq csyntax (char-syntax (char-after))))
;; Quoted word.
(forward-sexp)
(setq end (point))
;; Does it need encoding?
(goto-char start)
(if (re-search-forward encodable-regexp end 'move)
;; It needs encoding. Strip the quotes first,
;; since encoded words can't occur in quotes.
(progn
(goto-char end)
(delete-char -1)
(goto-char start)
(delete-char 1)
(when last-encoded
;; There was a preceding quoted word. We need
;; to include any separating whitespace in this
;; word to avoid it getting lost.
(skip-chars-backward " \t")
;; A space is needed between the encoded words.
(insert ? )
(setq start (point)
end (1+ end)))
;; Adjust the end position for the deleted quotes.
(rfc2047-encode start (- end 2))
(setq last-encoded t)) ; record that it was encoded
(setq last-encoded nil)))
((eq ?. csyntax)
;; Skip other delimiters, but record that they've
;; potentially separated quoted words.
(forward-char)
(setq last-encoded nil))
((eq ?\) csyntax)
(error "Unbalanced parentheses"))
((eq ?\( csyntax)
;; Look for the end of parentheses.
(forward-list)
;; Encode text as an unstructured field.
(let ((rfc2047-encoding-type 'mime))
(rfc2047-encode-region (1+ start) (1- (point))))
(skip-chars-forward ")"))
(t ; normal token/whitespace sequence
;; Find the end.
;; Skip one ASCII word, or encode continuous words
;; in which all those contain non-ASCII characters.
(setq end nil)
(while (not (or end (eobp)))
(when (looking-at "[\000-\177]+")
(setq begin (point)
end (match-end 0))
(when (progn
(while (and (or (re-search-forward
"[ \t\n]\\|\\Sw" end 'move)
(setq end nil))
(eq ?\\ (char-syntax (char-before))))
;; Skip backslash-quoted characters.
(forward-char))
end)
(setq end (match-beginning 0))
(if rfc2047-encode-encoded-words
(progn
(goto-char begin)
(when (search-forward "=?" end 'move)
(goto-char (match-beginning 0))
(setq end nil)))
(goto-char end))))
;; Where the value nil of `end' means there may be
;; text to have to be encoded following the point.
;; Otherwise, the point reached to the end of ASCII
;; words separated by whitespace or a special char.
(unless end
(when (looking-at encodable-regexp)
(goto-char (setq begin (match-end 0)))
(while (and (looking-at "[ \t\n]+\\([^ \t\n]+\\)")
(setq end (match-end 0))
(progn
(while (re-search-forward
encodable-regexp end t))
(< begin (point)))
(goto-char begin)
(or (not (re-search-forward "\\Sw" end t))
(progn
(goto-char (match-beginning 0))
nil)))
(goto-char end))
(when (looking-at "[^ \t\n]+")
(setq end (match-end 0))
(if (re-search-forward "\\Sw+" end t)
;; There are special characters better
;; to be encoded so that MTAs may parse
;; them safely.
(cond ((= end (point)))
((looking-at (concat "\\sw*\\("
encodable-regexp
"\\)"))
(setq end nil))
(t
(goto-char (1- (match-end 0)))
(unless (= (point) (match-beginning 0))
;; Separate encodable text and
;; delimiter.
(insert " "))))
(goto-char end)
(skip-chars-forward " \t\n")
(if (and (looking-at "[^ \t\n]+")
(string-match encodable-regexp
(match-string 0)))
(setq end nil)
(goto-char end)))))))
(skip-chars-backward " \t\n")
(setq end (point))
(goto-char start)
(if (re-search-forward encodable-regexp end 'move)
(progn
(unless (memq (char-before start) '(nil ?\t ? ))
(if (progn
(goto-char start)
(skip-chars-backward "^ \t\n")
(and (looking-at "\\Sw+")
(= (match-end 0) start)))
;; Also encode bogus delimiters.
(setq start (point))
;; Separate encodable text and delimiter.
(goto-char start)
(insert " ")
(setq start (1+ start)
end (1+ end))))
(rfc2047-encode start end)
(setq last-encoded t))
(setq last-encoded nil)))))
(error
(if (or debug-on-quit debug-on-error)
(signal (car err) (cdr err))
(error "Invalid data for rfc2047 encoding: %s"
(replace-regexp-in-string "[ \t\n]+" " " orig-text))))))))
(unless dont-fold
(rfc2047-fold-region b (point)))
(goto-char (point-max))))