Function: find-auto-coding

find-auto-coding is a byte-compiled function defined in mule.el.gz.

Signature

(find-auto-coding FILENAME SIZE)

Documentation

Find a coding system for a file FILENAME of which SIZE bytes follow point.

These bytes should include at least the first 1k of the file and the last 3k of the file, but the middle may be omitted.

The function checks FILENAME against the variable auto-coding-alist. If FILENAME doesn't match any entries in the variable, it checks the contents of the current buffer following point against auto-coding-regexp-alist. If no match is found, it checks for a coding: tag in the first one or two lines following point. If no coding: tag is found, it checks any local variables list in the last
3K bytes out of the SIZE bytes. Finally, if none of these methods
succeed, it checks to see if any function in auto-coding-functions gives a match.

If a coding system is specified, the return value is a cons
(CODING . SOURCE), where CODING is the specified coding system and
SOURCE is a symbol auto-coding-alist, auto-coding-regexp-alist,
:coding, or auto-coding-functions indicating by what CODING is
specified. Note that the validity of CODING is not checked; it's the caller's responsibility to check it.

If nothing is specified, the return value is nil.

Source Code

;; Defined in /usr/src/emacs/lisp/international/mule.el.gz
(defun find-auto-coding (filename size)
  "Find a coding system for a file FILENAME of which SIZE bytes follow point.
These bytes should include at least the first 1k of the file
and the last 3k of the file, but the middle may be omitted.

The function checks FILENAME against the variable `auto-coding-alist'.
If FILENAME doesn't match any entries in the variable, it checks the
contents of the current buffer following point against
`auto-coding-regexp-alist'.  If no match is found, it checks for a
`coding:' tag in the first one or two lines following point.  If no
`coding:' tag is found, it checks any local variables list in the last
3K bytes out of the SIZE bytes.  Finally, if none of these methods
succeed, it checks to see if any function in `auto-coding-functions'
gives a match.

If a coding system is specified, the return value is a cons
\(CODING . SOURCE), where CODING is the specified coding system and
SOURCE is a symbol `auto-coding-alist', `auto-coding-regexp-alist',
`:coding', or `auto-coding-functions' indicating by what CODING is
specified.  Note that the validity of CODING is not checked;
it's the caller's responsibility to check it.

If nothing is specified, the return value is nil."
  (or (let ((coding-system (auto-coding-alist-lookup filename)))
	(if coding-system
	    (cons coding-system 'auto-coding-alist)))
      ;; Try using `auto-coding-regexp-alist'.
      (let ((coding-system (auto-coding-regexp-alist-lookup (point)
							    (+ (point) size))))
	(if coding-system
	    (cons coding-system 'auto-coding-regexp-alist)))
      (let* ((case-fold-search t)
	     (head-start (point))
	     (head-end (+ head-start (min size 1024)))
	     (tail-start (+ head-start (max (- size 3072) 0)))
	     (tail-end (+ head-start size))
	     coding-system head-found tail-found char-trans)
	;; Try a short cut by searching for the string "coding:"
	;; and for "unibyte:" at the head and tail of SIZE bytes.
	(setq head-found (or (search-forward "coding:" head-end t)
			     (search-forward "unibyte:" head-end t)
			     (search-forward "enable-character-translation:"
					     head-end t)))
	(if (and head-found (> head-found tail-start))
	    ;; Head and tail are overlapped.
	    (setq tail-found head-found)
	  (goto-char tail-start)
	  (setq tail-found (or (search-forward "coding:" tail-end t)
			       (search-forward "unibyte:" tail-end t)
			       (search-forward "enable-character-translation:"
					       tail-end t))))

	;; At first check the head.
	(when head-found
	  (goto-char head-start)
	  (setq head-end (set-auto-mode-1))
	  (setq head-start (point))
	  (when (and head-end (< head-found head-end))
	    (goto-char head-start)
	    (when (and set-auto-coding-for-load
		       (re-search-forward
			"\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)"
			head-end t))
              (display-warning 'mule
                               (format "\"unibyte: t\" (in %s) is obsolete; \
use \"coding: 'raw-text\" instead."
                                       (file-relative-name filename))
                               :warning)
	      (setq coding-system 'raw-text))
	    (when (and (not coding-system)
		       (re-search-forward
			"\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)"
			head-end t))
	      (setq coding-system (intern (match-string 2))))
	    (when (re-search-forward
		   "\\(.*;\\)?[ \t]*enable-character-translation:[ \t]*\\([^ ;]+\\)"
		   head-end t)
	      (setq char-trans (match-string 2)))))

	;; If no coding: tag in the head, check the tail.
	;; Here we must pay attention to the case that the end-of-line
	;; is just "\r" and we can't use "^" nor "$" in regexp.
	(when (and tail-found (or (not coding-system) (not char-trans)))
	  (goto-char tail-start)
	  (re-search-forward "[\r\n]\^L" tail-end t)
	  (if (re-search-forward
	       "[\r\n]\\([^\r\n]*\\)[ \t]*Local Variables:[ \t]*\\([^\r\n]*\\)[\r\n]"
	       tail-end t)
	      ;; The prefix is what comes before "local variables:" in its
	      ;; line.  The suffix is what comes after "local variables:"
	      ;; in its line.
	      (let* ((prefix (regexp-quote (match-string 1)))
		     (suffix (regexp-quote (match-string 2)))
		     (re-coding
		      (concat
		       "[\r\n]" prefix
		       ;; N.B. without the \n below, the regexp can
		       ;; eat newlines.
		       "[ \t]*coding[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
		       suffix "[\r\n]"))
		     (re-unibyte
		      (concat
		       "[\r\n]" prefix
		       "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
		       suffix "[\r\n]"))
		     (re-char-trans
		      (concat
		       "[\r\n]" prefix
		       "[ \t]*enable-character-translation[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
		       suffix "[\r\n]"))
		     (re-end
		      (concat "[\r\n]" prefix "[ \t]*End *:[ \t]*" suffix
			      "[\r\n]?"))
		     (pos (1- (point))))
		(forward-char -1)	; skip back \r or \n.
		(re-search-forward re-end tail-end 'move)
		(setq tail-end (point))
		(goto-char pos)
		(when (and set-auto-coding-for-load
			   (re-search-forward re-unibyte tail-end t))
                  (display-warning 'mule "\"unibyte: t\" is obsolete; \
use \"coding: 'raw-text\" instead." :warning)
		  (setq coding-system 'raw-text))
		(when (and (not coding-system)
			   (re-search-forward re-coding tail-end t))
		  (setq coding-system (intern (match-string 1))))
		(when (and (not char-trans)
			   (re-search-forward re-char-trans tail-end t))
		  (setq char-trans (match-string 1))))))
	(if coding-system
	    ;; If the coding-system name ends with "!", remove it and
	    ;; set char-trans to "nil".
	    (let ((name (symbol-name coding-system)))
	      (if (= (aref name (1- (length name))) ?!)
		  (setq coding-system (intern (substring name 0 -1))
			char-trans "nil"))))
	(when (and char-trans
		   (not (setq char-trans (intern char-trans))))
	  (make-local-variable 'enable-character-translation)
	  (setq enable-character-translation nil))
	(if coding-system
	    (cons coding-system :coding)))
      ;; Finally, try all the `auto-coding-functions'.
      (let ((funcs auto-coding-functions)
	    (coding-system nil))
	(while (and funcs (not coding-system))
	  (setq coding-system (ignore-errors
				(save-excursion
				  (goto-char (point-min))
				  (funcall (pop funcs) size)))))
	(if coding-system
	    (cons coding-system 'auto-coding-functions)))))