Function: ucs-names

ucs-names is a byte-compiled function defined in mule-cmds.el.gz.

Signature

(ucs-names)

Documentation

Return the table of CHAR-NAME keys and CHAR-CODE values cached in the variable ucs-names.

Probably introduced at or before Emacs version 26.1.

Source Code

;; Defined in /usr/src/emacs/lisp/international/mule-cmds.el.gz
(defun ucs-names ()
  "Return table of CHAR-NAME keys and CHAR-CODE values cached in `ucs-names'.
The table is built on the first call, stored in the variable
`ucs-names', and returned as is by later calls.

Keys are the `name' and `old-name' character code properties of the
scanned codepoints (an old name never replaces an entry that is
already in the table), plus a \"LAMBDA\" spelling for characters
that have no old name and whose name contains the word \"LAMDA\".
The key \"BELL (BEL)\" is added for the character `?\\a'."
  (or ucs-names
      ;; Sometimes these ranges will need adjusting as codepoints are
      ;; added to unicode.  The test case
      ;; 'mule-cmds-tests--ucs-names-missing-names' will tell you
      ;; which are missing (Bug#65997).
      (let ((ranges
             '((#x0000 . #x33FF)
               ;; (#x3400 . #x4DBF) CJK Ideographs Extension A
               (#x4DC0 . #x4DFF)
               ;; (#x4E00 . #x9FFF) CJK Unified Ideographs
               (#xA000 . #xD7FF)
               ;; (#xD800 . #xF8FF) Surrogate/Private
               ;; (#xF900 . #xFAFF) CJK Compatibility Ideographs
               (#xFB00 . #x143FA)
               (#x14400 . #x14646)
               ;; (#x14647 . #x160FF) unused
               (#x16100 . #x16139)
               ;; (#x1613A . #x167FF) unused
               (#x16800 . #x16F9F)
               (#x16FE0 . #x16FF6)
               ;; (#x17000 . #x187FF) Tangut Ideographs
               ;; (#x18800 . #x18AFF) Tangut Components
               ;; (#x18B00 . #x18CFF) Khitan Small Script
               ;; (#x18D00 . #x18D1E) Tangut Ideograph Supplement
               ;; (#x18D1F . #x18D7F) unused
               ;; (#x18D80 . #x18DF2) Tangut Components
               ;; (#x18DF3 . #x18DFF) unused
               (#x1AFF0 . #x1B122)
               ;; (#x1B123 . #x1B131) unused
               (#x1B132 . #x1B132)
               ;; (#x1B133 . #x1B14F) unused
               (#x1B150 . #x1B16F)
               (#x1B170 . #x1B2FF)
               ;; (#x1B300 . #x1BBFF) unused
               (#x1BC00 . #x1BCAF)
               ;; (#x1BCB0 . #x1CBFF) unused
               (#x1CC00 . #x1FFFF)
               ;; (#x20000 . #xDFFFF) CJK Ideograph Extension A, B, etc, unused
               (#xE0000 . #xE01FF)))
            ;; Raise the GC threshold while the table is being filled.
            (gc-cons-threshold (max gc-cons-threshold 10000000))
            (names (make-hash-table :size 42943 :test #'equal)))
        (require 'charprop) ;; Usually preloaded, but not during bootstrap.
        ;; The "\\<" and "\\>" regexp operators used below consult the
        ;; current buffer's syntax table, even when matching a string.
        ;; The result is cached globally, so build it under the
        ;; standard syntax table; otherwise a first call made from a
        ;; buffer where ASCII letters are not word constituents would
        ;; permanently omit the "LAMBDA" aliases.
        (with-syntax-table (standard-syntax-table)
          (dolist (range ranges)
            (let ((c (car range))
                  (end (cdr range)))
              (while (<= c end)
                (let ((new-name (get-char-code-property c 'name))
                      (old-name (get-char-code-property c 'old-name)))
                  ;; This code used to push both old-name and new-name
                  ;; on the assumption that the new-name codepoint would
                  ;; always be higher, which was true for a long time.
                  ;; As of at latest 2023-09-15, this is no longer true,
                  ;; so we now skip the old-name if it conflicts with an
                  ;; existing new-name (Bug#65997).
                  (when new-name
                    (puthash new-name c names))
                  (when (and old-name
                             (not (gethash old-name names)))
                    (puthash old-name c names))
                  ;; Unicode uses the spelling "lamda" in character
                  ;; names, instead of "lambda", due to "preferences
                  ;; expressed by the Greek National Body" (Bug#30513).
                  ;; Some characters have an old-name with the "lambda"
                  ;; spelling, but others don't.  Add the traditional
                  ;; spelling for more convenient completion.
                  (when (and (not old-name) new-name
                             (string-match "\\<LAMDA\\>" new-name))
                    (puthash (replace-match "LAMBDA" t t new-name) c names))
                  (setq c (1+ c)))))))
        ;; Special case for "BELL" which is apparently the only char which
        ;; doesn't have a new name and whose old-name is shadowed by a newer
        ;; char with that name.
        (puthash "BELL (BEL)" ?\a names)
        ;; Cache the finished table; `setq' also makes it the return value.
        (setq ucs-names names))))