Function: translate-region-internal

translate-region-internal is a function defined in editfns.c.

Signature

(translate-region-internal START END TABLE)

Documentation

Internal use only.

From START to END, translate characters according to TABLE. TABLE is a string or a char-table; the Nth character in it is the mapping for the character with code N. It returns the number of characters changed.

Source Code

// Defined in /usr/src/emacs/src/editfns.c
{
  /* Translate the characters between START and END of the current
     buffer according to TABLE, which must be either a string indexed
     by character code or a char-table whose purpose is
     `translation-table'; anything else signals an error.  Return, as
     a fixnum, the count accumulated in CHARACTERS_CHANGED.  */

  /* Character codes at or above this bound are never translated.  */
  int translatable_chars = MAX_CHAR + 1;
  bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
  bool string_multibyte UNINIT;

  validate_region (&start, &end);
  if (STRINGP (table))
    {
      if (! multibyte)
	table = string_make_unibyte (table);
      /* TABLE is indexed by character code, so the bound is its length
	 in characters, not in bytes.  For a multibyte TABLE the byte
	 count is larger than the character count and would let codes
	 past the end of TABLE be looked up below.  For a unibyte TABLE
	 the two counts are the same.  */
      translatable_chars = min (translatable_chars, SCHARS (table));
      string_multibyte = STRING_MULTIBYTE (table);
    }
  else if (! (CHAR_TABLE_P (table)
	      && EQ (XCHAR_TABLE (table)->purpose, Qtranslation_table)))
    error ("Not a translation table");

  /* POS and POS_BYTE are the character and byte positions of the
     character being examined; they are advanced together.  */
  ptrdiff_t pos = XFIXNUM (start);
  ptrdiff_t pos_byte = CHAR_TO_BYTE (pos);
  ptrdiff_t end_pos = XFIXNUM (end);
  modify_text (pos, end_pos);

  ptrdiff_t characters_changed = 0;

  while (pos < end_pos)
    {
      unsigned char *p = BYTE_POS_ADDR (pos_byte);
      /* STR points at the STR_LEN bytes encoding the new character,
	 either inside TABLE's data or in BUF.  */
      unsigned char *str UNINIT;
      unsigned char buf[MAX_MULTIBYTE_LENGTH];
      /* OC is the old (current) character and LEN its length in bytes
	 in the buffer.  */
      int len, oc;

      if (multibyte)
	oc = string_char_and_length (p, &len);
      else
	oc = *p, len = 1;
      if (oc < translatable_chars)
	{
	  int nc; /* New character.  */
	  int str_len UNINIT;
	  Lisp_Object val;

	  if (STRINGP (table))
	    {
	      /* Reload as signal_after_change in last iteration may GC.  */
	      unsigned char *tt = SDATA (table);

	      if (string_multibyte)
		{
		  /* Character index OC must be converted to a byte
		     offset before decoding the entry.  */
		  str = tt + string_char_to_byte (table, oc);
		  nc = string_char_and_length (str, &str_len);
		}
	      else
		{
		  nc = tt[oc];
		  if (! ASCII_CHAR_P (nc) && multibyte)
		    {
		      /* A non-ASCII byte from a unibyte TABLE cannot be
			 stored as is in a multibyte buffer; encode it
			 into BUF with BYTE8_STRING first.  */
		      str_len = BYTE8_STRING (nc, buf);
		      str = buf;
		    }
		  else
		    {
		      str_len = 1;
		      str = tt + oc;
		    }
		}
	    }
	  else
	    {
	      /* Char-table lookup.  NC stays equal to OC (meaning "no
		 change") unless the entry is a character, a vector or
		 a cons.  */
	      nc = oc;
	      val = CHAR_TABLE_REF (table, oc);
	      if (CHARACTERP (val))
		{
		  nc = XFIXNAT (val);
		  str_len = CHAR_STRING (nc, buf);
		  str = buf;
		}
	      else if (VECTORP (val) || (CONSP (val)))
		{
		  /* VAL is [TO_CHAR ...] or (([FROM-CHAR ...] .  TO) ...)
		     where TO is TO-CHAR or [TO-CHAR ...].  */
		  nc = -1;
		}
	    }

	  if (nc != oc && nc >= 0)
	    {
	      /* Simple one char to one char translation.  */
	      if (len != str_len)
		{
		  Lisp_Object string;

		  /* This is less efficient, because it moves the gap,
		     but it should handle multibyte characters correctly.  */
		  string = make_multibyte_string ((char *) str, 1, str_len);
		  replace_range (pos, pos + 1, string,
				 true, false, true, false, false);
		  /* The character at POS now occupies STR_LEN bytes, so
		     the advance at the bottom of the loop must use the
		     new length.  */
		  len = str_len;
		}
	      else
		{
		  /* Old and new encodings have the same byte length:
		     overwrite the bytes in place.  */
		  record_change (pos, 1);
		  while (str_len-- > 0)
		    *p++ = *str++;
		  signal_after_change (pos, 1, 1);
		  update_compositions (pos, pos + 1, CHECK_BORDER);

#ifdef HAVE_TREE_SITTER
		  /* In the previous branch, replace_range() notifies
                     changes to tree-sitter, but in this branch, we
                     modified buffer content manually, so we need to
                     notify tree-sitter manually.  */
		  treesit_record_change (pos_byte, pos_byte + len,
					 pos_byte + len);
#endif
		}
	      characters_changed++;
	    }
	  else if (nc < 0)
	    {
	      /* NC is negative only on the char-table path above, so VAL
		 has been set and is a vector or a cons here.  From here
		 on LEN counts the characters to be replaced, not
		 bytes.  */
	      if (CONSP (val))
		{
		  val = check_translation (pos, pos_byte, end_pos, val);
		  if (NILP (val))
		    {
		      /* check_translation returned nil: leave this
			 character alone and move to the next one.  */
		      pos_byte += len;
		      pos++;
		      continue;
		    }
		  /* VAL is ([FROM-CHAR ...] . TO).  */
		  len = ASIZE (XCAR (val));
		  val = XCDR (val);
		}
	      else
		len = 1;

	      /* Build the replacement text: the characters of the vector
		 VAL, or a one-character string when VAL is a single
		 character.  */
	      Lisp_Object string
		= (VECTORP (val)
		   ? Fconcat (1, &val)
		   : Fmake_string (make_fixnum (1), val, Qnil));
	      replace_range (pos, pos + len, string, true, false, true, false,
			     false);
	      /* Step over the inserted text and shift END_POS by the
		 difference between the inserted and the replaced
		 character counts.  */
	      pos_byte += SBYTES (string);
	      pos += SCHARS (string);
	      characters_changed += SCHARS (string);
	      end_pos += SCHARS (string) - len;
	      continue;
	    }
	}
      pos_byte += len;
      pos++;
    }

  return make_fixnum (characters_changed);
}