Function: unencodable-char-position

unencodable-char-position is a function defined in coding.c.

Signature

(unencodable-char-position START END CODING-SYSTEM &optional COUNT STRING)

Documentation

Return position of first un-encodable character in a region.

START and END specify the region and CODING-SYSTEM specifies the encoding to check. Return nil if CODING-SYSTEM can encode every character in the region.

If optional 4th argument COUNT is non-nil, it specifies the maximum number of un-encodable characters to search for. In this case, the value is a list of positions.

If optional 5th argument STRING is non-nil, it is a string to search for un-encodable characters. In that case, START and END are indexes into the string and are treated as in substring.

Source Code

// Defined in /usr/src/emacs/src/coding.c
{
  /* Scan the characters between START and END (of the current buffer,
     or of STRING when it is non-nil) and collect the positions of
     those for which no charset in CODING_SYSTEM's charset list is
     found.  Returns nil, a single position, or a list of positions;
     see the return statement at the bottom.  */

  /* Number of hits still to collect before the scan stops.  */
  EMACS_INT n;
  struct coding_system coding;
  Lisp_Object attrs, charset_list, translation_table;
  /* Positions recorded so far, most recent first.  */
  Lisp_Object positions;
  /* FROM is advanced once per character in step with P; TO is the end
     of the range.  */
  ptrdiff_t from, to;
  /* P is the scan pointer, STOP ends the contiguous run of bytes
     currently being scanned, and PEND ends the whole range.  */
  const unsigned char *p, *stop, *pend;
  bool ascii_compatible;

  setup_coding_system (Fcheck_coding_system (coding_system), &coding);
  attrs = CODING_ID_ATTRS (coding.id);
  /* A coding system of type raw-text always yields nil here.  */
  if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
    return Qnil;
  ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
  charset_list = CODING_ATTR_CHARSET_LIST (attrs);
  translation_table = get_translation_table (attrs, 1, NULL);

  if (NILP (string))
    {
      /* Buffer case.  */
      validate_region (&start, &end);
      from = XFIXNUM (start);
      to = XFIXNUM (end);
      /* Nothing to report when the buffer is unibyte, or when the
	 coding system is ASCII-compatible and the region has exactly
	 as many bytes as characters (presumably meaning it is pure
	 ASCII).  */
      if (NILP (BVAR (current_buffer, enable_multibyte_characters))
	  || (ascii_compatible
	      && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
	return Qnil;
      p = CHAR_POS_ADDR (from);
      pend = CHAR_POS_ADDR (to);
      /* When the region straddles GPT, scan only up to GPT_ADDR
	 first; the loop below resumes at GAP_END_ADDR.  */
      if (from < GPT && to >= GPT)
	stop = GPT_ADDR;
      else
	stop = pend;
    }
  else
    {
      /* String case: START and END are checked against the length of
	 STRING by validate_subarray, which stores the results in FROM
	 and TO.  */
      CHECK_STRING (string);
      validate_subarray (string, start, end, SCHARS (string), &from, &to);
      if (! STRING_MULTIBYTE (string))
	return Qnil;
      p = SDATA (string) + string_char_to_byte (string, from);
      /* No gap handling in this branch, so the range is a single run:
	 STOP and PEND coincide.  */
      stop = pend = SDATA (string) + string_char_to_byte (string, to);
      /* Same bytes-equal-characters shortcut as in the buffer case.  */
      if (ascii_compatible && (to - from) == (pend - p))
	return Qnil;
    }

  /* Without COUNT, stop after the first hit.  */
  if (NILP (count))
    n = 1;
  else
    {
      CHECK_FIXNAT (count);
      /* NOTE(review): if COUNT is 0, N goes negative on the first hit,
	 so the `n == 0' exit below does not fire and the scan runs to
	 the end of the range -- confirm whether that is intended.  */
      n = XFIXNUM (count);
    }

  positions = Qnil;
  /* Clear the flag so that the test at the bottom of the loop only
     sees changes made during this scan.  charset_map_loaded is not
     set anywhere in this block; presumably the charset code sets it
     when it loads a map -- TODO confirm.  */
  charset_map_loaded = 0;
  while (1)
    {
      int c;

      /* Fast path: under an ASCII-compatible coding system, step over
	 ASCII bytes one at a time, keeping FROM in sync with P.  */
      if (ascii_compatible)
	while (p < stop && ASCII_CHAR_P (*p))
	  p++, from++;
      if (p >= stop)
	{
	  /* End of the whole range: done.  */
	  if (p >= pend)
	    break;
	  /* Otherwise STOP was GPT_ADDR; continue with the rest of the
	     range, starting at GAP_END_ADDR.  */
	  stop = pend;
	  p = GAP_END_ADDR;
	}

      /* Read the character at P; string_char_advance receives &p and
	 moves P forward.  */
      c = string_char_advance (&p);
      /* Record FROM unless C is ASCII under an ASCII-compatible coding
	 system, or char_charset returns non-null for the translated
	 character and CHARSET_LIST.  */
      if (! (ASCII_CHAR_P (c) && ascii_compatible)
	  && ! char_charset (translate_char (translation_table, c),
			     charset_list, NULL))
	{
	  positions = Fcons (make_fixnum (from), positions);
	  n--;
	  if (n == 0)
	    break;
	}

      from++;
      /* In the buffer case, if charset_map_loaded became nonzero,
	 recompute P, PEND and STOP from the character positions FROM
	 and TO (presumably because the buffer text may have been
	 relocated -- TODO confirm), then clear the flag again.  */
      if (charset_map_loaded && NILP (string))
	{
	  p = CHAR_POS_ADDR (from);
	  pend = CHAR_POS_ADDR (to);
	  if (from < GPT && to >= GPT)
	    stop = GPT_ADDR;
	  else
	    stop = pend;
	  charset_map_loaded = 0;
	}
    }

  /* Without COUNT, return the first element of POSITIONS (Fcar of an
     empty list gives nil).  With COUNT, POSITIONS was built by consing
     onto the front, so reverse it before returning.  */
  return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
}