Function: set-buffer-multibyte

set-buffer-multibyte is a function defined in buffer.c.

Signature

(set-buffer-multibyte FLAG)

Documentation

Set the multibyte flag of the current buffer to FLAG.

If FLAG is t, this makes the buffer a multibyte buffer. If FLAG is nil, this makes the buffer a single-byte buffer. In these cases, the buffer contents remain unchanged as a sequence of bytes but the contents viewed as characters do change. If FLAG is to, this makes the buffer a multibyte buffer by changing all eight-bit bytes to eight-bit characters. If the multibyte flag was really changed, undo information of the current buffer is cleared.

View in manual

Probably introduced at or before Emacs version 20.3.

Source Code

// Defined in /usr/src/emacs/src/buffer.c
// Skipping highlighting due to helpful-max-highlight.
{
  struct Lisp_Marker *tail, *markers;
  Lisp_Object btail, other;
  ptrdiff_t begv, zv;
  bool narrowed = (BEG != BEGV || Z != ZV);
  bool modified_p = !NILP (Fbuffer_modified_p (Qnil));
  Lisp_Object old_undo = BVAR (current_buffer, undo_list);

  if (current_buffer->base_buffer)
    error ("Cannot do `set-buffer-multibyte' on an indirect buffer");

  /* Do nothing if nothing actually changes.  */
  if (NILP (flag) == NILP (BVAR (current_buffer, enable_multibyte_characters)))
    return flag;

  /* Don't record these buffer changes.  We will put a special undo entry
     instead.  */
  bset_undo_list (current_buffer, Qt);

  /* If the cached position is for this buffer, clear it out.  */
  clear_charpos_cache (current_buffer);

  if (NILP (flag))
    begv = BEGV_BYTE, zv = ZV_BYTE;
  else
    begv = BEGV, zv = ZV;

  if (narrowed)
    error ("Changing multibyteness in a narrowed buffer");

  invalidate_buffer_caches (current_buffer, BEGV, ZV);

  if (NILP (flag))
    {
      ptrdiff_t pos, stop;
      unsigned char *p;

      /* Do this first, so it can use CHAR_TO_BYTE
	 to calculate the old correspondences.  */
      set_intervals_multibyte (false);
      set_overlays_multibyte (false);

      bset_enable_multibyte_characters (current_buffer, Qnil);

      Z = Z_BYTE;
      BEGV = BEGV_BYTE;
      ZV = ZV_BYTE;
      GPT = GPT_BYTE;
      TEMP_SET_PT_BOTH (PT_BYTE, PT_BYTE);


      for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
	tail->charpos = tail->bytepos;

      /* Convert multibyte form of 8-bit characters to unibyte.  */
      pos = BEG;
      stop = GPT;
      p = BEG_ADDR;
      while (1)
	{
	  if (pos == stop)
	    {
	      if (pos == Z)
		break;
	      p = GAP_END_ADDR;
	      stop = Z;
	    }
	  if (ASCII_CHAR_P (*p))
	    p++, pos++;
	  else if (CHAR_BYTE8_HEAD_P (*p))
	    {
	      int bytes, c = string_char_and_length (p, &bytes);
	      /* Delete all bytes for this 8-bit character but the
		 last one, and change the last one to the character
		 code.  */
	      bytes--;
	      del_range_2 (pos, pos, pos + bytes, pos + bytes, 0);
	      p = GAP_END_ADDR;
	      *p++ = c;
	      pos++;
	      if (begv > pos)
		begv -= bytes;
	      if (zv > pos)
		zv -= bytes;
	      stop = Z;
	    }
	  else
	    {
	      int bytes = BYTES_BY_CHAR_HEAD (*p);
	      p += bytes, pos += bytes;
	    }
	}
    }
  else
    {
      ptrdiff_t pt = PT;
      ptrdiff_t pos, stop;
      unsigned char *p, *pend;

      /* Be sure not to have a multibyte sequence striding over the GAP.
	 Ex: We change this: "...abc\302 _GAP_ \241def..."
	     to: "...abc _GAP_ \302\241def..."  */

      if (EQ (flag, Qt)
	  && GPT_BYTE > 1 && GPT_BYTE < Z_BYTE
	  && ! CHAR_HEAD_P (*(GAP_END_ADDR)))
	{
	  unsigned char *q = GPT_ADDR - 1;

	  while (! CHAR_HEAD_P (*q) && q > BEG_ADDR) q--;
	  if (LEADING_CODE_P (*q))
	    {
	      ptrdiff_t new_gpt = GPT_BYTE - (GPT_ADDR - q);

	      move_gap_both (new_gpt, new_gpt);
	    }
	}

      /* Make the buffer contents valid as multibyte by converting
	 8-bit characters to multibyte form.  */
      pos = BEG;
      stop = GPT;
      p = BEG_ADDR;
      pend = GPT_ADDR;
      while (1)
	{
	  int bytes;

	  if (pos == stop)
	    {
	      if (pos == Z)
		break;
	      p = GAP_END_ADDR;
	      pend = Z_ADDR;
	      stop = Z;
	    }

	  if (ASCII_CHAR_P (*p))
	    p++, pos++;
	  else if (EQ (flag, Qt)
		   && 0 < (bytes = multibyte_length (p, pend, true, false)))
	    p += bytes, pos += bytes;
	  else
	    {
	      unsigned char tmp[MAX_MULTIBYTE_LENGTH];
	      int c;

	      c = BYTE8_TO_CHAR (*p);
	      bytes = CHAR_STRING (c, tmp);
	      *p = tmp[0];
	      TEMP_SET_PT_BOTH (pos + 1, pos + 1);
	      bytes--;
	      insert_1_both ((char *) tmp + 1, bytes, bytes, 1, 0, 0);
	      /* Now the gap is after the just inserted data.  */
	      pos = GPT;
	      p = GAP_END_ADDR;
	      if (pos <= begv)
		begv += bytes;
	      if (pos <= zv)
		zv += bytes;
	      if (pos <= pt)
		pt += bytes;
	      pend = Z_ADDR;
	      stop = Z;
	    }
	}

      if (pt != PT)
	TEMP_SET_PT (pt);

      /* Do this first, so that chars_in_text asks the right question.
	 set_intervals_multibyte needs it too.  */
      bset_enable_multibyte_characters (current_buffer, Qt);

      GPT_BYTE = advance_to_char_boundary (GPT_BYTE);
      GPT = chars_in_text (BEG_ADDR, GPT_BYTE - BEG_BYTE) + BEG;

      Z = chars_in_text (GAP_END_ADDR, Z_BYTE - GPT_BYTE) + GPT;

      BEGV_BYTE = advance_to_char_boundary (BEGV_BYTE);
      if (BEGV_BYTE > GPT_BYTE)
	BEGV = chars_in_text (GAP_END_ADDR, BEGV_BYTE - GPT_BYTE) + GPT;
      else
	BEGV = chars_in_text (BEG_ADDR, BEGV_BYTE - BEG_BYTE) + BEG;

      ZV_BYTE = advance_to_char_boundary (ZV_BYTE);
      if (ZV_BYTE > GPT_BYTE)
	ZV = chars_in_text (GAP_END_ADDR, ZV_BYTE - GPT_BYTE) + GPT;
      else
	ZV = chars_in_text (BEG_ADDR, ZV_BYTE - BEG_BYTE) + BEG;

      {
	ptrdiff_t byte = advance_to_char_boundary (PT_BYTE);
	ptrdiff_t position;

	if (byte > GPT_BYTE)
	  position = chars_in_text (GAP_END_ADDR, byte - GPT_BYTE) + GPT;
	else
	  position = chars_in_text (BEG_ADDR, byte - BEG_BYTE) + BEG;
	TEMP_SET_PT_BOTH (position, byte);
      }

      tail = markers = BUF_MARKERS (current_buffer);

      /* This prevents BYTE_TO_CHAR (that is, buf_bytepos_to_charpos) from
	 getting confused by the markers that have not yet been updated.
	 It is also a signal that it should never create a marker.  */
      BUF_MARKERS (current_buffer) = NULL;

      for (; tail; tail = tail->next)
	{
	  tail->bytepos = advance_to_char_boundary (tail->bytepos);
	  tail->charpos = BYTE_TO_CHAR (tail->bytepos);
	}

      /* Make sure no markers were put on the chain
	 while the chain value was incorrect.  */
      if (BUF_MARKERS (current_buffer))
	emacs_abort ();

      BUF_MARKERS (current_buffer) = markers;

      /* Do this last, so it can calculate the new correspondences
	 between chars and bytes.  */
      /* FIXME: Is it worth the trouble, really?  Couldn't we just throw
         away all the text-properties instead of trying to guess how
         to adjust them?  AFAICT the result is not reliable anyway.  */
      set_intervals_multibyte (true);
      set_overlays_multibyte (true);
    }

  if (!EQ (old_undo, Qt))
    {
      /* Represent all the above changes by a special undo entry.  */
      bset_undo_list (current_buffer,
		      Fcons (list3 (Qapply,
				    intern ("set-buffer-multibyte"),
				    NILP (flag) ? Qt : Qnil),
			     old_undo));
    }

  current_buffer->prevent_redisplay_optimizations_p = 1;

  /* If buffer is shown in a window, let redisplay consider other windows.  */
  if (buffer_window_count (current_buffer))
    windows_or_buffers_changed = 10;

  /* Copy this buffer's new multibyte status
     into all of its indirect buffers.  */
  FOR_EACH_LIVE_BUFFER (btail, other)
    {
      struct buffer *o = XBUFFER (other);
      if (o->base_buffer == current_buffer && BUFFER_LIVE_P (o))
	{
	  BVAR (o, enable_multibyte_characters)
	    = BVAR (current_buffer, enable_multibyte_characters);
	  o->prevent_redisplay_optimizations_p = true;
	}
    }

  /* Restore the modifiedness of the buffer.  */
  if (!modified_p && !NILP (Fbuffer_modified_p (Qnil)))
    Fset_buffer_modified_p (Qnil);

  /* Update coding systems of this buffer's process (if any).  */
  {
    Lisp_Object process;

    process = Fget_buffer_process (Fcurrent_buffer ());
    if (PROCESSP (process))
      setup_process_coding_systems (process);
  }

  return flag;
}