Function: treesit-query-capture

treesit-query-capture is a function defined in treesit.c.

Signature

(treesit-query-capture NODE QUERY &optional BEG END NODE-ONLY GROUPED)

Documentation

Query NODE with patterns in QUERY.

Return a list of (CAPTURE_NAME . NODE). CAPTURE_NAME is the name assigned to the node in QUERY. NODE is the captured node.

QUERY is either a string query, a sexp query, or a compiled query. See Info node (elisp)Pattern Matching for how to write a query in either string or sexp form. A compiled query is much faster than a string or sexp one, so it is recommended to compile your query if it will be used repeatedly.

BEG and END, if both non-nil, specify the region of buffer positions in which the query is executed. Any matching node whose span overlaps with the region between BEG and END is captured; it doesn't have to be completely inside the region.

If GROUPED is non-nil, the function groups the returned list of captures into matches and returns a list of MATCH, where each MATCH is a list whose elements have the form (CAPTURE_NAME . NODE).

If NODE-ONLY is non-nil, return nodes only, and don't include CAPTURE_NAME.

Besides a node, NODE can be a parser, in which case the root node of that parser is used. NODE can also be a language symbol, in which case the root node of a parser for that language is used. If such a parser doesn't exist, it is created.

Signal treesit-query-error if QUERY is malformed or something else goes wrong. You can use treesit-query-validate to validate and debug the query.

Other relevant functions are documented in the treesit group.

View in manual

Shortdoc

;; treesit
(treesit-query-capture node '((identifier) @id "return" @ret))
    e.g. => ((id . #<treesit-node (identifier) in 195-196>) (ret . #<treesit-node

Source Code

// Defined in /usr/src/emacs/src/treesit.c
// Skipping highlighting due to helpful-max-highlight.
{
  /* Run QUERY against NODE and return the captures as a Lisp list.
     Each element is a (CAPTURE_NAME . NODE) cons, or a bare node when
     NODE_ONLY is non-nil.  When GROUPED is non-nil the elements are
     instead lists of such captures, one list per match.  BEG and END,
     when both non-nil, restrict the byte range the query runs on.  */
  if (!(TS_COMPILED_QUERY_P (query)
	|| CONSP (query) || STRINGP (query)))
    wrong_type_argument (Qtreesit_query_p, query);

  treesit_initialize ();

  /* Resolve NODE into an actual node, signals if node not
     up-to-date.  */
  Lisp_Object lisp_node = treesit_resolve_node (node);
  /* As of right now, the node returned by treesit_resolve_node always
     passes treesit_check_node; but it might not be true in the future,
     so adding the line below just to be safe.  */
  treesit_check_node (lisp_node);

  /* Extract C values from Lisp objects.  */
  TSNode treesit_node = XTS_NODE (lisp_node)->node;
  Lisp_Object lisp_parser = XTS_NODE (lisp_node)->parser;

  const TSLanguage *lang
    = ts_parser_language (XTS_PARSER (lisp_parser)->parser);

  /* Check BEG and END.  Each one is validated on its own, even though
     the range is only applied below when both are non-nil.  */
  struct buffer *buf = XBUFFER (XTS_PARSER (lisp_parser)->buffer);
  if (!NILP (beg))
    treesit_check_position (beg, buf);
  if (!NILP (end))
    treesit_check_position (end, buf);

  /* Initialize query objects.  At the end of this block, we should
     have a working TSQuery and a TSQueryCursor.  */
  TSQuery *treesit_query;
  TSQueryCursor *cursor;
  bool needs_to_free_query_and_cursor;
  Lisp_Object signal_symbol;
  Lisp_Object signal_data;
  if (!treesit_initialize_query (query, lang, &treesit_query, &cursor,
				 &needs_to_free_query_and_cursor,
				 &signal_symbol, &signal_data))
    xsignal (signal_symbol, signal_data);

  /* WARN: After this point, if NEEDS_TO_FREE_QUERY_AND_CURSOR is true,
     free TREESIT_QUERY and CURSOR before every signal and return.  */

  /* Set query range.  The cursor takes byte offsets relative to the
     parser's VISIBLE_BEG, not buffer positions.  */
  if (!NILP (beg) && !NILP (end))
    {
      ptrdiff_t visible_beg = XTS_PARSER (lisp_parser)->visible_beg;
      ptrdiff_t beg_byte = CHAR_TO_BYTE (XFIXNUM (beg));
      ptrdiff_t end_byte = CHAR_TO_BYTE (XFIXNUM (end));
      /* We never let tree-sitter run on buffers too large, so these
	 assertions should never hit.
	 NOTE(review): only the upper bound is asserted; presumably
	 treesit_check_position guarantees BEG/END are not before
	 VISIBLE_BEG, otherwise the casts below would wrap -- confirm.  */
      eassert (beg_byte - visible_beg <= UINT32_MAX);
      eassert (end_byte - visible_beg <= UINT32_MAX);
      ts_query_cursor_set_byte_range (cursor,
				      (uint32_t) (beg_byte - visible_beg),
				      (uint32_t) (end_byte - visible_beg));
    }

  /* Execute query.  */
  ts_query_cursor_exec (cursor, treesit_query, treesit_node);
  TSQueryMatch match;

  /* Go over each match, collect captures and predicates.  Include the
     captures in the RESULT list unconditionally as we get them, then
     test for predicates.  If predicates pass, then all good, if
     predicates don't pass, revert the result back to the result
     before this loop (PREV_RESULT).  (Predicates control the entire
     match.)  This way we don't need to create a list of captures in
     every for loop and nconc it to RESULT every time.  That is indeed
     the initial implementation in which Yoav found nconc being the
     bottleneck (98.4% of the running time spent on nconc).  */
  uint32_t patterns_count = ts_query_pattern_count (treesit_query);
  Lisp_Object result = Qnil;
  Lisp_Object prev_result = result;
  /* Per-pattern cache of predicates; Qt marks a slot that has not been
     filled in yet.  */
  Lisp_Object predicates_table = make_vector (patterns_count, Qt);
  Lisp_Object predicate_signal_data = Qnil;

  /* Make the parser's buffer current while the loop runs.
     NOTE(review): the buffer is restored and the query/cursor are
     freed only on the normal path after the loop; presumably nothing
     called inside the loop exits non-locally -- confirm.  */
  struct buffer *old_buf = current_buffer;
  set_buffer_internal (buf);

  while (ts_query_cursor_next_match (cursor, &match))
    {
      /* Depends on the value of GROUPED, we have two modes of
         operation.

         If GROUPED is nil (mode 1), we return a list of captures; in
         this case, we append the captures first, and revert back if the
         captures don't match.

         If GROUPED is non-nil (mode 2), we return a list of match
         groups; in this case, we collect captures into a list first,
         and append to the results after verifying that the group
         matches.  */

      /* Mode 1: Record the checkpoint that we may roll back to.  */
      prev_result = result;
      /* Mode 2: Create a list storing captures of this match group.  */
      Lisp_Object match_group = Qnil;
      /* 1. Get captured nodes.  Both RESULT and MATCH_GROUP are built
         by consing onto the front, so they are in reverse order until
         the Fnreverse calls further down.  */
      const TSQueryCapture *captures = match.captures;
      for (int idx = 0; idx < match.capture_count; idx++)
	{
	  uint32_t capture_name_len;
	  TSQueryCapture capture = captures[idx];
	  Lisp_Object captured_node = make_treesit_node (lisp_parser,
							 capture.node);

	  Lisp_Object cap;
	  if (NILP (node_only))
	    {
	      const char *capture_name
		= ts_query_capture_name_for_id (treesit_query, capture.index,
						&capture_name_len);
	      cap = Fcons (intern_c_string_1 (capture_name, capture_name_len),
			   captured_node);
	    }
	  else
	    cap = captured_node;

	  if (NILP (grouped))
	    result = Fcons (cap, result); /* Mode 1. */
	  else
	    match_group = Fcons (cap, match_group); /* Mode 2. */
	}
      /* 2. Get predicates and check whether this match can be
         included in the result list.  The predicates of a pattern are
         computed once and cached in PREDICATES_TABLE.  */
      Lisp_Object predicates = AREF (predicates_table, match.pattern_index);
      if (BASE_EQ (predicates, Qt))
	{
	  predicates = treesit_predicates_for_pattern (treesit_query,
						       match.pattern_index);
	  ASET (predicates_table, match.pattern_index, predicates);
	}

      /* Mode 1: the captures of this match are the cells from RESULT
         up to (excluding) PREV_RESULT.  */
      struct capture_range captures_range = { result, prev_result };
      /* Mode 2: the captures of this match are all of MATCH_GROUP.  */
      if (!NILP (grouped))
	{
	  captures_range.start = match_group;
	  captures_range.end = Qnil;
	}
      /* Named so as not to shadow the TSQueryMatch MATCH above.  */
      bool predicates_pass
	= treesit_eval_predicates (captures_range, predicates,
				   &predicate_signal_data);

      /* A predicate reported an error: stop here, clean up below, and
         signal afterwards.  */
      if (!NILP (predicate_signal_data))
	break;

      /* Mode 1: Predicates didn't pass, roll back.  */
      if (!predicates_pass && NILP (grouped))
	result = prev_result;
      /* Mode 2: Predicates pass, add this match group.  */
      if (predicates_pass && !NILP (grouped))
	result = Fcons (Fnreverse (match_group), result);
    }

  /* Final clean up.  */
  if (needs_to_free_query_and_cursor)
    {
      ts_query_delete (treesit_query);
      ts_query_cursor_delete (cursor);
    }
  set_buffer_internal (old_buf);

  /* Some capture predicate signaled an error.  */
  if (!NILP (predicate_signal_data))
    xsignal (Qtreesit_query_error, predicate_signal_data);

  return Fnreverse (result);
}