Function: treesit-query-capture

treesit-query-capture is a function defined in treesit.c.

Signature

(treesit-query-capture NODE QUERY &optional BEG END NODE-ONLY)

Documentation

Query NODE with patterns in QUERY.

Return a list of (CAPTURE_NAME . NODE). CAPTURE_NAME is the name assigned to the node in QUERY. NODE is the captured node.

QUERY is either a string query, a sexp query, or a compiled query. See Info node (elisp)Pattern Matching for how to write a query in either string or sexp form. A compiled query is much faster than a string or sexp one, so it is recommended to compile your query if it will be used repeatedly.

BEG and END, if both non-nil, specify the region of buffer positions in which the query is executed. Any matching node whose span overlaps with the region between BEG and END is captured; it doesn't have to be completely in the region.

If NODE-ONLY is non-nil, return a list of nodes.

Besides a node, NODE can also be a parser, in which case the root node of that parser is used. NODE can also be a language symbol, in which case the root node of a parser for that language is used. If such a parser doesn't exist, it is created.

Signal treesit-query-error if QUERY is malformed or something else goes wrong. You can use treesit-query-validate to validate and debug the query.

Other relevant functions are documented in the treesit group.

View in manual

Shortdoc

;; treesit
(treesit-query-capture node '((identifier) @id "return" @ret))
    e.g. => ((id . #<treesit-node (identifier) in 195-196>) (ret . #<treesit-node

Source Code

// Defined in /usr/src/emacs/src/treesit.c
// Skipping highlighting due to helpful-max-highlight.
{
  /* Run QUERY against NODE and return the captures.

     NODE may be a node, a parser, or a symbol; the latter two are
     resolved to a root node below.  QUERY may be a compiled query, a
     cons (sexp query), or a string.  BEG and END, when both non-nil,
     restrict the byte range in which the query cursor runs.

     The return value is a list in the order the captures were
     produced.  Each element is a (CAPTURE-NAME . NODE) cons, or just
     the node when NODE_ONLY is non-nil.  Captures belonging to a
     match whose predicates fail are dropped.  */
  if (!(TS_COMPILED_QUERY_P (query)
	|| CONSP (query) || STRINGP (query)))
    wrong_type_argument (Qtreesit_query_p, query);

  treesit_initialize ();

  /* Resolve NODE into an actual node.  */
  Lisp_Object lisp_node;
  if (TS_NODEP (node))
    {
      treesit_check_node (node); /* Check if up-to-date.  */
      lisp_node = node;
    }
  else if (TS_PARSERP (node))
    {
      treesit_check_parser (node); /* Check if deleted.  */
      lisp_node = Ftreesit_parser_root_node (node);
    }
  else if (SYMBOLP (node))
    {
      /* NODE names a language: use the root node of a parser for that
	 language in the current buffer.  */
      Lisp_Object parser
	= Ftreesit_parser_create (node, Fcurrent_buffer (), Qnil);
      lisp_node = Ftreesit_parser_root_node (parser);
    }
  else
    xsignal2 (Qwrong_type_argument,
	      list4 (Qor, Qtreesit_node_p, Qtreesit_parser_p, Qsymbolp),
	      node);

  /* Extract C values from Lisp objects.  */
  TSNode treesit_node
    = XTS_NODE (lisp_node)->node;
  Lisp_Object lisp_parser
    = XTS_NODE (lisp_node)->parser;
  ptrdiff_t visible_beg
    = XTS_PARSER (XTS_NODE (lisp_node)->parser)->visible_beg;
  const TSLanguage *lang
    = ts_parser_language (XTS_PARSER (lisp_parser)->parser);

  /* Check BEG and END.  Each one is validated on its own, even
     though the range is only applied when both are given.  */
  struct buffer *buf = XBUFFER (XTS_PARSER (lisp_parser)->buffer);
  if (!NILP (beg))
    treesit_check_position (beg, buf);
  if (!NILP (end))
    treesit_check_position (end, buf);

  /* Initialize query objects.  At the end of this block, we should
     have a working TSQuery and a TSQueryCursor.  */
  TSQuery *treesit_query;
  TSQueryCursor *cursor;
  bool needs_to_free_query_and_cursor;
  if (TS_COMPILED_QUERY_P (query))
    {
      Lisp_Object signal_symbol = Qnil;
      Lisp_Object signal_data = Qnil;
      treesit_query = treesit_ensure_query_compiled (query, &signal_symbol,
						     &signal_data);
      cursor = XTS_COMPILED_QUERY (query)->cursor;
      /* We don't need to free ts_query and cursor because they
	 are stored in a lisp object, which is tracked by gc.  */
      needs_to_free_query_and_cursor = false;
      if (treesit_query == NULL)
	xsignal (signal_symbol, signal_data);
    }
  else
    {
      /* Since query is not TS_COMPILED_QUERY, it can only be a string
	 or a cons.  */
      if (CONSP (query))
	query = Ftreesit_query_expand (query);
      char *query_string = SSDATA (query);
      uint32_t error_offset;
      TSQueryError error_type;
      treesit_query = ts_query_new (lang, query_string, strlen (query_string),
				    &error_offset, &error_type);
      if (treesit_query == NULL)
	xsignal (Qtreesit_query_error,
		 treesit_compose_query_signal_data (error_offset,
						    error_type, query));
      cursor = ts_query_cursor_new ();
      needs_to_free_query_and_cursor = true;
    }

  /* WARN: After this point, free treesit_query and cursor before every
     signal and return.

     NOTE(review): nothing below visibly guards against a non-local
     exit from the match loop (e.g. from predicate evaluation or
     allocation); if those can signal, a query and cursor created
     above would leak -- confirm.  */

  /* Set query range.  */
  if (!NILP (beg) && !NILP (end))
    {
      /* NOTE(review): CHAR_TO_BYTE presumably converts relative to
	 the current buffer, while BEG and END were validated against
	 BUF -- confirm the two always coincide here.  */
      ptrdiff_t beg_byte = CHAR_TO_BYTE (XFIXNUM (beg));
      ptrdiff_t end_byte = CHAR_TO_BYTE (XFIXNUM (end));
      /* We never let tree-sitter run on buffers too large, so these
	 assertions should never hit.  */
      eassert (beg_byte - visible_beg <= UINT32_MAX);
      eassert (end_byte - visible_beg <= UINT32_MAX);
      /* Tree-sitter offsets are relative to the parser's visible
	 beginning, hence the subtraction.  */
      ts_query_cursor_set_byte_range (cursor,
				      (uint32_t) (beg_byte - visible_beg),
				      (uint32_t) (end_byte - visible_beg));
    }
  else
    {
      /* No range was requested.  The cursor of a compiled query is
	 reused across calls, and executing a query does not reset the
	 cursor's byte range, so explicitly restore the unrestricted
	 range.  Otherwise a range set by an earlier call would
	 silently limit this one.  For a freshly created cursor this
	 is a no-op.  */
      ts_query_cursor_set_byte_range (cursor, 0, UINT32_MAX);
    }

  /* Execute query.  */
  ts_query_cursor_exec (cursor, treesit_query, treesit_node);
  TSQueryMatch match;

  /* Go over each match, collect captures and predicates.  Include the
     captures in the RESULT list unconditionally as we get them, then
     test for predicates.  If predicates pass, then all good, if
     predicates don't pass, revert the result back to the result
     before this loop (PREV_RESULT).  (Predicates control the entire
     match.) This way we don't need to create a list of captures in
     every for loop and nconc it to RESULT every time.  That is indeed
     the initial implementation in which Yoav found nconc being the
     bottleneck (98.4% of the running time spent on nconc).  */
  uint32_t patterns_count = ts_query_pattern_count (treesit_query);
  Lisp_Object result = Qnil;
  Lisp_Object prev_result = result;
  /* Per-pattern cache of predicates, indexed by pattern index.  Qt
     marks a slot that has not been computed yet.  */
  Lisp_Object predicates_table = make_vector (patterns_count, Qt);
  while (ts_query_cursor_next_match (cursor, &match))
    {
      /* Record the checkpoint that we may roll back to.  */
      prev_result = result;
      /* Get captured nodes.  */
      const TSQueryCapture *captures = match.captures;
      for (int idx = 0; idx < match.capture_count; idx++)
	{
	  uint32_t capture_name_len;
	  TSQueryCapture capture = captures[idx];
	  Lisp_Object captured_node = make_treesit_node (lisp_parser,
							 capture.node);

	  Lisp_Object cap;
	  if (NILP (node_only))
	    {
	      const char *capture_name
		= ts_query_capture_name_for_id (treesit_query, capture.index,
						&capture_name_len);
	      cap = Fcons (intern_c_string_1 (capture_name, capture_name_len),
			   captured_node);
	    }
	  else
	    cap = captured_node;

	  /* Captures are pushed in reverse; the list is put back in
	     order by the Fnreverse at the end.  */
	  result = Fcons (cap, result);
	}
      /* Get predicates.  */
      Lisp_Object predicates = AREF (predicates_table, match.pattern_index);
      if (EQ (predicates, Qt))
	{
	  predicates = treesit_predicates_for_pattern (treesit_query,
						       match.pattern_index);
	  ASET (predicates_table, match.pattern_index, predicates);
	}

      /* The captures of this match are the cells from RESULT up to,
	 but not including, PREV_RESULT.  */
      struct capture_range captures_range = { result, prev_result };
      if (!treesit_eval_predicates (captures_range, predicates))
	/* Predicates didn't pass, roll back.  */
	result = prev_result;
    }
  if (needs_to_free_query_and_cursor)
    {
      ts_query_delete (treesit_query);
      ts_query_cursor_delete (cursor);
    }
  return Fnreverse (result);
}