[LLDB] Add `ScalarLiteralNode` and literal parsing in DIL #152308

kuilpd · 2025-08-06T13:51:21Z

This patch introduces ScalarLiteralNode without any uses by other nodes yet. It also includes lexing and parsing for integer and floating point numbers, and makes a function getAutoSenseRadix in StringRef global.

llvmbot · 2025-08-06T13:51:44Z

@llvm/pr-subscribers-llvm-support
@llvm/pr-subscribers-llvm-adt

@llvm/pr-subscribers-lldb

Author: Ilia Kuklin (kuilpd)

Changes

This patch introduces ScalarLiteralNode without any uses by other nodes yet. It also includes lexing and parsing for integer and floating point numbers, and makes a function getAutoSenseRadix in StringRef global.

Patch is 25.14 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/152308.diff

17 Files Affected:

(modified) lldb/docs/dil-expr-lang.ebnf (+3)
(modified) lldb/include/lldb/ValueObject/DILAST.h (+37)
(modified) lldb/include/lldb/ValueObject/DILEval.h (+7)
(modified) lldb/include/lldb/ValueObject/DILLexer.h (+2-1)
(modified) lldb/include/lldb/ValueObject/DILParser.h (+3)
(modified) lldb/source/ValueObject/DILAST.cpp (+5)
(modified) lldb/source/ValueObject/DILEval.cpp (+110)
(modified) lldb/source/ValueObject/DILLexer.cpp (+43-11)
(modified) lldb/source/ValueObject/DILParser.cpp (+72)
(modified) lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py (+2-2)
(modified) lldb/test/API/commands/frame/var-dil/basics/Indirection/TestFrameVarDILIndirection.py (+1-1)
(added) lldb/test/API/commands/frame/var-dil/expr/Arithmetic/Makefile (+3)
(added) lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py (+39)
(added) lldb/test/API/commands/frame/var-dil/expr/Arithmetic/main.cpp (+3)
(modified) lldb/unittests/ValueObject/DILLexerTests.cpp (+15-6)
(modified) llvm/include/llvm/ADT/StringRef.h (+2)
(modified) llvm/lib/Support/StringRef.cpp (+3-3)

diff --git a/lldb/docs/dil-expr-lang.ebnf b/lldb/docs/dil-expr-lang.ebnf
index 783432dabd6db..da1796d936c6a 100644
--- a/lldb/docs/dil-expr-lang.ebnf
+++ b/lldb/docs/dil-expr-lang.ebnf
@@ -31,6 +31,9 @@ identifier = ? C99 Identifier ? ;
 
 integer_literal = ? Integer constant: hexademical, decimal, octal, binary ? ;
 
+numeric_literal = ? Integer constant: hexademical, decimal, octal, binary ?
+                | ? Floating constant ? ;
+
 register = "$" ? Register name ? ;
 
 nested_name_specifier = type_name "::"
diff --git a/lldb/include/lldb/ValueObject/DILAST.h b/lldb/include/lldb/ValueObject/DILAST.h
index 709f0639135f1..a0e5909a8c6a7 100644
--- a/lldb/include/lldb/ValueObject/DILAST.h
+++ b/lldb/include/lldb/ValueObject/DILAST.h
@@ -23,6 +23,7 @@ enum class NodeKind {
   eErrorNode,
   eIdentifierNode,
   eMemberOfNode,
+  eScalarLiteralNode,
   eUnaryOpNode,
 };
 
@@ -178,6 +179,40 @@ class BitFieldExtractionNode : public ASTNode {
   int64_t m_last_index;
 };
 
+class ScalarLiteralNode : public ASTNode {
+public:
+  ScalarLiteralNode(uint32_t location, Scalar value, uint32_t radix,
+                    bool is_unsigned, bool is_long, bool is_longlong)
+      : ASTNode(location, NodeKind::eScalarLiteralNode), m_value(value),
+        m_radix(radix), m_is_unsigned(is_unsigned), m_is_long(is_long),
+        m_is_longlong(is_longlong) {}
+
+  ScalarLiteralNode(uint32_t location, Scalar value, bool is_float)
+      : ASTNode(location, NodeKind::eScalarLiteralNode), m_value(value),
+        m_is_float(is_float) {}
+
+  llvm::Expected<lldb::ValueObjectSP> Accept(Visitor *v) const override;
+
+  Scalar GetValue() const & { return m_value; }
+  uint32_t GetRadix() const { return m_radix; }
+  bool IsUnsigned() const { return m_is_unsigned; }
+  bool IsLong() const { return m_is_long; }
+  bool IsLongLong() const { return m_is_longlong; }
+  bool IsFloat() const { return m_is_float; }
+
+  static bool classof(const ASTNode *node) {
+    return node->GetKind() == NodeKind::eScalarLiteralNode;
+  }
+
+private:
+  Scalar m_value;
+  uint32_t m_radix;
+  bool m_is_unsigned;
+  bool m_is_long;
+  bool m_is_longlong;
+  bool m_is_float;
+};
+
 /// This class contains one Visit method for each specialized type of
 /// DIL AST node. The Visit methods are used to dispatch a DIL AST node to
 /// the correct function in the DIL expression evaluator for evaluating that
@@ -195,6 +230,8 @@ class Visitor {
   Visit(const ArraySubscriptNode *node) = 0;
   virtual llvm::Expected<lldb::ValueObjectSP>
   Visit(const BitFieldExtractionNode *node) = 0;
+  virtual llvm::Expected<lldb::ValueObjectSP>
+  Visit(const ScalarLiteralNode *node) = 0;
 };
 
 } // namespace lldb_private::dil
diff --git a/lldb/include/lldb/ValueObject/DILEval.h b/lldb/include/lldb/ValueObject/DILEval.h
index 45e29b3ddcd7b..22a6c5bd0af9a 100644
--- a/lldb/include/lldb/ValueObject/DILEval.h
+++ b/lldb/include/lldb/ValueObject/DILEval.h
@@ -54,6 +54,13 @@ class Interpreter : Visitor {
   Visit(const ArraySubscriptNode *node) override;
   llvm::Expected<lldb::ValueObjectSP>
   Visit(const BitFieldExtractionNode *node) override;
+  llvm::Expected<lldb::ValueObjectSP>
+  Visit(const ScalarLiteralNode *node) override;
+
+  llvm::Expected<CompilerType>
+  PickLiteralType(lldb::TypeSystemSP type_system,
+                  std::shared_ptr<ExecutionContextScope> ctx,
+                  const ScalarLiteralNode *literal);
 
   // Used by the interpreter to create objects, perform casts, etc.
   lldb::TargetSP m_target;
diff --git a/lldb/include/lldb/ValueObject/DILLexer.h b/lldb/include/lldb/ValueObject/DILLexer.h
index 9c1ba97680253..a9f01785c6c20 100644
--- a/lldb/include/lldb/ValueObject/DILLexer.h
+++ b/lldb/include/lldb/ValueObject/DILLexer.h
@@ -28,11 +28,12 @@ class Token {
     arrow,
     coloncolon,
     eof,
+    floating_constant,
     identifier,
+    integer_constant,
     l_paren,
     l_square,
     minus,
-    numeric_constant,
     period,
     r_paren,
     r_square,
diff --git a/lldb/include/lldb/ValueObject/DILParser.h b/lldb/include/lldb/ValueObject/DILParser.h
index 9eda7bac4a364..90df109337dcf 100644
--- a/lldb/include/lldb/ValueObject/DILParser.h
+++ b/lldb/include/lldb/ValueObject/DILParser.h
@@ -96,6 +96,9 @@ class DILParser {
   std::string ParseIdExpression();
   std::string ParseUnqualifiedId();
   std::optional<int64_t> ParseIntegerConstant();
+  ASTNodeUP ParseNumericLiteral();
+  ASTNodeUP ParseIntegerLiteral();
+  ASTNodeUP ParseFloatingPointLiteral();
 
   void BailOut(const std::string &error, uint32_t loc, uint16_t err_len);
 
diff --git a/lldb/source/ValueObject/DILAST.cpp b/lldb/source/ValueObject/DILAST.cpp
index b1cd824c2299e..38215ae18f6ce 100644
--- a/lldb/source/ValueObject/DILAST.cpp
+++ b/lldb/source/ValueObject/DILAST.cpp
@@ -37,4 +37,9 @@ BitFieldExtractionNode::Accept(Visitor *v) const {
   return v->Visit(this);
 }
 
+llvm::Expected<lldb::ValueObjectSP>
+ScalarLiteralNode::Accept(Visitor *v) const {
+  return v->Visit(this);
+}
+
 } // namespace lldb_private::dil
diff --git a/lldb/source/ValueObject/DILEval.cpp b/lldb/source/ValueObject/DILEval.cpp
index 6f28434c646cd..ee667e9306ca5 100644
--- a/lldb/source/ValueObject/DILEval.cpp
+++ b/lldb/source/ValueObject/DILEval.cpp
@@ -7,7 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/ValueObject/DILEval.h"
+#include "lldb/Core/Module.h"
 #include "lldb/Symbol/CompileUnit.h"
+#include "lldb/Symbol/TypeSystem.h"
 #include "lldb/Symbol/VariableList.h"
 #include "lldb/Target/RegisterContext.h"
 #include "lldb/ValueObject/DILAST.h"
@@ -402,4 +404,112 @@ Interpreter::Visit(const BitFieldExtractionNode *node) {
   return child_valobj_sp;
 }
 
+static lldb::TypeSystemSP GetTypeSystemFromCU(std::shared_ptr<StackFrame> ctx) {
+  SymbolContext symbol_context =
+      ctx->GetSymbolContext(lldb::eSymbolContextCompUnit);
+  auto language = symbol_context.comp_unit->GetLanguage();
+
+  symbol_context = ctx->GetSymbolContext(lldb::eSymbolContextModule);
+  auto type_system =
+      symbol_context.module_sp->GetTypeSystemForLanguage(language);
+
+  if (type_system)
+    return *type_system;
+
+  return lldb::TypeSystemSP();
+}
+
+static CompilerType GetBasicType(lldb::TypeSystemSP type_system,
+                                 lldb::BasicType basic_type) {
+  if (type_system)
+    if (auto compiler_type = type_system.get()->GetBasicTypeFromAST(basic_type))
+      return compiler_type;
+
+  CompilerType empty_type;
+  return empty_type;
+}
+
+llvm::Expected<CompilerType>
+Interpreter::PickLiteralType(lldb::TypeSystemSP type_system,
+                             std::shared_ptr<ExecutionContextScope> ctx,
+                             const ScalarLiteralNode *literal) {
+  Scalar scalar = literal->GetValue();
+  if (scalar.GetType() == Scalar::e_float) {
+    if (literal->IsFloat())
+      return GetBasicType(type_system, lldb::eBasicTypeFloat);
+    return GetBasicType(type_system, lldb::eBasicTypeDouble);
+  } else if (scalar.GetType() == Scalar::e_int) {
+    // Binary, Octal, Hexadecimal and literals with a U suffix are allowed to be
+    // an unsigned integer.
+    bool unsigned_is_allowed =
+        literal->IsUnsigned() || literal->GetRadix() != 10;
+
+    // Try int/unsigned int.
+    uint64_t int_byte_size = 0;
+    if (auto temp = GetBasicType(type_system, lldb::eBasicTypeInt)
+                        .GetByteSize(ctx.get()))
+      int_byte_size = *temp;
+    unsigned int_size = int_byte_size * CHAR_BIT;
+    llvm::APInt apint = scalar.GetAPSInt();
+    if (!literal->IsLong() && !literal->IsLongLong() &&
+        apint.isIntN(int_size)) {
+      if (!literal->IsUnsigned() && apint.isIntN(int_size - 1))
+        return GetBasicType(type_system, lldb::eBasicTypeInt);
+      if (unsigned_is_allowed)
+        return GetBasicType(type_system, lldb::eBasicTypeUnsignedInt);
+    }
+    // Try long/unsigned long.
+    uint64_t long_byte_size = 0;
+    if (auto temp = GetBasicType(type_system, lldb::eBasicTypeLong)
+                        .GetByteSize(ctx.get()))
+      long_byte_size = *temp;
+    unsigned long_size = long_byte_size * CHAR_BIT;
+    if (!literal->IsLongLong() && apint.isIntN(long_size)) {
+      if (!literal->IsUnsigned() && apint.isIntN(long_size - 1))
+        return GetBasicType(type_system, lldb::eBasicTypeLong);
+      if (unsigned_is_allowed)
+        return GetBasicType(type_system, lldb::eBasicTypeUnsignedLong);
+    }
+    // Try long long/unsigned long long.
+    uint64_t long_long_byte_size = 0;
+    if (auto temp = GetBasicType(type_system, lldb::eBasicTypeLongLong)
+                        .GetByteSize(ctx.get()))
+      long_long_byte_size = *temp;
+    unsigned long_long_size = long_long_byte_size * CHAR_BIT;
+    if (apint.isIntN(long_long_size)) {
+      if (!literal->IsUnsigned() && apint.isIntN(long_long_size - 1))
+        return GetBasicType(type_system, lldb::eBasicTypeLongLong);
+      // If we still couldn't decide a type, we probably have something that
+      // does not fit in a signed long long, but has no U suffix. Also known as:
+      //
+      //  warning: integer literal is too large to be represented in a signed
+      //  integer type, interpreting as unsigned [-Wimplicitly-unsigned-literal]
+      //
+      return GetBasicType(type_system, lldb::eBasicTypeUnsignedLongLong);
+    }
+    return llvm::make_error<DILDiagnosticError>(
+        m_expr,
+        "integer literal is too large to be represented in any integer type",
+        literal->GetLocation());
+  }
+  return llvm::make_error<DILDiagnosticError>(
+      m_expr, "unable to create a const literal", literal->GetLocation());
+}
+
+llvm::Expected<lldb::ValueObjectSP>
+Interpreter::Visit(const ScalarLiteralNode *node) {
+  auto type_system = GetTypeSystemFromCU(m_exe_ctx_scope);
+  if (type_system) {
+    auto type = PickLiteralType(type_system, m_exe_ctx_scope, node);
+    if (type) {
+      Scalar scalar = node->GetValue();
+      return ValueObject::CreateValueObjectFromScalar(m_target, scalar, *type,
+                                                      "result");
+    } else
+      return type.takeError();
+  }
+  return llvm::make_error<DILDiagnosticError>(
+      m_expr, "unable to create a const literal", node->GetLocation());
+}
+
 } // namespace lldb_private::dil
diff --git a/lldb/source/ValueObject/DILLexer.cpp b/lldb/source/ValueObject/DILLexer.cpp
index eaefaf484bc18..00f9a0c515461 100644
--- a/lldb/source/ValueObject/DILLexer.cpp
+++ b/lldb/source/ValueObject/DILLexer.cpp
@@ -28,16 +28,18 @@ llvm::StringRef Token::GetTokenName(Kind kind) {
     return "coloncolon";
   case Kind::eof:
     return "eof";
+  case Kind::floating_constant:
+    return "floating_constant";
   case Kind::identifier:
     return "identifier";
+  case Kind::integer_constant:
+    return "integer_constant";
   case Kind::l_paren:
     return "l_paren";
   case Kind::l_square:
     return "l_square";
   case Kind::minus:
     return "minus";
-  case Kind::numeric_constant:
-    return "numeric_constant";
   case Kind::period:
     return "period";
   case Kind::r_paren:
@@ -72,12 +74,39 @@ static std::optional<llvm::StringRef> IsWord(llvm::StringRef expr,
 
 static bool IsNumberBodyChar(char ch) { return IsDigit(ch) || IsLetter(ch); }
 
-static std::optional<llvm::StringRef> IsNumber(llvm::StringRef expr,
-                                               llvm::StringRef &remainder) {
-  if (IsDigit(remainder[0])) {
-    llvm::StringRef number = remainder.take_while(IsNumberBodyChar);
-    remainder = remainder.drop_front(number.size());
-    return number;
+static std::optional<llvm::StringRef> IsNumber(llvm::StringRef &remainder,
+                                               bool &isFloat) {
+  llvm::StringRef::iterator cur_pos = remainder.begin();
+  if (*cur_pos == '.') {
+    auto next_pos = cur_pos + 1;
+    if (next_pos == remainder.end() || !IsDigit(*next_pos))
+      return std::nullopt;
+  }
+  if (IsDigit(*(cur_pos)) || *(cur_pos) == '.') {
+    while (IsNumberBodyChar(*cur_pos))
+      cur_pos++;
+
+    if (*cur_pos == '.') {
+      isFloat = true;
+      cur_pos++;
+      while (IsNumberBodyChar(*cur_pos))
+        cur_pos++;
+
+      // Check if there's an exponent
+      char prev_ch = *(cur_pos - 1);
+      if (prev_ch == 'e' || prev_ch == 'E' || prev_ch == 'p' ||
+          prev_ch == 'P') {
+        if (*(cur_pos) == '+' || *(cur_pos) == '-') {
+          cur_pos++;
+          while (IsNumberBodyChar(*cur_pos))
+            cur_pos++;
+        }
+      }
+    }
+
+    llvm::StringRef number = remainder.substr(0, cur_pos - remainder.begin());
+    if (remainder.consume_front(number))
+      return number;
   }
   return std::nullopt;
 }
@@ -106,9 +135,12 @@ llvm::Expected<Token> DILLexer::Lex(llvm::StringRef expr,
     return Token(Token::eof, "", (uint32_t)expr.size());
 
   uint32_t position = cur_pos - expr.begin();
-  std::optional<llvm::StringRef> maybe_number = IsNumber(expr, remainder);
-  if (maybe_number)
-    return Token(Token::numeric_constant, maybe_number->str(), position);
+  bool isFloat = false;
+  std::optional<llvm::StringRef> maybe_number = IsNumber(remainder, isFloat);
+  if (maybe_number) {
+    auto kind = isFloat ? Token::floating_constant : Token::integer_constant;
+    return Token(kind, maybe_number->str(), position);
+  }
   std::optional<llvm::StringRef> maybe_word = IsWord(expr, remainder);
   if (maybe_word)
     return Token(Token::identifier, maybe_word->str(), position);
diff --git a/lldb/source/ValueObject/DILParser.cpp b/lldb/source/ValueObject/DILParser.cpp
index eac41fab90763..35eb6d62b7ba4 100644
--- a/lldb/source/ValueObject/DILParser.cpp
+++ b/lldb/source/ValueObject/DILParser.cpp
@@ -179,10 +179,13 @@ ASTNodeUP DILParser::ParsePostfixExpression() {
 // Parse a primary_expression.
 //
 //  primary_expression:
+//    numeric_literal
 //    id_expression
 //    "(" expression ")"
 //
 ASTNodeUP DILParser::ParsePrimaryExpression() {
+  if (CurToken().IsOneOf({Token::integer_constant, Token::floating_constant}))
+    return ParseNumericLiteral();
   if (CurToken().IsOneOf(
           {Token::coloncolon, Token::identifier, Token::l_paren})) {
     // Save the source location for the diagnostics message.
@@ -346,6 +349,7 @@ void DILParser::BailOut(const std::string &error, uint32_t loc,
   m_dil_lexer.ResetTokenIdx(m_dil_lexer.NumLexedTokens() - 1);
 }
 
+// FIXME: Remove this once subscript operator uses ScalarLiteralNode.
 // Parse a integer_literal.
 //
 //  integer_literal:
@@ -370,6 +374,74 @@ std::optional<int64_t> DILParser::ParseIntegerConstant() {
   return std::nullopt;
 }
 
+// Parse a numeric_literal.
+//
+//  numeric_literal:
+//    ? Token::integer_constant ?
+//    ? Token::floating_constant ?
+//
+ASTNodeUP DILParser::ParseNumericLiteral() {
+  ASTNodeUP numeric_constant;
+  if (CurToken().Is(Token::integer_constant))
+    numeric_constant = ParseIntegerLiteral();
+  else
+    numeric_constant = ParseFloatingPointLiteral();
+  if (numeric_constant->GetKind() == NodeKind::eErrorNode) {
+    BailOut(llvm::formatv("Failed to parse token as numeric-constant: {0}",
+                          CurToken()),
+            CurToken().GetLocation(), CurToken().GetSpelling().length());
+    return std::make_unique<ErrorNode>();
+  }
+  m_dil_lexer.Advance();
+  return numeric_constant;
+}
+
+ASTNodeUP DILParser::ParseIntegerLiteral() {
+  Token token = CurToken();
+  auto spelling = token.GetSpelling();
+  llvm::StringRef spelling_ref = spelling;
+
+  auto radix = llvm::getAutoSenseRadix(spelling_ref);
+  bool is_unsigned = false, is_long = false, is_longlong = false;
+  if (spelling_ref.consume_back_insensitive("ll"))
+    is_longlong = true;
+  if (spelling_ref.consume_back_insensitive("l"))
+    is_long = true;
+  if (spelling_ref.consume_back_insensitive("u"))
+    is_unsigned = true;
+
+  llvm::APInt raw_value;
+  if (!spelling_ref.getAsInteger(radix, raw_value)) {
+    Scalar scalar_value(raw_value);
+    return std::make_unique<ScalarLiteralNode>(token.GetLocation(),
+                                               scalar_value, radix, is_unsigned,
+                                               is_long, is_longlong);
+  }
+  return std::make_unique<ErrorNode>();
+}
+
+ASTNodeUP DILParser::ParseFloatingPointLiteral() {
+  Token token = CurToken();
+  auto spelling = token.GetSpelling();
+  llvm::StringRef spelling_ref = spelling;
+
+  bool is_float = false;
+  llvm::APFloat raw_float(llvm::APFloat::IEEEdouble());
+  if (spelling_ref.consume_back_insensitive("f")) {
+    is_float = true;
+    raw_float = llvm::APFloat(llvm::APFloat::IEEEsingle());
+  }
+
+  auto StatusOrErr = raw_float.convertFromString(
+      spelling_ref, llvm::APFloat::rmNearestTiesToEven);
+  if (!errorToBool(StatusOrErr.takeError())) {
+    Scalar scalar_value(raw_float);
+    return std::make_unique<ScalarLiteralNode>(token.GetLocation(),
+                                               scalar_value, is_float);
+  }
+  return std::make_unique<ErrorNode>();
+}
+
 void DILParser::Expect(Token::Kind kind) {
   if (CurToken().IsNot(kind)) {
     BailOut(llvm::formatv("expected {0}, got: {1}", kind, CurToken()),
diff --git a/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py
index 0f56057189395..1d0340160f5e4 100644
--- a/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py
+++ b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py
@@ -66,7 +66,7 @@ def test_subscript(self):
         self.expect(
             "frame var 'int_arr[1.0]'",
             error=True,
-            substrs=["expected 'r_square', got: <'.'"],
+            substrs=["failed to parse integer constant: <'1.0' (floating_constant)>"],
         )
 
         # Base should be a "pointer to T" and index should be of an integral type.
@@ -88,7 +88,7 @@ def test_subscript(self):
         self.expect(
             "frame var '1[2]'",
             error=True,
-            substrs=["Unexpected token"],
+            substrs=["subscripted value is not an array or pointer"],
         )
 
         # Base should not be a pointer to void
diff --git a/lldb/test/API/commands/frame/var-dil/basics/Indirection/TestFrameVarDILIndirection.py b/lldb/test/API/commands/frame/var-dil/basics/Indirection/TestFrameVarDILIndirection.py
index 38c72131d797c..28eba4f1a70bc 100644
--- a/lldb/test/API/commands/frame/var-dil/basics/Indirection/TestFrameVarDILIndirection.py
+++ b/lldb/test/API/commands/frame/var-dil/basics/Indirection/TestFrameVarDILIndirection.py
@@ -35,7 +35,7 @@ def test_frame_var(self):
         self.expect(
             "frame variable '*1'",
             error=True,
-            substrs=["Unexpected token: <'1' (numeric_constant)>"],
+            substrs=["dereference failed: not a pointer, reference or array type"],
         )
         self.expect(
             "frame variable '*val'",
diff --git a/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/Makefile b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/Makefile
new file mode 100644
index 0000000000000..99998b20bcb05
--- /dev/null
+++ b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/Makefile
@@ -0,0 +1,3 @@
+CXX_SOURCES := main.cpp
+
+include Makefile.rules
diff --git a/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py
new file mode 100644
index 0000000000000..57a636ebb0829
--- /dev/null
+++ b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py
@@ -0,0 +1,39 @@
+"""
+Test DIL arithmetic.
+"""
+
+import lldb
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test.decorators import *
+from lldbsuite.test import lldbutil
+
+
+class TestFrameVarDILArithmetic(TestBase):
+    NO_DEBUG_INFO_TESTCASE = True
+
+    def test_arithmetic(self):
+        self.build()
+        lldbutil.run_to_source_breakpoint(
+            self, "Set a breakpoint here", lldb.SBFileSpec("main.cpp")
+        )
+
+        self.runCmd("settings set target.experimental.use-DIL true")
+
+        # Check number parsing
+        self.expect_var_path("1.0", value="1", type="double")
+        self.expect_var_path("1.0f", value="1", type="float")
+...
[truncated]

kuilpd · 2025-08-06T13:51:54Z

PickLiteralType follows C++ logic, but this can later be put in a plugin if the logic doesn't apply everywhere. I'm not sure how to test this though, since the type is picked by width, so the resulting type name can be different on different machines.

I also split lexing number into 2 tokens to avoid parsing a single number token twice just to determine which number that is. I had to make getAutoSenseRadix in StringRef global to avoid copying that code into DIL, hopefully it's OK for it to be part of this patch and not a separate PR.

kuhar

Can you move the ADT changes to a separate PR?

Needed in #152308

…arsing

labath

I have a feeling that this would actually be better off as two node types (float and integer), as each stage (lexer, parser and evaluator) handles the two differently. Maybe then the integer node could use llvm::APInt directly, which would make the is_unsigned field look less out of place.

The implementation makes sense to me, but I don't know if we've reached consensus that this is the way this should work.

labath · 2025-08-08T10:24:28Z

lldb/include/lldb/ValueObject/DILAST.h

+
+  llvm::Expected<lldb::ValueObjectSP> Accept(Visitor *v) const override;
+
+  Scalar GetValue() const & { return m_value; }


What's up with the &? I haven't used these before. Is there any benefit to that?

Um, it's probably an accident. I think it means that it can only be called on an lvalues, but there's no point for it here.

labath · 2025-08-08T10:26:32Z

lldb/source/ValueObject/DILEval.cpp

+static lldb::TypeSystemSP GetTypeSystemFromCU(std::shared_ptr<StackFrame> ctx) {
+  SymbolContext symbol_context =
+      ctx->GetSymbolContext(lldb::eSymbolContextCompUnit);
+  auto language = symbol_context.comp_unit->GetLanguage();


I think there's more auto here than we usually have in llvm: https://llvm.org/docs/CodingStandards.html#use-auto-type-deduction-to-make-code-more-readable

labath · 2025-08-08T10:28:10Z

lldb/source/ValueObject/DILEval.cpp

+  auto type_system =
+      symbol_context.module_sp->GetTypeSystemForLanguage(language);
+
+  if (type_system)
+    return *type_system;
+
+  return lldb::TypeSystemSP();


Is this different than return symbol_context.module_sp->GetTypeSystemForLanguage(language) ? without the type of auto, I can't see...

labath · 2025-08-08T10:32:02Z

lldb/source/ValueObject/DILParser.cpp

+  if (spelling_ref.consume_back_insensitive("ll"))
+    is_longlong = true;
+  if (spelling_ref.consume_back_insensitive("l"))
+    is_long = true;
+  if (spelling_ref.consume_back_insensitive("u"))
+    is_unsigned = true;


c++ allows both llu and ull, so this will need to be smarter if you want to accept both.

labath · 2025-08-08T10:36:55Z

lldb/source/ValueObject/DILLexer.cpp

+    if (next_pos == remainder.end() || !IsDigit(*next_pos))
+      return std::nullopt;
+  }
+  if (IsDigit(*(cur_pos)) || *(cur_pos) == '.') {


Suggested change

if (IsDigit(*(cur_pos)) || *(cur_pos) == '.') {

if (IsDigit(*cur_pos) || *cur_pos == '.') {

labath · 2025-08-08T10:40:17Z

lldb/source/ValueObject/DILLexer.cpp

+      cur_pos++;
+      while (IsNumberBodyChar(*cur_pos))
+        cur_pos++;


How does anything in this function deal with running off the end of the String(Ref)?

(This is kind of why I prefer using the StringRef methods (consume_front, drop_while, etc.) instead of working with iterators/indexes directly.)

I guess data in StringRef is not guaranteed to be null-terminated, I added a check for it.
I tried implementing it with StringRef functions, but I need to consume parts multiple times, or even only one character, and every time it produces a separate StringRef; it was kind of messy.

labath · 2025-08-08T10:45:50Z

lldb/source/ValueObject/DILEval.cpp

+llvm::Expected<lldb::ValueObjectSP>
+Interpreter::Visit(const ScalarLiteralNode *node) {
+  auto type_system = GetTypeSystemFromCU(m_exe_ctx_scope);
+  if (type_system) {


Invert the condition and make this an early exit: https://llvm.org/docs/CodingStandards.html#use-early-exits-and-continue-to-simplify-code

labath · 2025-08-08T10:47:13Z

lldb/source/ValueObject/DILEval.cpp

+        auto apsint = scalar.GetAPSInt();
+        auto type_bitsize = type->GetBitSize(m_exe_ctx_scope.get());
+        if (type_bitsize) {
+          llvm::APInt adjusted = apsint.zextOrTrunc(*type_bitsize);
+          scalar = Scalar(adjusted);
+        }


Can this use Scalar::TruncOrExtendTo?

Yes, I didn't notice this function.

labath · 2025-08-08T10:48:01Z

lldb/source/ValueObject/DILEval.cpp

+    if (literal->IsFloat())
+      return GetBasicType(type_system, lldb::eBasicTypeFloat);
+    return GetBasicType(type_system, lldb::eBasicTypeDouble);
+  } else if (scalar.GetType() == Scalar::e_int) {


No else after return: https://llvm.org/docs/CodingStandards.html#don-t-use-else-after-a-return

kuilpd · 2025-08-08T15:05:57Z

I have a feeling that this would actually be better off as two node types (float and integer), as each stage (lexer, parser and evaluator) handles the two differently. Maybe then the integer node could use llvm::APInt directly, which would make the is_unsigned field look less out of place.

The implementation makes sense to me, but I don't know if we've reached consensus that this is the way this should work.

The node will also have boolean literals and nullptr later, they are all stored in a Scalar, so I don't think we should do 4 nodes for this. I can add them in this PR as well.

We didn't really discuss how literals are parsed and stored, only the math and type promotion part, which I will get to in the next PR.

cmtice · 2025-08-08T22:40:11Z

lldb/include/lldb/ValueObject/DILAST.h

+                    bool is_unsigned, bool is_long, bool is_longlong)
+      : ASTNode(location, NodeKind::eScalarLiteralNode), m_value(value),
+        m_radix(radix), m_is_unsigned(is_unsigned), m_is_long(is_long),
+        m_is_longlong(is_longlong) {}


This leaves m_float uninitialized. Not sure if that's a problem?

cmtice · 2025-08-08T22:41:33Z

lldb/include/lldb/ValueObject/DILAST.h

+
+  ScalarLiteralNode(uint32_t location, Scalar value, bool is_float)
+      : ASTNode(location, NodeKind::eScalarLiteralNode), m_value(value),
+        m_is_float(is_float) {}


This leaves m_is_unsigned, m_radix, m_is_long and m_is_longlong uninitialized. m_radix being uninitialized might be a problem.

kuilpd requested review from labath, cmtice and jimingham August 6, 2025 13:51

kuilpd requested a review from JDevlieghere as a code owner August 6, 2025 13:51

kuilpd added the lldb label Aug 6, 2025

llvmbot added llvm:support llvm:adt labels Aug 6, 2025

kuhar reviewed Aug 6, 2025

View reviewed changes

kuilpd mentioned this pull request Aug 7, 2025

[ADT] Make getAutoSenseRadix in StringRef global #152503

Merged

kuilpd added a commit that referenced this pull request Aug 7, 2025

[ADT] Make getAutoSenseRadix in StringRef global (#152503)

2ff44d7

Needed in #152308

kuilpd added 4 commits August 7, 2025 22:50

Add ScalarLiteralNode

55fbd47

Lex and parse floating numbers; split number tokens to avoid double p…

578d43d

…arsing

Automatically pick integer literal type

703232b

Adjust APInt bitwidth

3b9d987

kuilpd force-pushed the DIL-upstream-add-scalar branch from 27466c3 to 3b9d987 Compare August 7, 2025 18:26

kuhar requested review from kuhar and removed request for kuhar August 7, 2025 18:40

labath reviewed Aug 8, 2025

View reviewed changes

kuilpd added 3 commits August 8, 2025 17:56

Adjust code formatting

7927d47

Fix suffix parsing

738da75

Add remainder.end() guard

658da31

cmtice reviewed Aug 8, 2025

View reviewed changes


		llvm::Expected<lldb::ValueObjectSP> Accept(Visitor *v) const override;

		Scalar GetValue() const & { return m_value; }

	if (IsDigit((cur_pos)) \|\| (cur_pos) == '.') {
	if (IsDigit(cur_pos) \|\| cur_pos == '.') {

[LLDB] Add ScalarLiteralNode and literal parsing in DIL #152308

Are you sure you want to change the base?

[LLDB] Add ScalarLiteralNode and literal parsing in DIL #152308

Uh oh!

Conversation

kuilpd commented Aug 6, 2025

Uh oh!

llvmbot commented Aug 6, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

kuilpd commented Aug 6, 2025

Uh oh!

kuhar left a comment

Choose a reason for hiding this comment

Uh oh!

labath left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

kuilpd commented Aug 8, 2025

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

[LLDB] Add `ScalarLiteralNode` and literal parsing in DIL #152308

[LLDB] Add `ScalarLiteralNode` and literal parsing in DIL #152308

llvmbot commented Aug 6, 2025 •

edited

Loading