VUnit · LarsAsplund · Mar 12, 2026 · Feb 26, 2026
diff --git a/docs/news.d/1120.feature.rst b/docs/news.d/1120.feature.rst
@@ -0,0 +1,5 @@
+Added support for extended identifiers.
+
+A known limitation is that an extended identifier containing parentheses within a port or generic clause
+must have the same number of opening and closing parentheses. For example, a generic named ``\foo(bar)\``
+is accepted but ``\foo(bar\`` is not.
diff --git a/tests/unit/test_vhdl_parser.py b/tests/unit/test_vhdl_parser.py
@@ -662,6 +662,39 @@ def test_two_adjacent_hyphens_in_a_literal(self):
         stimulus = 'signal a : std_logic_vector(3 downto 0) := "----";'
         self.assertEqual(remove_comments(stimulus), stimulus)
 
+    def test_external_identifier(self):
+        # This test is focused on the special cases of extended identifier parsing.
+        design_file = VHDLDesignFile.parse(
+            """
+entity standard_identifier is
+  generic (
+    -- Extended identifiers with parenthesis will be accepted if they are balanced.
+    -- Otherwise they will interfere with finding the closing parenthesis to the
+    -- generic clause. Same thing with port clause. This is an acceptable limitation
+    -- for now.
+    \\foo(bar)\\ : integer
+    );
+end entity;
+
+entity non-standard-identifier is -- This entity won't be found because of illegal identifier pattern.
+end package;
+
+entity \\extended-identifier\\ is
+end entity \\extended-identifier\\;
+
+package \\a.package\\ is
+end package \\a.package\\;
+"""
+        )
+        entities = design_file.entities
+        self.assertEqual(len(entities), 2)
+        self.assertEqual(entities[0].identifier, "standard_identifier")
+        self.assertEqual(entities[1].identifier, "\\extended-identifier\\")
+
+        packages = design_file.packages
+        self.assertEqual(len(packages), 1)
+        self.assertEqual(packages[0].identifier, "\\a.package\\")
+
     def parse_single_entity(self, code):
         """
         Helper function to parse a single entity

diff --git a/vunit/vhdl_parser.py b/vunit/vhdl_parser.py
@@ -42,6 +42,9 @@ def parse(self, file_name):
         )
 
 
+_ID_PATTERN = r"[A-Za-z]\w*|\\[^\n\r\\]+\\"
+
+
 class VHDLDesignFile(object):  # pylint: disable=too-many-instance-attributes
     """
     Contains VHDL objects found within a file
@@ -86,7 +89,7 @@ def parse(cls, code):
         )
 
     _component_re = re.compile(
-        r"[a-zA-Z]\w*\s*\:\s*(?:component)?\s*(?:(?:[a-zA-Z]\w*)\.)?([a-zA-Z]\w*)\s*"
+        rf"(?:{_ID_PATTERN})\s*\:\s*(?:component)?\s*(?:(?:{_ID_PATTERN})\.)?({_ID_PATTERN})\s*"
         r"(?:generic|port) map\s*\([\s\w\=\>\,\.\)\(\+\-\*\/\'\"]*\);",
         re.IGNORECASE,
     )
@@ -101,13 +104,13 @@ def __init__(self, identifier):
         self.identifier = identifier
 
     _package_body_pattern = re.compile(
-        r"""
+        rf"""
         \b                             # Word boundary
         package                        # package keyword
         \s+                            # At least one whitespace
         body                           # body keyword
         \s+                            # At least one whitespace
-        (?P<package>[a-zA-Z][\w]*)     # A package
+        (?P<package>{_ID_PATTERN})     # A package
         \s+                            # At least one whitespace
         is                             # is keyword
         """,
@@ -126,23 +129,23 @@ def find(cls, code):
 
 class VHDLConfiguration(object):
     """
-    A configuratio declaration
+    A configuration declaration
     """
 
     def __init__(self, identifier, entity):
         self.identifier = identifier
         self.entity = entity
 
     _configuration_re = re.compile(
-        r"""
+        rf"""
         \b                    # Word boundary
         configuration         # configuration keyword
         \s+                   # At least one whitespace
-        (?P<id>[a-zA-Z][\w]*) # An identifier
+        (?P<id>{_ID_PATTERN}) # An identifier
         \s+                   # At least one whitespace
         of                    # of keyword
         \s+                   # At least one whitespace
-        (?P<entity_id>[a-zA-Z][\w]*) # An identifier
+        (?P<entity_id>{_ID_PATTERN}) # An identifier
         \s+                   # At least one whitespace
         is                    # is keyword
         """,
@@ -168,15 +171,15 @@ def __init__(self, identifier, entity):
         self.entity = entity
 
     _architecture_re = re.compile(
-        r"""
+        rf"""
         \b                    # Word boundary
         architecture          # architecture keyword
         \s+                   # At least one whitespace
-        (?P<id>[a-zA-Z][\w]*) # An identifier
+        (?P<id>{_ID_PATTERN}) # An identifier
         \s+                   # At least one whitespace
         of                    # of keyword
         \s+                   # At least one whitespace
-        (?P<entity_id>[a-zA-Z][\w]*) # An identifier
+        (?P<entity_id>{_ID_PATTERN}) # An identifier
         \s+                   # At least one whitespace
         is                    # is keyword
         """,
@@ -194,7 +197,9 @@ def find(cls, code):
             yield VHDLArchitecture(identifier, entity_id)
 
 
-PACKAGE_INSTANCE_PATTERN = r"\bpackage\s+(?P<new_name>[a-zA-Z]\w*)\s+is\s+new\s+(?P<lib>[a-zA-Z]\w*)\.(?P<name>[a-zA-Z]\w*)"  # pylint: disable=line-too-long
+PACKAGE_INSTANCE_PATTERN = (
+    rf"\bpackage\s+(?P<new_name>{_ID_PATTERN})\s+is\s+new\s+(?P<lib>{_ID_PATTERN})\.(?P<name>{_ID_PATTERN})"
+)
 
 
 class VHDLPackage(object):
@@ -209,11 +214,11 @@ def __init__(self, identifier, enumeration_types, record_types, array_types):
         self.array_types = array_types
 
     _package_start_re = re.compile(
-        r"""
+        rf"""
         \b                    # Word boundary
         package               # package keyword
         \s+                   # At least one whitespace
-        (?P<id>[a-zA-Z][\w]*) # An identifier
+        (?P<id>{_ID_PATTERN}) # An identifier
         \s+                   # At least one whitespace
         is                    # is keyword
         """,
@@ -233,7 +238,7 @@ def _find_normal_packages(cls, code):
                 end                           # end keyword
                 (\s+package)?                 # Optional package keyword
                 (\s+"""
-                + identifier
+                + re.escape(identifier)
                 + r""")? # Optional identifier
                 [\s]*                         # Potential whitespaces
                 ;                             # Semicolon
@@ -324,11 +329,11 @@ def add_port(self, identifier, mode, subtype_code, init_value=None):
         )
 
     _entity_start_re = re.compile(
-        r"""
+        rf"""
         \b                    # Word boundary
         entity                # entity keyword
         \s+                   # At least one whitespace
-        (?P<id>[a-zA-Z][\w]*) # An identifier
+        (?P<id>{_ID_PATTERN}) # An identifier
         \s+                   # At least one whitespace
         is                    # is keyword
         """,
@@ -351,7 +356,7 @@ def find(cls, code):
                 (entity)?                     # Optional entity keyword
                 [\s]*                         # Potential whitespaces
                 ("""
-                + identifier
+                + re.escape(identifier)
                 + r""")?    # Optional identifier
                 [\s]*                         # Potential whitespaces
                 ;                             # Semicolon
@@ -371,11 +376,11 @@ def parse(cls, code):
         # Extract identifier
         re_flags = re.MULTILINE | re.IGNORECASE | re.VERBOSE
         entity_start = re.compile(
-            r"""
+            rf"""
             \b                    # Word boundary
             entity                # entity keyword
             \s+                   # At least one whitespace
-            (?P<id>[a-zA-Z][\w]*) # An identifier
+            (?P<id>{_ID_PATTERN}) # An identifier
             \s+                   # At least one whitespace
             is                    # is keyword
             """,
@@ -454,12 +459,12 @@ def _find_port_clause(cls, code):
     @staticmethod
     def _split_not_in_par(string, sep):
         """
-        Split string at all occurences of sep but not inside of a parenthesis or quoute
+        Split string at all occurrences of sep but not inside of a parenthesis or quote
         """
         result = []
         count = 0
         split = []
-        quouted = False
+        quoted = False
         escaped = False
 
         for idx, char in enumerate(string):
@@ -472,7 +477,7 @@ def _split_not_in_par(string, sep):
                 if next_char == '"':
                     escaped = True
                 else:
-                    quouted = not quouted
+                    quoted = not quoted
             else:
                 escaped = False
 
@@ -481,7 +486,7 @@ def _split_not_in_par(string, sep):
             elif char in ")":
                 count -= 1
 
-            if char == sep and count == 0 and not quouted:
+            if char == sep and count == 0 and not quoted:
                 result.append("".join(split))
                 split = []
             else:
@@ -560,11 +565,11 @@ def __init__(self, identifier):
         self.identifier = identifier
 
     _context_start_re = re.compile(
-        r"""
+        rf"""
         \b                    # Word boundary
         context               # context keyword
         \s+                   # At least one whitespace
-        (?P<id>[a-zA-Z][\w]*) # An identifier
+        (?P<id>{_ID_PATTERN}) # An identifier
         \s+                   # At least one whitespace
         is                    # is keyword
         """,
@@ -600,10 +605,10 @@ def parse(cls, code):
         # Extract type mark and find out if it's an array type and if a constraint is given.
         re_flags = re.MULTILINE | re.IGNORECASE | re.VERBOSE
         subtype_indication_start = re.compile(
-            r"""
+            rf"""
             ^                             # Beginning of line
             [\s]*                         # Potential whitespaces
-            (?P<type_mark>[a-zA-Z][\w]*)   # An type mark
+            (?P<type_mark>{_ID_PATTERN})   # An type mark
             [\s]*                         # Potential whitespaces
             (?P<constraint>\(.*\))?
             """,
@@ -705,16 +710,16 @@ def __init__(self, identifier, literals):
         self.literals = literals
 
     _enum_declaration_re = re.compile(
-        r"""
+        rf"""
         \b                    # Word boundary
         type
         \s+
-        (?P<id>[a-zA-Z][\w]*)       # An identifier
+        (?P<id>{_ID_PATTERN})       # An identifier
         \s+
         is
         \s*\(\s*
-        (?P<literals>[a-zA-Z][\w]* # First enumeration literal
-        (\s*,\s*[a-zA-Z][\w]*)*)   # More enumeration literals
+        (?P<literals>(?:{_ID_PATTERN}) # First enumeration literal
+        (\s*,\s*(?:{_ID_PATTERN}))*)   # More enumeration literals
         \s*\)\s*;""",
         re.MULTILINE | re.IGNORECASE | re.VERBOSE,
     )
@@ -746,11 +751,11 @@ def __init__(self, identifier, elements):
         self.elements = elements
 
     _record_declaration_re = re.compile(
-        r"""
+        rf"""
         \b                    # Word boundary
         type
         \s+
-        (?P<id>[a-zA-Z][\w]*)       # An identifier
+        (?P<id>{_ID_PATTERN})       # An identifier
         \s+
         is
         \s+
@@ -809,17 +814,17 @@ def __init__(self, identifier, subtype_indication, range1, range2):
     )
 
     _range_attribute_ranges_re = re.compile(
-        r"""
-        \s*(?P<range_attribute>[a-zA-Z][\w]*'range)\s*""",
+        rf"""
+        \s*(?P<range_attribute>(?:{_ID_PATTERN})'range)\s*""",
         re.MULTILINE | re.IGNORECASE | re.VERBOSE | re.DOTALL,
     )
 
     _unconstrained_ranges_re = re.compile(
-        r"""
-        \s*(?P<range_type1>[a-zA-Z][\w]*)
+        rf"""
+        \s*(?P<range_type1>{_ID_PATTERN})
         \s+range\s+<>\s*
         (,
-        \s*(?P<range_type2>[a-zA-Z][\w]*)
+        \s*(?P<range_type2>{_ID_PATTERN})
         \s+range\s+<>\s*)?""",
         re.MULTILINE | re.IGNORECASE | re.VERBOSE | re.DOTALL,
     )
@@ -833,24 +838,24 @@ def __init__(self, identifier, subtype_indication, range1, range2):
     )
 
     _range_attribute_range_re = re.compile(
-        r"""
-        \s*(?P<range_attribute>[a-zA-Z][\w]*'range)\s*""",
+        rf"""
+        \s*(?P<range_attribute>(?:{_ID_PATTERN})'range)\s*""",
         re.MULTILINE | re.IGNORECASE | re.VERBOSE | re.DOTALL,
     )
 
     _unconstrained_range_re = re.compile(
-        r"""
-        \s*(?P<range_type>[a-zA-Z][\w]*)
+        rf"""
+        \s*(?P<range_type>{_ID_PATTERN})
         \s+range\s+<>\s*""",
         re.MULTILINE | re.IGNORECASE | re.VERBOSE | re.DOTALL,
     )
 
     _array_declaration_re = re.compile(
-        r"""
+        rf"""
         \b                    # Word boundary
         type
         \s+
-        (?P<id>[a-zA-Z][\w]*)
+        (?P<id>{_ID_PATTERN})
         \s+
         is
         \s+
@@ -950,12 +955,12 @@ class VHDLReference(object):
     _reference_types = ("package", "context", "entity", "configuration")
 
     _uses_re = re.compile(
-        r"""
+        rf"""
             \b                             # Word boundary
             (?P<use_type>use|context)      # use or context keyword
             \s+                            # At least one whitespace
-            (?P<id>[a-zA-Z][\w]*(\.[a-zA-Z][\w]*){1,2})
-            (?P<extra>(\s*,\s*[a-zA-Z][\w]*(\.[a-zA-Z][\w]*){1,2})*)
+            (?P<id>(?:{_ID_PATTERN})(\.(?:{_ID_PATTERN})){{1,2}})
+            (?P<extra>(\s*,\s*(?:{_ID_PATTERN})(\.(?:{_ID_PATTERN})){{1,2}})*)
             \s*                            # Potential whitespaces
             ;                              # Semi-colon
     """,
@@ -965,12 +970,12 @@ class VHDLReference(object):
     @classmethod
     def _find_uses(cls, code):
         """
-        Find all the libraries and use clasues within the code
+        Find all the libraries and use clauses within the code
         """
 
         def get_ids(match):
             """
-            Get all ids found within the match taking the optinal extra ids of
+            Get all ids found within the match taking the optional extra ids of
             library and use clauses into account such as:
 
             use foo, bar;
@@ -1002,7 +1007,7 @@ def get_ids(match):
         return references
 
     _entity_reference_re = re.compile(
-        r"\bentity\s+(?P<lib>[a-zA-Z]\w*)\.(?P<ent>[a-zA-Z]\w*)\s*(\((?P<arch>[a-zA-Z]\w*)\))?",
+        rf"\bentity\s+(?P<lib>{_ID_PATTERN})\.(?P<ent>{_ID_PATTERN})\s*(\((?P<arch>{_ID_PATTERN})\))?",
         re.MULTILINE | re.IGNORECASE,
     )
 
@@ -1027,7 +1032,7 @@ def _find_entity_references(cls, code):
         return references
 
     _configuration_reference_re = re.compile(
-        r"\bconfiguration\s+(?P<lib>[a-zA-Z]\w*)\.(?P<cfg>[a-zA-Z]\w*)",
+        rf"\bconfiguration\s+(?P<lib>{_ID_PATTERN})\.(?P<cfg>{_ID_PATTERN})",
         re.MULTILINE | re.IGNORECASE,
     )