Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/news.d/1120.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Added support for extended identifiers.

A known limitation is that an extended identifier containing parentheses, when used within a port or
generic clause, must have the same number of opening and closing parentheses. For example, a generic
named ``\foo(bar)\`` is accepted but ``\foo(bar\`` is not.
33 changes: 33 additions & 0 deletions tests/unit/test_vhdl_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -662,6 +662,39 @@ def test_two_adjacent_hyphens_in_a_literal(self):
stimulus = 'signal a : std_logic_vector(3 downto 0) := "----";'
self.assertEqual(remove_comments(stimulus), stimulus)

def test_external_identifier(self):
    """
    Check the special cases of extended identifier parsing.

    The fixture declares three entities and one package. The entity whose
    name is an illegal identifier is expected to be skipped, so two
    entities and one package must be reported.
    """
    # NOTE: the test name says "external", but the subject is VHDL
    # *extended* identifiers (names enclosed in backslashes).
    # The fixture text is kept flush left, with the closing quotes in
    # column 0, so that the parsed string is exactly what is shown here.
    code = """
entity standard_identifier is
generic (
-- Extended identifiers with parenthesis will be accepted if they are balanced.
-- Otherwise they will interfere with finding the closing parenthesis to the
-- generic clause. Same thing with port clause. This is an acceptable limitation
-- for now.
\\foo(bar)\\ : integer
);
end entity;

entity non-standard-identifier is -- This entity won't be found because of illegal identifier pattern.
end package;

entity \\extended-identifier\\ is
end entity \\extended-identifier\\;

package \\a.package\\ is
end package \\a.package\\;
"""
    parsed = VHDLDesignFile.parse(code)

    # Comparing whole lists checks the count and every name in one go.
    entity_names = [entity.identifier for entity in parsed.entities]
    self.assertEqual(
        entity_names,
        ["standard_identifier", "\\extended-identifier\\"],
    )

    package_names = [package.identifier for package in parsed.packages]
    self.assertEqual(package_names, ["\\a.package\\"])

def parse_single_entity(self, code):
"""
Helper function to parse a single entity
Expand Down
107 changes: 56 additions & 51 deletions vunit/vhdl_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ def parse(self, file_name):
)


# Regular expression fragment matching a single VHDL identifier: either a
# basic identifier (a letter followed by word characters) or an extended
# identifier (one or more characters other than backslash and line breaks,
# enclosed in backslashes).
#
# The alternation is wrapped in a non-capturing group so the fragment is
# self-contained: it can be concatenated with other pattern text without the
# "|" absorbing the neighbouring text into one of its alternatives. Wrapping
# it again in a capturing or non-capturing group remains valid.
_ID_PATTERN = r"(?:[A-Za-z]\w*|\\[^\n\r\\]+\\)"


class VHDLDesignFile(object): # pylint: disable=too-many-instance-attributes
"""
Contains VHDL objects found within a file
Expand Down Expand Up @@ -86,7 +89,7 @@ def parse(cls, code):
)

_component_re = re.compile(
r"[a-zA-Z]\w*\s*\:\s*(?:component)?\s*(?:(?:[a-zA-Z]\w*)\.)?([a-zA-Z]\w*)\s*"
rf"(?:{_ID_PATTERN})\s*\:\s*(?:component)?\s*(?:(?:{_ID_PATTERN})\.)?({_ID_PATTERN})\s*"
r"(?:generic|port) map\s*\([\s\w\=\>\,\.\)\(\+\-\*\/\'\"]*\);",
re.IGNORECASE,
)
Expand All @@ -101,13 +104,13 @@ def __init__(self, identifier):
self.identifier = identifier

_package_body_pattern = re.compile(
r"""
rf"""
\b # Word boundary
package # package keyword
\s+ # At least one whitespace
body # body keyword
\s+ # At least one whitespace
(?P<package>[a-zA-Z][\w]*) # A package
(?P<package>{_ID_PATTERN}) # A package
\s+ # At least one whitespace
is # is keyword
""",
Expand All @@ -126,23 +129,23 @@ def find(cls, code):

class VHDLConfiguration(object):
"""
A configuratio declaration
A configuration declaration
"""

def __init__(self, identifier, entity):
self.identifier = identifier
self.entity = entity

_configuration_re = re.compile(
r"""
rf"""
\b # Word boundary
configuration # configuration keyword
\s+ # At least one whitespace
(?P<id>[a-zA-Z][\w]*) # An identifier
(?P<id>{_ID_PATTERN}) # An identifier
\s+ # At least one whitespace
of # of keyword
\s+ # At least one whitespace
(?P<entity_id>[a-zA-Z][\w]*) # An identifier
(?P<entity_id>{_ID_PATTERN}) # An identifier
\s+ # At least one whitespace
is # is keyword
""",
Expand All @@ -168,15 +171,15 @@ def __init__(self, identifier, entity):
self.entity = entity

_architecture_re = re.compile(
r"""
rf"""
\b # Word boundary
architecture # architecture keyword
\s+ # At least one whitespace
(?P<id>[a-zA-Z][\w]*) # An identifier
(?P<id>{_ID_PATTERN}) # An identifier
\s+ # At least one whitespace
of # of keyword
\s+ # At least one whitespace
(?P<entity_id>[a-zA-Z][\w]*) # An identifier
(?P<entity_id>{_ID_PATTERN}) # An identifier
\s+ # At least one whitespace
is # is keyword
""",
Expand All @@ -194,7 +197,9 @@ def find(cls, code):
yield VHDLArchitecture(identifier, entity_id)


PACKAGE_INSTANCE_PATTERN = r"\bpackage\s+(?P<new_name>[a-zA-Z]\w*)\s+is\s+new\s+(?P<lib>[a-zA-Z]\w*)\.(?P<name>[a-zA-Z]\w*)" # pylint: disable=line-too-long
PACKAGE_INSTANCE_PATTERN = (
rf"\bpackage\s+(?P<new_name>{_ID_PATTERN})\s+is\s+new\s+(?P<lib>{_ID_PATTERN})\.(?P<name>{_ID_PATTERN})"
)


class VHDLPackage(object):
Expand All @@ -209,11 +214,11 @@ def __init__(self, identifier, enumeration_types, record_types, array_types):
self.array_types = array_types

_package_start_re = re.compile(
r"""
rf"""
\b # Word boundary
package # package keyword
\s+ # At least one whitespace
(?P<id>[a-zA-Z][\w]*) # An identifier
(?P<id>{_ID_PATTERN}) # An identifier
\s+ # At least one whitespace
is # is keyword
""",
Expand All @@ -233,7 +238,7 @@ def _find_normal_packages(cls, code):
end # end keyword
(\s+package)? # Optional package keyword
(\s+"""
+ identifier
+ re.escape(identifier)
+ r""")? # Optional identifier
[\s]* # Potential whitespaces
; # Semicolon
Expand Down Expand Up @@ -324,11 +329,11 @@ def add_port(self, identifier, mode, subtype_code, init_value=None):
)

_entity_start_re = re.compile(
r"""
rf"""
\b # Word boundary
entity # entity keyword
\s+ # At least one whitespace
(?P<id>[a-zA-Z][\w]*) # An identifier
(?P<id>{_ID_PATTERN}) # An identifier
\s+ # At least one whitespace
is # is keyword
""",
Expand All @@ -351,7 +356,7 @@ def find(cls, code):
(entity)? # Optional entity keyword
[\s]* # Potential whitespaces
("""
+ identifier
+ re.escape(identifier)
+ r""")? # Optional identifier
[\s]* # Potential whitespaces
; # Semicolon
Expand All @@ -371,11 +376,11 @@ def parse(cls, code):
# Extract identifier
re_flags = re.MULTILINE | re.IGNORECASE | re.VERBOSE
entity_start = re.compile(
r"""
rf"""
\b # Word boundary
entity # entity keyword
\s+ # At least one whitespace
(?P<id>[a-zA-Z][\w]*) # An identifier
(?P<id>{_ID_PATTERN}) # An identifier
\s+ # At least one whitespace
is # is keyword
""",
Expand Down Expand Up @@ -454,12 +459,12 @@ def _find_port_clause(cls, code):
@staticmethod
def _split_not_in_par(string, sep):
"""
Split string at all occurences of sep but not inside of a parenthesis or quoute
Split string at all occurrences of sep but not inside of a parenthesis or quote
"""
result = []
count = 0
split = []
quouted = False
quoted = False
escaped = False

for idx, char in enumerate(string):
Expand All @@ -472,7 +477,7 @@ def _split_not_in_par(string, sep):
if next_char == '"':
escaped = True
else:
quouted = not quouted
quoted = not quoted
else:
escaped = False

Expand All @@ -481,7 +486,7 @@ def _split_not_in_par(string, sep):
elif char in ")":
count -= 1

if char == sep and count == 0 and not quouted:
if char == sep and count == 0 and not quoted:
result.append("".join(split))
split = []
else:
Expand Down Expand Up @@ -560,11 +565,11 @@ def __init__(self, identifier):
self.identifier = identifier

_context_start_re = re.compile(
r"""
rf"""
\b # Word boundary
context # context keyword
\s+ # At least one whitespace
(?P<id>[a-zA-Z][\w]*) # An identifier
(?P<id>{_ID_PATTERN}) # An identifier
\s+ # At least one whitespace
is # is keyword
""",
Expand Down Expand Up @@ -600,10 +605,10 @@ def parse(cls, code):
# Extract type mark and find out if it's an array type and if a constraint is given.
re_flags = re.MULTILINE | re.IGNORECASE | re.VERBOSE
subtype_indication_start = re.compile(
r"""
rf"""
^ # Beginning of line
[\s]* # Potential whitespaces
(?P<type_mark>[a-zA-Z][\w]*) # An type mark
(?P<type_mark>{_ID_PATTERN}) # An type mark
[\s]* # Potential whitespaces
(?P<constraint>\(.*\))?
""",
Expand Down Expand Up @@ -705,16 +710,16 @@ def __init__(self, identifier, literals):
self.literals = literals

_enum_declaration_re = re.compile(
r"""
rf"""
\b # Word boundary
type
\s+
(?P<id>[a-zA-Z][\w]*) # An identifier
(?P<id>{_ID_PATTERN}) # An identifier
\s+
is
\s*\(\s*
(?P<literals>[a-zA-Z][\w]* # First enumeration literal
(\s*,\s*[a-zA-Z][\w]*)*) # More enumeration literals
(?P<literals>(?:{_ID_PATTERN}) # First enumeration literal
(\s*,\s*(?:{_ID_PATTERN}))*) # More enumeration literals
\s*\)\s*;""",
re.MULTILINE | re.IGNORECASE | re.VERBOSE,
)
Expand Down Expand Up @@ -746,11 +751,11 @@ def __init__(self, identifier, elements):
self.elements = elements

_record_declaration_re = re.compile(
r"""
rf"""
\b # Word boundary
type
\s+
(?P<id>[a-zA-Z][\w]*) # An identifier
(?P<id>{_ID_PATTERN}) # An identifier
\s+
is
\s+
Expand Down Expand Up @@ -809,17 +814,17 @@ def __init__(self, identifier, subtype_indication, range1, range2):
)

_range_attribute_ranges_re = re.compile(
r"""
\s*(?P<range_attribute>[a-zA-Z][\w]*'range)\s*""",
rf"""
\s*(?P<range_attribute>(?:{_ID_PATTERN})'range)\s*""",
re.MULTILINE | re.IGNORECASE | re.VERBOSE | re.DOTALL,
)

_unconstrained_ranges_re = re.compile(
r"""
\s*(?P<range_type1>[a-zA-Z][\w]*)
rf"""
\s*(?P<range_type1>{_ID_PATTERN})
\s+range\s+<>\s*
(,
\s*(?P<range_type2>[a-zA-Z][\w]*)
\s*(?P<range_type2>{_ID_PATTERN})
\s+range\s+<>\s*)?""",
re.MULTILINE | re.IGNORECASE | re.VERBOSE | re.DOTALL,
)
Expand All @@ -833,24 +838,24 @@ def __init__(self, identifier, subtype_indication, range1, range2):
)

_range_attribute_range_re = re.compile(
r"""
\s*(?P<range_attribute>[a-zA-Z][\w]*'range)\s*""",
rf"""
\s*(?P<range_attribute>(?:{_ID_PATTERN})'range)\s*""",
re.MULTILINE | re.IGNORECASE | re.VERBOSE | re.DOTALL,
)

_unconstrained_range_re = re.compile(
r"""
\s*(?P<range_type>[a-zA-Z][\w]*)
rf"""
\s*(?P<range_type>{_ID_PATTERN})
\s+range\s+<>\s*""",
re.MULTILINE | re.IGNORECASE | re.VERBOSE | re.DOTALL,
)

_array_declaration_re = re.compile(
r"""
rf"""
\b # Word boundary
type
\s+
(?P<id>[a-zA-Z][\w]*)
(?P<id>{_ID_PATTERN})
\s+
is
\s+
Expand Down Expand Up @@ -950,12 +955,12 @@ class VHDLReference(object):
_reference_types = ("package", "context", "entity", "configuration")

_uses_re = re.compile(
r"""
rf"""
\b # Word boundary
(?P<use_type>use|context) # use or context keyword
\s+ # At least one whitespace
(?P<id>[a-zA-Z][\w]*(\.[a-zA-Z][\w]*){1,2})
(?P<extra>(\s*,\s*[a-zA-Z][\w]*(\.[a-zA-Z][\w]*){1,2})*)
(?P<id>(?:{_ID_PATTERN})(\.(?:{_ID_PATTERN})){{1,2}})
(?P<extra>(\s*,\s*(?:{_ID_PATTERN})(\.(?:{_ID_PATTERN})){{1,2}})*)
\s* # Potential whitespaces
; # Semi-colon
""",
Expand All @@ -965,12 +970,12 @@ class VHDLReference(object):
@classmethod
def _find_uses(cls, code):
"""
Find all the libraries and use clasues within the code
Find all the libraries and use clauses within the code
"""

def get_ids(match):
"""
Get all ids found within the match taking the optinal extra ids of
Get all ids found within the match taking the optional extra ids of
library and use clauses into account such as:

use foo, bar;
Expand Down Expand Up @@ -1002,7 +1007,7 @@ def get_ids(match):
return references

_entity_reference_re = re.compile(
r"\bentity\s+(?P<lib>[a-zA-Z]\w*)\.(?P<ent>[a-zA-Z]\w*)\s*(\((?P<arch>[a-zA-Z]\w*)\))?",
rf"\bentity\s+(?P<lib>{_ID_PATTERN})\.(?P<ent>{_ID_PATTERN})\s*(\((?P<arch>{_ID_PATTERN})\))?",
re.MULTILINE | re.IGNORECASE,
)

Expand All @@ -1027,7 +1032,7 @@ def _find_entity_references(cls, code):
return references

_configuration_reference_re = re.compile(
r"\bconfiguration\s+(?P<lib>[a-zA-Z]\w*)\.(?P<cfg>[a-zA-Z]\w*)",
rf"\bconfiguration\s+(?P<lib>{_ID_PATTERN})\.(?P<cfg>{_ID_PATTERN})",
re.MULTILINE | re.IGNORECASE,
)

Expand Down
Loading