diff --git a/README.md b/README.md index 13a481d..b82afb5 100644 --- a/README.md +++ b/README.md @@ -35,75 +35,106 @@ ctags --fields=afmikKlnsStz readtags.c readtags.h **Opening Tags File** ```python import ctags -from ctags import CTags, TagEntry +from ctags import CTags import sys try: tagFile = CTags('tags') -except: +except OSError as err: + print(err) sys.exit(1) # Available file information keys: # opened - was the tag file successfully opened? # error_number - errno value when 'opened' is false # format - format of tag file (1 = original, 2 = extended) -# sort - how is the tag file sorted? -# author - name of author of generating program (may be empy string) -# name - name of program (may be empy string) -# url - URL of distribution (may be empy string) -# version - program version (may be empty string) +# sort - how is the tag file sorted? +# +# Other keys may be available: +# author - name of author of generating program +# name - name of program +# url - URL of distribution +# version - program version +# If one of them is not present a KeyError is raised. 
-print tagFile['name'] -print tagFile['author'] -print tagFile['format'] +try: + print(tagFile['name']) +except KeyError: + print("No 'name' in the tagfile") + +try: + print(tagFile['author']) +except KeyError: + print("No 'author' in the tagfile") + +print(tagFile['format']) # Available sort type: # TAG_UNSORTED, TAG_SORTED, TAG_FOLDSORTED # Note: use this only if you know how the tags file is sorted which is # specified when you generate the tag file -status = tagFile.setSortType(ctags.TAG_SORTED) +tagFile.setSortType(ctags.TAG_SORTED) ``` -**Obtaining First Tag Entry** +**Listing Tag Entries** ```python -entry = TagEntry() -status = tagFile.first(entry) - -if status: - # Available TagEntry keys: - # name - name of tag - # file - path of source file containing definition of tag - # pattern - pattern for locating source line (None if no pattern) - # lineNumber - line number in source file of tag definition (may be zero if not known) - # kind - kind of tag (none if not known) - # fileScope - is tag of file-limited scope? - - # Note: other keys will be assumed as an extension key and will - # return None if no such key is found - - print entry['name'] - print entry['kind'] +# A generator of all tags in the file can be obtained with: +all_tags = tagFile.all_tags() + +# The generator yields a dict for each entry. +# The following keys are always available for an entry: +# name - name of tag +# file - path of source file containing definition of tag +# pattern - pattern for locating source line +# (None if no pattern, this should not happen with a correct +# tag file) +# fileScope - is tag of file-limited scope? +# +# The dict may contain other keys (extension keys). 
+# Other keys include : +# lineNumber - line number in source file of tag definition +# kind - kind of tag + +for entry in all_tags: + print(entry['name']) + print(entry['file']) + try: + entry['lineNumber'] + except KeyError: + print("Entry has no lineNumber") + else: + print("Entry has a lineNumber") ``` -**Finding a Tag Entry** -```python +**Finding Tag Entries** +```python # Available options: # TAG_PARTIALMATCH - begin with # TAG_FULLMATCH - full length matching # TAG_IGNORECASE - disable binary search # TAG_OBSERVECASE - case sensitive and allowed binary search to perform -if tagFile.find(entry, 'find', ctags.TAG_PARTIALMATCH | ctags.TAG_IGNORECASE): - print 'found' - print entry['lineNumber'] - print entry['pattern'] - print entry['kind'] +found_tags = tagFile.find_tags('find', ctags.TAG_PARTIALMATCH | ctags.TAG_IGNORECASE) +for entry in found_tags: + print(entry['lineNumber']) + print(entry['pattern']) + print(entry['kind']) -# Find the next tag matching the name and options supplied to the -# most recent call to tagFile.find(). (replace the entry if found) -status = tagFile.findNext(entry) -# Step to the next tag in the file (replace entry if found) -status = tagFile.next(entry) +``` +# File Encoding. +By default, CTags returns unicode strings using the 'utf8' encoding. +This can be changed by providing a custom encoding at CTags creation : + +```python +tagFile = CTags('tags', encoding='latin1') ``` + +If None is provided as encoding, no decoding is done and entries will contain +bytes instead of strings. + +It is also possible to provide an encoding_errors argument. +It will be passed to the decode function as the 'errors' argument. +See the definition of the decode function to know how to use this argument. +By default, encoding_errors is 'strict'. 
diff --git a/src/_readtags.pyx b/src/_readtags.pyx index 73d7e0b..d99ff58 100644 --- a/src/_readtags.pyx +++ b/src/_readtags.pyx @@ -17,130 +17,161 @@ You should have received a copy of the GNU General Public License along with Python-Ctags. If not, see . """ +cdef extern from "string.h": + char* strerror(int errnum) include "stdlib.pxi" include "readtags.pxi" +import sys -cdef class TagEntry: - cdef tagEntry c_entry - - def __cinit__(self): - self.c_entry.fields.count = 0 - self.c_entry.fields.list = NULL - - - def __setitem__(self, key, item): - if key == 'name': - self.c_entry.name = item - elif key == 'file': - self.c_entry.file = item - elif key == 'pattern': - self.c_entry.address.pattern = item - elif key == 'lineNumber': - self.c_entry.address.lineNumber = item - elif key == 'kind': - self.c_entry.kind = item - elif key == 'fileScope': - self.c_entry.fileScope = item - elif key == 'fields': - # fields.list is allocated by readtags.c - if self.c_entry.fields.count != len(item): - return - - fields = item - if self.c_entry.fields.list != NULL: - free(self.c_entry.fields.list) - self.c_entry.fields.list = NULL - - for k, v in fields.iteritems(): - self.c_entry.fields.list.key = k - self.c_entry.fields.list.value = v - - def __getitem__(self, key): - cdef char* result - if key == 'name': - return self.c_entry.name - elif key == 'file': - return self.c_entry.file - elif key == 'pattern': - if self.c_entry.address.pattern == NULL: - return None - return self.c_entry.address.pattern - elif key == 'lineNumber': - return self.c_entry.address.lineNumber - elif key == 'kind': - if self.c_entry.kind == NULL: - return None - return self.c_entry.kind - elif key == 'fileScope': - return self.c_entry.fileScope - else: - # It will crash if we mix NULL/0/None - # don't mix comparison of type - result = ctagsField(&self.c_entry, key) - if result == NULL: - return None - - return result cdef class CTags: cdef tagFile* file cdef tagFileInfo info + cdef tagEntry c_entry + cdef 
object current_id + cdef object encoding + cdef str encoding_errors - def __cinit__(self, filepath): - self.open(filepath) + def __cinit__(self, filepath, encoding='utf8', encoding_errors='strict'): + if isinstance(filepath, unicode): + filepath = (filepath).encode(sys.getfilesystemencoding()) + self.file = ctagsOpen(filepath, &self.info) + if not self.file: + raise OSError(self.info.status.error_number, + strerror(self.info.status.error_number), + filepath) + self.encoding = encoding + self.encoding_errors = encoding_errors + + cdef decode(self, bytes bytes_array): + if not self.encoding: + return bytes_array + return bytes_array.decode(self.encoding, self.encoding_errors) def __dealloc__(self): - if self.file: ctagsClose(self.file) def __getitem__(self, key): - if key == 'opened': - return self.info.status.opened - if key == 'error_number': - return self.info.status.error_number + cdef const char* ret = NULL if key == 'format': return self.info.file.format - if key == 'sort': + elif key == 'sort': return self.info.file.sort - if key == 'author': - if self.info.program.author == NULL: - return '' - return self.info.program.author - if key == 'name': - if self.info.program.name == NULL: - return '' - return self.info.program.name - if key == 'url': - if self.info.program.url == NULL: - return '' - return self.info.program.url - if key == 'version': - if self.info.program.version == NULL: - return '' - return self.info.program.version - - - def open(self, filepath): - self.file = ctagsOpen(filepath, &self.info) - - if not self.info.status.opened: - raise Exception('Invalid tag file') + else: + if key == 'author': + ret = self.info.program.author + elif key == 'name': + ret = self.info.program.name + elif key == 'url': + ret = self.info.program.url + elif key == 'version': + ret = self.info.program.version + if ret == NULL: + raise KeyError(key) + return self.decode(ret) def setSortType(self, tagSortType type): - return ctagsSetSortType(self.file, type) - - def first(self, TagEntry 
entry): - return ctagsFirst(self.file, &entry.c_entry) - - def find(self, TagEntry entry, char* name, int options): - return ctagsFind(self.file, &entry.c_entry, name, options) - - def findNext(self, TagEntry entry): - return ctagsFindNext(self.file, &entry.c_entry) - - def next(self, TagEntry entry): - return ctagsNext(self.file, &entry.c_entry) + success = ctagsSetSortType(self.file, type) + if not success: + raise RuntimeError("Unable to set the sort type of the tag file") + + cdef create_tagEntry(self, const tagEntry* const c_entry): + cdef dict ret = {} + ret['name'] = self.decode(c_entry.name) + ret['file'] = self.decode(c_entry.file) + ret['fileScope'] = c_entry.fileScope + if c_entry.address.pattern != NULL: + ret['pattern'] = self.decode(c_entry.address.pattern) + if c_entry.address.lineNumber: + ret['lineNumber'] = c_entry.address.lineNumber + if c_entry.kind != NULL: + ret['kind'] = self.decode(c_entry.kind) + for index in range(c_entry.fields.count): + key = c_entry.fields.list[index].key + ret[key.decode()] = self.decode(c_entry.fields.list[index].value) + return ret + + cdef first(self): + success = ctagsFirst(self.file, &self.c_entry) + if not success: + raise RuntimeError() + return self.create_tagEntry(&self.c_entry) + + cdef find(self, bytes name, int options): + success = ctagsFind(self.file, &self.c_entry, name, options) + if not success: + raise RuntimeError() + return self.create_tagEntry(&self.c_entry) + + cdef findNext(self): + success = ctagsFindNext(self.file, &self.c_entry) + if not success: + raise RuntimeError() + return self.create_tagEntry(&self.c_entry) + + cdef next(self): + success = ctagsNext(self.file, &self.c_entry) + if not success: + raise RuntimeError() + return self.create_tagEntry(&self.c_entry) + + def find_tags(self, name, int options): + """ Find tags corresponding to name in the tag file. + @name : the tag name to search for (unicode string or bytes). + @options : Option flags for the search (combination of TAG_* match flags). + @return : An iterator on all tags corresponding to the search. 
+ + WARNING: Only one iterator can run on a tag file. + If you use another iterator (by calling all_tags or find_tags), + any previous iterator will be invalidated and raise a RuntimeError. + """ + if isinstance(name, unicode): + if self.encoding is None: + raise ValueError("%r is a unicode string and you do not provide " + "an encoding"%name) + name = (name).encode(self.encoding) + try: + first = self.find(name, options) + self.current_id = first + yield first + except RuntimeError: + return + + while True: + if self.current_id is not first: + raise RuntimeError("Only one search/list generator at a time") + try: + other = self.findNext() + except RuntimeError: + return + else: + yield other + + def all_tags(self): + """ List all tags in the tag file. + @return : An iterator on all tags in the file. + + WARNING: Only one iterator can run on a tag file. + If you use another iterator (by calling all_tags or find_tags), + any previous iterator will be invalidated and raise a RuntimeError. 
+ """ + try: + first = self.first() + self.current_id = first + yield first + except RuntimeError: + return + + while True: + if self.current_id is not first: + raise RuntimeError("Only one search/list generator at a time") + try: + other = self.next() + except RuntimeError: + return + else: + yield other diff --git a/src/ctags/__init__.py b/src/ctags/__init__.py index 2a9ef36..9bf7b87 100644 --- a/src/ctags/__init__.py +++ b/src/ctags/__init__.py @@ -20,8 +20,8 @@ """ -from ._readtags import TagEntry, CTags -__all__ = ['TagEntry', 'CTags'] +from ._readtags import CTags +__all__ = ['CTags'] # sortType TAG_UNSORTED=0 diff --git a/src/readtags.pxi b/src/readtags.pxi index a674b7b..b6f91f0 100644 --- a/src/readtags.pxi +++ b/src/readtags.pxi @@ -36,10 +36,10 @@ cdef extern from "readtags.h": int error_number ctypedef struct programType "program": - char *author - char *name - char *url - char *version + const char *author + const char *name + const char *url + const char *version ctypedef struct tagFileInfo: statusType status @@ -48,11 +48,11 @@ cdef extern from "readtags.h": ctypedef struct tagExtensionField: - char* key - char* value + const char* key + const char* value ctypedef struct addressType "address": - char* pattern + const char* pattern unsigned long lineNumber ctypedef struct fieldsType: @@ -60,12 +60,12 @@ cdef extern from "readtags.h": tagExtensionField *list ctypedef struct tagEntry: - char* name - char* file + const char* name + const char* file addressType address - char* kind + const char* kind short fileScope fieldsType fields @@ -75,12 +75,12 @@ cdef extern from "readtags.h": TagSuccess - tagFile* ctagsOpen "tagsOpen" (char *filePath, tagFileInfo *info) - tagResult ctagsSetSortType "tagsSetSortType" (tagFile* file, tagSortType type) - tagResult ctagsFirst "tagsFirst" (tagFile *file, tagEntry *entry) -#C++: char *ctagsField "tagsField" (tagEntry *entry, char *key) except +MemoryError - char *ctagsField 
"tagsField" (tagEntry *entry, char *key) - tagResult ctagsFind "tagsFind" (tagFile *file, tagEntry *entry, char *name, int options) - tagResult ctagsNext "tagsNext" (tagFile *file, tagEntry *entry) + tagFile* ctagsOpen "tagsOpen" (const char *const filePath, tagFileInfo *const info) + tagResult ctagsSetSortType "tagsSetSortType" (tagFile *const file, const tagSortType type) + tagResult ctagsFirst "tagsFirst" (tagFile *const file, tagEntry *const entry) +#C++: const char *ctagsField "tagsField" (const tagEntry *const entry, const char *const key) except +MemoryError + const char *ctagsField "tagsField" (const tagEntry *const entry, const char *const key) + tagResult ctagsFind "tagsFind" (tagFile *const file, tagEntry *const entry, const char *const name, const int options) + tagResult ctagsNext "tagsNext" (tagFile *const file, tagEntry *const entry) tagResult ctagsFindNext "tagsFindNext" (tagFile *file, tagEntry *entry) - tagResult ctagsClose "tagsClose" (tagFile *file) + tagResult ctagsClose "tagsClose" (tagFile *const file) diff --git a/tests/test_ctags.py b/tests/test_ctags.py index c90a6e1..0348d8e 100644 --- a/tests/test_ctags.py +++ b/tests/test_ctags.py @@ -7,30 +7,100 @@ from unittest import TestCase import ctags +class TestCTagsOpen(TestCase): + def setUp(self): + self.file_path = os.path.join(src_dir, 'examples', 'tags') + + def test_open_str(self): + ctags.CTags(self.file_path) + + def test_open_bytes(self): + ctags.CTags(self.file_path.encode(sys.getfilesystemencoding())) + class TestCTagsParse(TestCase): def setUp(self): file_path = os.path.join(src_dir, 'examples', 'tags') - self.ctags = ctags.CTags(file_path.encode(sys.getfilesystemencoding())) + self.ctags = ctags.CTags(file_path) + def test_tag_entry(self): + self.ctags.setSortType(ctags.TAG_SORTED) + entry = next(self.ctags.all_tags()) + entry_info = [entry[_] + for _ in ('file', 'name', 'pattern', 'kind', 'language') + ] + self.assertEqual( + entry_info, + ['../_readtags.c', 'DL_EXPORT', '10', 
'macro', 'C'] + ) + def test_tag_find(self): + self.ctags.setSortType(ctags.TAG_SORTED) + entry = next(self.ctags.find_tags('find', ctags.TAG_PARTIALMATCH | ctags.TAG_IGNORECASE)) + entry_info = [entry[_] + for _ in ('file', 'name', 'pattern', 'kind', 'language') + ] + self.assertEqual( + entry_info, + ['../readtags.c', 'find', '/^static tagResult find (tagFile ' + '*const file, tagEntry *const entry,$/', 'function', 'C'] + ) + + def test_tag_find_partial_nocase(self): + for entry in self.ctags.find_tags('tag', ctags.TAG_PARTIALMATCH | ctags.TAG_IGNORECASE): + self.assertTrue(entry['name'].lower().startswith('tag')) + + def test_tag_find_nocase(self): + for entry in self.ctags.find_tags('tag', ctags.TAG_IGNORECASE): + self.assertEqual(entry['name'].lower(), 'tag') + + def test_tag_find_partial(self): + for entry in self.ctags.find_tags('tag', ctags.TAG_PARTIALMATCH): + self.assertTrue(entry['name'].startswith('tag')) + + def test_tag_find_noflag(self): + for entry in self.ctags.find_tags('tag', 0): + self.assertEqual(entry['name'], 'tag') + + def test_tag_find_bytes(self): + for entry in self.ctags.find_tags(b'tag', 0): + self.assertEqual(entry['name'], 'tag') + +class TestCTagsParseNoEncoding(TestCase): + def setUp(self): + file_path = os.path.join(src_dir, 'examples', 'tags') + self.ctags = ctags.CTags(file_path, encoding=None) def test_tag_entry(self): - entry = ctags.TagEntry() self.ctags.setSortType(ctags.TAG_SORTED) - self.ctags.first(entry) + entry = next(self.ctags.all_tags()) entry_info = [entry[_] - for _ in ('file', 'name', 'pattern', 'kind', b'language') + for _ in ('file', 'name', 'pattern', 'kind', 'language') ] self.assertEqual( entry_info, [b'../_readtags.c', b'DL_EXPORT', b'10', b'macro', b'C'] ) def test_tag_find(self): - entry = ctags.TagEntry() self.ctags.setSortType(ctags.TAG_SORTED) - self.ctags.find(entry, b'find', ctags.TAG_PARTIALMATCH | ctags.TAG_IGNORECASE) + entry = next(self.ctags.find_tags(b'find', ctags.TAG_PARTIALMATCH | 
ctags.TAG_IGNORECASE)) entry_info = [entry[_] - for _ in ('file', 'name', 'pattern', 'kind', b'language') + for _ in ('file', 'name', 'pattern', 'kind', 'language') ] self.assertEqual( entry_info, [b'../readtags.c', b'find', b'/^static tagResult find (tagFile ' b'*const file, tagEntry *const entry,$/', b'function', b'C'] ) + + def test_tag_find_partial_nocase(self): + for entry in self.ctags.find_tags(b'tag', ctags.TAG_PARTIALMATCH | ctags.TAG_IGNORECASE): + self.assertTrue(entry['name'].lower().startswith(b'tag')) + + def test_tag_find_nocase(self): + for entry in self.ctags.find_tags(b'tag', ctags.TAG_IGNORECASE): + self.assertEqual(entry['name'].lower(), b'tag') + + def test_tag_find_partial(self): + for entry in self.ctags.find_tags(b'tag', ctags.TAG_PARTIALMATCH): + self.assertTrue(entry['name'].startswith(b'tag')) + + def test_tag_find_noflag(self): + for entry in self.ctags.find_tags(b'tag', 0): + self.assertEqual(entry['name'], b'tag')