diff --git a/README.md b/README.md
index 13a481d..b82afb5 100644
--- a/README.md
+++ b/README.md
@@ -35,75 +35,106 @@ ctags --fields=afmikKlnsStz readtags.c readtags.h
**Opening Tags File**
```python
import ctags
-from ctags import CTags, TagEntry
+from ctags import CTags
import sys
try:
tagFile = CTags('tags')
-except:
+except OSError as err:
+ print(err)
sys.exit(1)
# Available file information keys:
# opened - was the tag file successfully opened?
# error_number - errno value when 'opened' is false
# format - format of tag file (1 = original, 2 = extended)
-# sort - how is the tag file sorted?
-# author - name of author of generating program (may be empy string)
-# name - name of program (may be empy string)
-# url - URL of distribution (may be empy string)
-# version - program version (may be empty string)
+# sort - how is the tag file sorted?
+#
+# Other keys may be available:
+# author - name of author of generating program
+# name - name of program
+# url - URL of distribution
+# version - program version
+# If one of them is not present a KeyError is raised.
-print tagFile['name']
-print tagFile['author']
-print tagFile['format']
+try:
+ print(tagFile['name'])
+except KeyError:
+ print("No 'name' in the tagfile")
+
+try:
+ print(tagFile['author'])
+except KeyError:
+ print("No 'author' in the tagfile")
+
+print(tagFile['format'])
# Available sort type:
# TAG_UNSORTED, TAG_SORTED, TAG_FOLDSORTED
# Note: use this only if you know how the tags file is sorted which is
# specified when you generate the tag file
-status = tagFile.setSortType(ctags.TAG_SORTED)
+tagFile.setSortType(ctags.TAG_SORTED)
```
-**Obtaining First Tag Entry**
+**Listing Tag Entries**
```python
-entry = TagEntry()
-status = tagFile.first(entry)
-
-if status:
- # Available TagEntry keys:
- # name - name of tag
- # file - path of source file containing definition of tag
- # pattern - pattern for locating source line (None if no pattern)
- # lineNumber - line number in source file of tag definition (may be zero if not known)
- # kind - kind of tag (none if not known)
- # fileScope - is tag of file-limited scope?
-
- # Note: other keys will be assumed as an extension key and will
- # return None if no such key is found
-
- print entry['name']
- print entry['kind']
+# A generator of all tags in the file can be obtained with:
+all_tags = tagFile.all_tags()
+
+# The generator yields a dict for each entry.
+# The following keys are always available for an entry:
+# name - name of tag
+# file - path of source file containing definition of tag
+# pattern - pattern for locating source line
+# (None if no pattern, this should not happen with a correct
+# tag file)
+# fileScope - is tag of file-limited scope?
+#
+# The dict may contain other keys (extension keys).
+# Other keys include :
+# lineNumber - line number in source file of tag definition
+# kind - kind of tag
+
+for entry in all_tags:
+ print(entry['name'])
+ print(entry['file'])
+ try:
+ entry['lineNumber']
+ except KeyError:
+ print("Entry has no lineNumber")
+ else:
+ print("Entry has a lineNumber")
```
-**Finding a Tag Entry**
-```python
+**Finding Tag Entries**
+```python
# Available options:
# TAG_PARTIALMATCH - begin with
# TAG_FULLMATCH - full length matching
# TAG_IGNORECASE - disable binary search
# TAG_OBSERVECASE - case sensitive and allowed binary search to perform
-if tagFile.find(entry, 'find', ctags.TAG_PARTIALMATCH | ctags.TAG_IGNORECASE):
- print 'found'
- print entry['lineNumber']
- print entry['pattern']
- print entry['kind']
+found_tags = tagFile.find_tags('find', ctags.TAG_PARTIALMATCH | ctags.TAG_IGNORECASE)
+for entry in found_tags:
+ print(entry['lineNumber'])
+ print(entry['pattern'])
+ print(entry['kind'])
-# Find the next tag matching the name and options supplied to the
-# most recent call to tagFile.find(). (replace the entry if found)
-status = tagFile.findNext(entry)
-# Step to the next tag in the file (replace entry if found)
-status = tagFile.next(entry)
+```
+
+# File Encoding.
+
+By default, CTags returns unicode strings decoded with the 'utf8' encoding.
+This can be changed by providing a custom encoding when creating the CTags object:
+
+```python
+tagFile = CTags('tags', encoding='latin1')
```
+
+If None is provided as encoding, no decoding is done and entries will contain
+bytes instead of strings.
+
+It is also possible to provide an encoding_errors argument.
+It is passed to the decode function as the 'errors' argument.
+See the documentation of the decode function to know how to use this argument.
+By default, encoding_errors is 'strict'.
diff --git a/src/_readtags.pyx b/src/_readtags.pyx
index 73d7e0b..d99ff58 100644
--- a/src/_readtags.pyx
+++ b/src/_readtags.pyx
@@ -17,130 +17,161 @@ You should have received a copy of the GNU General Public License
along with Python-Ctags. If not, see .
"""
+cdef extern from "string.h":
+ char* strerror(int errnum)
include "stdlib.pxi"
include "readtags.pxi"
+import sys
-cdef class TagEntry:
- cdef tagEntry c_entry
-
- def __cinit__(self):
- self.c_entry.fields.count = 0
- self.c_entry.fields.list = NULL
-
-
- def __setitem__(self, key, item):
- if key == 'name':
- self.c_entry.name = item
- elif key == 'file':
- self.c_entry.file = item
- elif key == 'pattern':
- self.c_entry.address.pattern = item
- elif key == 'lineNumber':
- self.c_entry.address.lineNumber = item
- elif key == 'kind':
- self.c_entry.kind = item
- elif key == 'fileScope':
- self.c_entry.fileScope = item
- elif key == 'fields':
- # fields.list is allocated by readtags.c
- if self.c_entry.fields.count != len(item):
- return
-
- fields = item
- if self.c_entry.fields.list != NULL:
- free(self.c_entry.fields.list)
- self.c_entry.fields.list = NULL
-
- for k, v in fields.iteritems():
- self.c_entry.fields.list.key = k
- self.c_entry.fields.list.value = v
-
- def __getitem__(self, key):
- cdef char* result
- if key == 'name':
- return self.c_entry.name
- elif key == 'file':
- return self.c_entry.file
- elif key == 'pattern':
- if self.c_entry.address.pattern == NULL:
- return None
- return self.c_entry.address.pattern
- elif key == 'lineNumber':
- return self.c_entry.address.lineNumber
- elif key == 'kind':
- if self.c_entry.kind == NULL:
- return None
- return self.c_entry.kind
- elif key == 'fileScope':
- return self.c_entry.fileScope
- else:
- # It will crash if we mix NULL/0/None
- # don't mix comparison of type
- result = ctagsField(&self.c_entry, key)
- if result == NULL:
- return None
-
- return result
cdef class CTags:
cdef tagFile* file
cdef tagFileInfo info
+ cdef tagEntry c_entry
+ cdef object current_id
+ cdef object encoding
+ cdef str encoding_errors
- def __cinit__(self, filepath):
- self.open(filepath)
+ def __cinit__(self, filepath, encoding='utf8', encoding_errors='strict'):
+ # Open the tag file at `filepath` and record how its content is decoded.
+ # filepath: path of the tag file; a unicode path is encoded with the
+ # filesystem encoding before being handed to ctagsOpen.
+ # encoding: stored for decode(); a false value (e.g. None) makes decode()
+ # return bytes unchanged.
+ # encoding_errors: stored for decode(), which passes it to bytes.decode().
+ # Raises OSError (error number, strerror text, filepath) when ctagsOpen
+ # returns a null handle.
+ if isinstance(filepath, unicode):
+ filepath = (filepath).encode(sys.getfilesystemencoding())
+ self.file = ctagsOpen(filepath, &self.info)
+ if not self.file:
+ # NOTE(review): strerror() yields a C string; presumably it reaches
+ # OSError as bytes on Python 3 - confirm whether it should be decoded.
+ raise OSError(self.info.status.error_number,
+ strerror(self.info.status.error_number),
+ filepath)
+ self.encoding = encoding
+ self.encoding_errors = encoding_errors
+
+ cdef decode(self, bytes bytes_array):
+ # Convert bytes read from the tag file using the encoding and error
+ # policy stored on this object; when no encoding is set the bytes are
+ # returned unchanged.
+ if not self.encoding:
+ return bytes_array
+ return bytes_array.decode(self.encoding, self.encoding_errors)
def __dealloc__(self):
-
+ # Close the tag file only when a handle is held.
if self.file:
ctagsClose(self.file)
def __getitem__(self, key):
+ """ Return a piece of information about the tag file.
+ @key : 'format' or 'sort' (always available), or one of the program
+ fields 'author', 'name', 'url', 'version'.
+ @return : the raw value for 'format' and 'sort'; the program fields are
+ passed through decode().
+
+ Raises KeyError for an unknown key or for a program field that the tag
+ file does not provide.
+ """
- if key == 'opened':
- return self.info.status.opened
- if key == 'error_number':
- return self.info.status.error_number
+ # Keep the field as a C pointer: coercing a NULL char* to a Python object
+ # is invalid, so the NULL test must happen before any conversion.
+ cdef const char* value = NULL
if key == 'format':
return self.info.file.format
- if key == 'sort':
+ elif key == 'sort':
return self.info.file.sort
- if key == 'author':
- if self.info.program.author == NULL:
- return ''
- return self.info.program.author
- if key == 'name':
- if self.info.program.name == NULL:
- return ''
- return self.info.program.name
- if key == 'url':
- if self.info.program.url == NULL:
- return ''
- return self.info.program.url
- if key == 'version':
- if self.info.program.version == NULL:
- return ''
- return self.info.program.version
-
-
- def open(self, filepath):
- self.file = ctagsOpen(filepath, &self.info)
-
- if not self.info.status.opened:
- raise Exception('Invalid tag file')
+ elif key == 'author':
+ value = self.info.program.author
+ elif key == 'name':
+ value = self.info.program.name
+ elif key == 'url':
+ value = self.info.program.url
+ elif key == 'version':
+ value = self.info.program.version
+ # Still NULL: either the key is unknown or the field is absent.
+ if value == NULL:
+ raise KeyError(key)
+ return self.decode(value)
def setSortType(self, tagSortType type):
+ """ Tell the reader how the tag file is sorted.
+ @type : a sort type constant (TAG_UNSORTED, TAG_SORTED, TAG_FOLDSORTED).
+
+ Raises RuntimeError when ctagsSetSortType does not report success.
+ """
- return ctagsSetSortType(self.file, type)
-
- def first(self, TagEntry entry):
- return ctagsFirst(self.file, &entry.c_entry)
-
- def find(self, TagEntry entry, char* name, int options):
- return ctagsFind(self.file, &entry.c_entry, name, options)
-
- def findNext(self, TagEntry entry):
- return ctagsFindNext(self.file, &entry.c_entry)
-
- def next(self, TagEntry entry):
- return ctagsNext(self.file, &entry.c_entry)
+ success = ctagsSetSortType(self.file, type)
+ if not success:
+ raise RuntimeError()
+
+ cdef create_tagEntry(self, const tagEntry* const c_entry):
+ # Build the Python dict handed to callers from a C tagEntry.
+ # 'name', 'file' and 'fileScope' are always set; 'pattern', 'lineNumber'
+ # and 'kind' are set only when the C entry provides them (non-NULL pointer,
+ # non-zero line number); each extension field is added under its own key.
+ # String values go through decode(); extension keys are decoded with the
+ # default codec of bytes.decode(), whatever encoding this object uses.
+ cdef dict ret = {}
+ ret['name'] = self.decode(c_entry.name)
+ ret['file'] = self.decode(c_entry.file)
+ ret['fileScope'] = c_entry.fileScope
+ if c_entry.address.pattern != NULL:
+ ret['pattern'] = self.decode(c_entry.address.pattern)
+ if c_entry.address.lineNumber:
+ ret['lineNumber'] = c_entry.address.lineNumber
+ if c_entry.kind != NULL:
+ ret['kind'] = self.decode(c_entry.kind)
+ for index in range(c_entry.fields.count):
+ key = c_entry.fields.list[index].key
+ ret[key.decode()] = self.decode(c_entry.fields.list[index].value)
+ return ret
+
+ cdef first(self):
+ # Call ctagsFirst to load an entry into self.c_entry and return it as a
+ # dict; raises RuntimeError when the call does not report success.
+ success = ctagsFirst(self.file, &self.c_entry)
+ if not success:
+ raise RuntimeError()
+ return self.create_tagEntry(&self.c_entry)
+
+ cdef find(self, bytes name, int options):
+ # Call ctagsFind with `name` and the `options` flags, loading the entry
+ # into self.c_entry, and return it as a dict; raises RuntimeError when the
+ # call does not report success.
+ success = ctagsFind(self.file, &self.c_entry, name, options)
+ if not success:
+ raise RuntimeError()
+ return self.create_tagEntry(&self.c_entry)
+
+ cdef findNext(self):
+ # Call ctagsFindNext to load an entry into self.c_entry and return it as
+ # a dict; raises RuntimeError when the call does not report success.
+ success = ctagsFindNext(self.file, &self.c_entry)
+ if not success:
+ raise RuntimeError()
+ return self.create_tagEntry(&self.c_entry)
+
+ cdef next(self):
+ # Call ctagsNext to load an entry into self.c_entry and return it as a
+ # dict; raises RuntimeError when the call does not report success.
+ success = ctagsNext(self.file, &self.c_entry)
+ if not success:
+ raise RuntimeError()
+ return self.create_tagEntry(&self.c_entry)
+
+ def find_tags(self, name, int options):
+ """ Find tags corresponding to name in the tag file.
+ @name : the name to search for, as bytes or as a unicode string
+ (a unicode string is encoded with the encoding of this object).
+ @options : option flags for the search.
+ @return : An iterator on all tags corresponding to the search;
+ each tag is a dict.
+
+ Raises ValueError (on first iteration) if name is a unicode string
+ and this object has no encoding.
+
+ WARNING: Only one iterator can run on a tag file.
+ If you use another iterator (by calling all_tags or find_tags),
+ any previous iterator will be invalidated and raise a RuntimeError.
+ """
+ if isinstance(name, unicode):
+ if self.encoding is None:
+ raise ValueError("%r is a unicode string and you do not provide "
+ "a encoding"%name)
+ name = (name).encode(self.encoding)
+ try:
+ first = self.find(name, options)
+ except RuntimeError:
+ # No match. End the generator with a plain return: raising
+ # StopIteration inside a generator is turned into RuntimeError
+ # (PEP 479).
+ return
+ # Kept outside the try so that a RuntimeError thrown into the generator
+ # by the consumer is not mistaken for "no more tags".
+ self.current_id = first
+ yield first
+
+ while True:
+ # Another generator has taken over the shared C entry.
+ if self.current_id is not first:
+ raise RuntimeError("Only one search/list generator at a time")
+ try:
+ other = self.findNext()
+ except RuntimeError:
+ return
+ else:
+ yield other
+
+ def all_tags(self):
+ """ List all tags in the tag file.
+ @return : An iterator on all tags in the file; each tag is a dict.
+
+ WARNING: Only one iterator can run on a tag file.
+ If you use another iterator (by calling all_tags or find_tags),
+ any previous iterator will be invalidated and raise a RuntimeError.
+ """
+ try:
+ first = self.first()
+ except RuntimeError:
+ # No entry. End the generator with a plain return: raising
+ # StopIteration inside a generator is turned into RuntimeError
+ # (PEP 479).
+ return
+ # Kept outside the try so that a RuntimeError thrown into the generator
+ # by the consumer is not mistaken for "no more tags".
+ self.current_id = first
+ yield first
+
+ while True:
+ # Another generator has taken over the shared C entry.
+ if self.current_id is not first:
+ raise RuntimeError("Only one search/list generator at a time")
+ try:
+ other = self.next()
+ except RuntimeError:
+ return
+ else:
+ yield other
diff --git a/src/ctags/__init__.py b/src/ctags/__init__.py
index 2a9ef36..9bf7b87 100644
--- a/src/ctags/__init__.py
+++ b/src/ctags/__init__.py
@@ -20,8 +20,8 @@
"""
-from ._readtags import TagEntry, CTags
-__all__ = ['TagEntry', 'CTags']
+from ._readtags import CTags
+__all__ = ['CTags']
# sortType
TAG_UNSORTED=0
diff --git a/src/readtags.pxi b/src/readtags.pxi
index a674b7b..b6f91f0 100644
--- a/src/readtags.pxi
+++ b/src/readtags.pxi
@@ -36,10 +36,10 @@ cdef extern from "readtags.h":
int error_number
ctypedef struct programType "program":
- char *author
- char *name
- char *url
- char *version
+ const char *author
+ const char *name
+ const char *url
+ const char *version
ctypedef struct tagFileInfo:
statusType status
@@ -48,11 +48,11 @@ cdef extern from "readtags.h":
ctypedef struct tagExtensionField:
- char* key
- char* value
+ const char* key
+ const char* value
ctypedef struct addressType "address":
- char* pattern
+ const char* pattern
unsigned long lineNumber
ctypedef struct fieldsType:
@@ -60,12 +60,12 @@ cdef extern from "readtags.h":
tagExtensionField *list
ctypedef struct tagEntry:
- char* name
- char* file
+ const char* name
+ const char* file
addressType address
- char* kind
+ const char* kind
short fileScope
fieldsType fields
@@ -75,12 +75,12 @@ cdef extern from "readtags.h":
TagSuccess
- tagFile* ctagsOpen "tagsOpen" (char *filePath, tagFileInfo *info)
- tagResult ctagsSetSortType "tagsSetSortType" (tagFile* file, tagSortType type)
- tagResult ctagsFirst "tagsFirst" (tagFile *file, tagEntry *entry)
-#C++: char *ctagsField "tagsField" (tagEntry *entry, char *key) except +MemoryError
- char *ctagsField "tagsField" (tagEntry *entry, char *key)
- tagResult ctagsFind "tagsFind" (tagFile *file, tagEntry *entry, char *name, int options)
- tagResult ctagsNext "tagsNext" (tagFile *file, tagEntry *entry)
+ tagFile* ctagsOpen "tagsOpen" (const char *const filePath, tagFileInfo *const info)
+ tagResult ctagsSetSortType "tagsSetSortType" (tagFile *const file, const tagSortType type)
+ tagResult ctagsFirst "tagsFirst" (tagFile *const file, tagEntry *const entry)
+#C++: const char *ctagsField "tagsField" (const tagEntry *const entry, const char *const key) except +MemoryError
+ const char *ctagsField "tagsField" (const tagEntry *const entry, const char *const key)
+ tagResult ctagsFind "tagsFind" (tagFile *const file, tagEntry *const entry, const char *const name, const int options)
+ tagResult ctagsNext "tagsNext" (tagFile *const file, tagEntry *const entry)
tagResult ctagsFindNext "tagsFindNext" (tagFile *file, tagEntry *entry)
- tagResult ctagsClose "tagsClose" (tagFile *file)
+ tagResult ctagsClose "tagsClose" (tagFile *const file)
diff --git a/tests/test_ctags.py b/tests/test_ctags.py
index c90a6e1..0348d8e 100644
--- a/tests/test_ctags.py
+++ b/tests/test_ctags.py
@@ -7,30 +7,100 @@
from unittest import TestCase
import ctags
+class TestCTagsOpen(TestCase):
+ """Check that the example tag file opens from a str path and a bytes path."""
+ def setUp(self):
+ self.file_path = os.path.join(src_dir, 'examples', 'tags')
+
+ def test_open_str(self):
+ ctags.CTags(self.file_path)
+
+ def test_open_bytes(self):
+ ctags.CTags(self.file_path.encode(sys.getfilesystemencoding()))
+
class TestCTagsParse(TestCase):
+ """Exercise all_tags and find_tags on the example tag file with the default
+ encoding; entry values are compared against str."""
def setUp(self):
file_path = os.path.join(src_dir, 'examples', 'tags')
- self.ctags = ctags.CTags(file_path.encode(sys.getfilesystemencoding()))
+ self.ctags = ctags.CTags(file_path)
+ def test_tag_entry(self):
+ self.ctags.setSortType(ctags.TAG_SORTED)
+ entry = next(self.ctags.all_tags())
+ entry_info = [entry[_]
+ for _ in ('file', 'name', 'pattern', 'kind', 'language')
+ ]
+ self.assertEqual(
+ entry_info,
+ ['../_readtags.c', 'DL_EXPORT', '10', 'macro', 'C']
+ )
+ def test_tag_find(self):
+ self.ctags.setSortType(ctags.TAG_SORTED)
+ entry = next(self.ctags.find_tags('find', ctags.TAG_PARTIALMATCH | ctags.TAG_IGNORECASE))
+ entry_info = [entry[_]
+ for _ in ('file', 'name', 'pattern', 'kind', 'language')
+ ]
+ self.assertEqual(
+ entry_info,
+ ['../readtags.c', 'find', '/^static tagResult find (tagFile '
+ '*const file, tagEntry *const entry,$/', 'function', 'C']
+ )
+
+ def test_tag_find_partial_nocase(self):
+ for entry in self.ctags.find_tags('tag', ctags.TAG_PARTIALMATCH | ctags.TAG_IGNORECASE):
+ self.assertTrue(entry['name'].lower().startswith('tag'))
+
+ def test_tag_find_nocase(self):
+ for entry in self.ctags.find_tags('tag', ctags.TAG_IGNORECASE):
+ self.assertEqual(entry['name'].lower(), 'tag')
+
+ def test_tag_find_partial(self):
+ for entry in self.ctags.find_tags('tag', ctags.TAG_PARTIALMATCH):
+ self.assertTrue(entry['name'].startswith('tag'))
+
+ def test_tag_find_noflag(self):
+ for entry in self.ctags.find_tags('tag', 0):
+ self.assertEqual(entry['name'], 'tag')
+
+ def test_tag_find_bytes(self):
+ for entry in self.ctags.find_tags(b'tag', 0):
+ self.assertEqual(entry['name'], 'tag')
+
+class TestCTagsParseNoEncoding(TestCase):
+ """Exercise all_tags and find_tags on the example tag file opened with
+ encoding=None; entry values are compared against bytes, keys stay str."""
+ def setUp(self):
+ file_path = os.path.join(src_dir, 'examples', 'tags')
+ self.ctags = ctags.CTags(file_path, encoding=None)
def test_tag_entry(self):
- entry = ctags.TagEntry()
self.ctags.setSortType(ctags.TAG_SORTED)
- self.ctags.first(entry)
+ entry = next(self.ctags.all_tags())
entry_info = [entry[_]
- for _ in ('file', 'name', 'pattern', 'kind', b'language')
+ for _ in ('file', 'name', 'pattern', 'kind', 'language')
]
self.assertEqual(
entry_info,
[b'../_readtags.c', b'DL_EXPORT', b'10', b'macro', b'C']
)
def test_tag_find(self):
- entry = ctags.TagEntry()
self.ctags.setSortType(ctags.TAG_SORTED)
- self.ctags.find(entry, b'find', ctags.TAG_PARTIALMATCH | ctags.TAG_IGNORECASE)
+ entry = next(self.ctags.find_tags(b'find', ctags.TAG_PARTIALMATCH | ctags.TAG_IGNORECASE))
entry_info = [entry[_]
- for _ in ('file', 'name', 'pattern', 'kind', b'language')
+ for _ in ('file', 'name', 'pattern', 'kind', 'language')
]
self.assertEqual(
entry_info,
[b'../readtags.c', b'find', b'/^static tagResult find (tagFile '
b'*const file, tagEntry *const entry,$/', b'function', b'C']
)
+
+ def test_tag_find_partial_nocase(self):
+ for entry in self.ctags.find_tags(b'tag', ctags.TAG_PARTIALMATCH | ctags.TAG_IGNORECASE):
+ self.assertTrue(entry['name'].lower().startswith(b'tag'))
+
+ def test_tag_find_nocase(self):
+ for entry in self.ctags.find_tags(b'tag', ctags.TAG_IGNORECASE):
+ self.assertEqual(entry['name'].lower(), b'tag')
+
+ def test_tag_find_partial(self):
+ for entry in self.ctags.find_tags(b'tag', ctags.TAG_PARTIALMATCH):
+ self.assertTrue(entry['name'].startswith(b'tag'))
+
+ def test_tag_find_noflag(self):
+ for entry in self.ctags.find_tags(b'tag', 0):
+ self.assertEqual(entry['name'], b'tag')