From 5bfff57ad688390fefd0c51edda406e035fa9afc Mon Sep 17 00:00:00 2001 From: Akash Karnatak Date: Sun, 13 Sep 2020 17:48:22 +0530 Subject: [PATCH 1/7] Added support for XML parsing, because attributes of xml tags are case-sensitive. --- pynliner/__init__.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pynliner/__init__.py b/pynliner/__init__.py index 873d0a8..bc3cec7 100644 --- a/pynliner/__init__.py +++ b/pynliner/__init__.py @@ -64,7 +64,7 @@ class Pynliner(object): output = False def __init__(self, log=None, allow_conditional_comments=False, - preserve_entities=True): + preserve_entities=True, is_xml=False): self.log = log cssutils.log.enabled = False if log is None else True self.extra_style_strings = [] @@ -73,6 +73,7 @@ def __init__(self, log=None, allow_conditional_comments=False, self.root_url = None self.relative_url = None self._substitutions = None + self.is_xml = is_xml def from_url(self, url): """Gets remote HTML page for conversion @@ -179,9 +180,14 @@ def _unsubstitute_output(self): def _get_soup(self): """Convert source string to BeautifulSoup object. Sets it to self.soup. + If parsing xml (i.e. self.is_xml = True), then use xml parser. + If using mod_wgsi, use html5 parsing to prevent BeautifulSoup incompatibility. """ + if self.is_xml: + self.soup = BeautifulSoup(self.source_string, "xml") + return # Check if mod_wsgi is running # - see http://code.google.com/p/modwsgi/wiki/TipsAndTricks try: From 9655bda623e8802bc9de6dbaba1f9f15bd0b9ebc Mon Sep 17 00:00:00 2001 From: Akash Karnatak Date: Sun, 13 Sep 2020 18:29:28 +0530 Subject: [PATCH 2/7] Changed distro for python 2.6 and 3.3 to "trusty" as there are no packages for them in xenial. 
--- .travis.yml | 18 ++++++++++++------ .vscode/settings.json | 3 +++ 2 files changed, 15 insertions(+), 6 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.travis.yml b/.travis.yml index ba28818..1e316b2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,9 +1,15 @@ language: python -python: - - "2.6" - - "2.7" - - "3.3" - - "3.4" - - "3.5" +jobs: + include: + - python: 2.6 + dist: trusty + - python: 2.7 + dist: xenial + - python: 3.3 + dist: trusty + - python: 3.4 + dist: xenial + - python: 3.5 + dist: xenial # command to run tests script: python setup.py test diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..1b70af9 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.pythonPath": "/home/akash/anaconda3/envs/test/bin/python" +} \ No newline at end of file From 9f829b8cde2b01044df3741918a4e74a621d3012 Mon Sep 17 00:00:00 2001 From: Akash Karnatak Date: Sun, 13 Sep 2020 18:35:11 +0530 Subject: [PATCH 3/7] Ignored VScode files --- .gitignore | 1 + .vscode/settings.json | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.gitignore b/.gitignore index d511880..b21ba81 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ /venv /dist /docs/html +.vscode/ \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 1b70af9..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "python.pythonPath": "/home/akash/anaconda3/envs/test/bin/python" -} \ No newline at end of file From b0c778eefd8faba903498f9f7966f9a9edf22fcc Mon Sep 17 00:00:00 2001 From: Akash Karnatak Date: Sun, 13 Sep 2020 18:40:33 +0530 Subject: [PATCH 4/7] BeautifulSoup4 not supported on python 2.6 and 3.3. 
--- .travis.yml | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1e316b2..54ab201 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,15 +1,8 @@ language: python -jobs: - include: - - python: 2.6 - dist: trusty - - python: 2.7 - dist: xenial - - python: 3.3 - dist: trusty - - python: 3.4 - dist: xenial - - python: 3.5 - dist: xenial +python: + - "2.7" + - "3.4" + - "3.5" + - "3.7" # command to run tests script: python setup.py test From d7b047c42a83f8e2fb77ad958c780af37989066a Mon Sep 17 00:00:00 2001 From: Akash Karnatak Date: Sun, 13 Sep 2020 19:17:36 +0530 Subject: [PATCH 5/7] Added 'lxml' to required packages list. --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 70bc3ac..bf41ad6 100644 --- a/setup.py +++ b/setup.py @@ -12,6 +12,7 @@ install_requires=[ 'BeautifulSoup4 >= 4.4.1', 'cssutils >=0.9.7', + 'lxml', ], tests_require=[ 'mock' From 38502a5841b7e3262f91fba27cb56ce94e4a7d7f Mon Sep 17 00:00:00 2001 From: Akash Karnatak Date: Sun, 13 Sep 2020 19:23:31 +0530 Subject: [PATCH 6/7] Latest version of lxml requires Python 2.7, 3.5 or later. 
--- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 54ab201..4c8ccda 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,8 @@ language: python python: - "2.7" - - "3.4" - "3.5" + - "3.6" - "3.7" # command to run tests script: python setup.py test From 904b9b50a8cbb8f40c1e0863545e39ef6150b282 Mon Sep 17 00:00:00 2001 From: Akash Karnatak Date: Mon, 14 Sep 2020 17:26:46 +0530 Subject: [PATCH 7/7] Encoded the input string of BeautifulSoup to automatically handle different xml formats For more info see: https://stackoverflow.com/a/54164665/11701676 --- pynliner/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pynliner/__init__.py b/pynliner/__init__.py index bc3cec7..2c40193 100644 --- a/pynliner/__init__.py +++ b/pynliner/__init__.py @@ -186,7 +186,7 @@ def _get_soup(self): incompatibility. """ if self.is_xml: - self.soup = BeautifulSoup(self.source_string, "xml") + self.soup = BeautifulSoup(self.source_string.encode('utf-8'), "xml") return # Check if mod_wsgi is running # - see http://code.google.com/p/modwsgi/wiki/TipsAndTricks