Skip to content

Commit c687259

Browse files
authored
Merge pull request #5189 from StackStorm/url_path_unicode_fix
Correctly handle unicode characters in the URL path names and the CLI arguments (sys.argv)
2 parents 50787d5 + c0ed21b commit c687259

File tree

10 files changed

+260
-3
lines changed

10 files changed

+260
-3
lines changed

CHANGELOG.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,20 @@ Added
2828
* Added the command line utility `st2-validate-pack` that can be used by pack developers to
2929
validate pack contents. (improvement)
3030

31+
* Fix a bug in the API and CLI code which would prevent users from being able to retrieve resources
32+
which contain non-ascii (utf-8) characters in the names / references. (bug fix) #5189
33+
34+
Contributed by @Kami.
35+
36+
* Fix a bug in the API router code and make sure we return correct and user-friendly error to the
37+
user in case we fail to parse the request URL / path because it contains invalid or incorrectly
38+
URL encoded data.
39+
40+
Previously such errors weren't handled correctly which meant original exception with a stack
41+
trace got propagated to the user. (bug fix) #5189
42+
43+
Contributed by @Kami.
44+
3145
Changed
3246
~~~~~~~
3347

st2api/tests/unit/controllers/v1/test_actions.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import os
1818
import os.path
1919
import copy
20+
import urllib
2021

2122
try:
2223
import simplejson as json
@@ -277,6 +278,22 @@
277278
}
278279

279280

281+
ACTION_WITH_UNICODE_NAME = {
282+
"name": "st2.dummy.action_unicode_我爱狗",
283+
"description": "test description",
284+
"enabled": True,
285+
"pack": "dummy_pack_1",
286+
"entry_point": "/tmp/test/action1.sh",
287+
"runner_type": "local-shell-script",
288+
"parameters": {
289+
"a": {"type": "string", "default": "A1"},
290+
"b": {"type": "string", "default": "B1"},
291+
"sudo": {"default": True, "immutable": True},
292+
},
293+
"notify": {"on-complete": {"message": "Woohoo! I completed!!!"}},
294+
}
295+
296+
280297
class ActionsControllerTestCase(
281298
FunctionalTest, APIControllerWithIncludeAndExcludeFilterTestCase, CleanFilesTestCase
282299
):
@@ -640,6 +657,48 @@ def test_action_with_notify_update(self):
640657
self.assertEqual(get_resp.json["notify"], {})
641658
self.__do_delete(action_id)
642659

660+
@mock.patch.object(
661+
action_validator, "validate_action", mock.MagicMock(return_value=True)
662+
)
663+
def test_action_with_unicode_name_create(self):
664+
post_resp = self.__do_post(ACTION_WITH_UNICODE_NAME)
665+
action_id = self.__get_action_id(post_resp)
666+
get_resp = self.__do_get_one(action_id)
667+
self.assertEqual(get_resp.status_int, 200)
668+
self.assertEqual(self.__get_action_id(get_resp), action_id)
669+
self.assertEqual(get_resp.json["name"], "st2.dummy.action_unicode_我爱狗")
670+
self.assertEqual(
671+
get_resp.json["ref"], "dummy_pack_1.st2.dummy.action_unicode_我爱狗"
672+
)
673+
self.assertEqual(
674+
get_resp.json["uid"], "action:dummy_pack_1:st2.dummy.action_unicode_我爱狗"
675+
)
676+
677+
get_resp = self.__do_get_one("dummy_pack_1.st2.dummy.action_unicode_我爱狗")
678+
self.assertEqual(get_resp.json["name"], "st2.dummy.action_unicode_我爱狗")
679+
self.assertEqual(
680+
get_resp.json["ref"], "dummy_pack_1.st2.dummy.action_unicode_我爱狗"
681+
)
682+
self.assertEqual(
683+
get_resp.json["uid"], "action:dummy_pack_1:st2.dummy.action_unicode_我爱狗"
684+
)
685+
686+
# Now retrieve the action using the ref and ensure it works correctly
687+
# NOTE: We need to use urlquoted value when retrieving the item since that's how all the
688+
# http clients work - non ascii characters get escaped / quoted. Passing in unquoted
689+
# value will result in exception (as expected).
690+
ref_quoted = urllib.parse.quote("dummy_pack_1.st2.dummy.action_unicode_我爱狗")
691+
get_resp = self.__do_get_one(ref_quoted)
692+
self.assertEqual(get_resp.json["name"], "st2.dummy.action_unicode_我爱狗")
693+
self.assertEqual(
694+
get_resp.json["ref"], "dummy_pack_1.st2.dummy.action_unicode_我爱狗"
695+
)
696+
self.assertEqual(
697+
get_resp.json["uid"], "action:dummy_pack_1:st2.dummy.action_unicode_我爱狗"
698+
)
699+
700+
self.__do_delete(action_id)
701+
643702
@mock.patch.object(
644703
action_validator, "validate_action", mock.MagicMock(return_value=True)
645704
)

st2api/tests/unit/controllers/v1/test_base.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,12 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16+
import webob
1617
from oslo_config import cfg
18+
from webob.request import Request
19+
20+
from st2common.router import Router
21+
1722
from st2tests.api import FunctionalTest
1823

1924

@@ -95,3 +100,16 @@ def test_valid_status_code_is_returned_on_invalid_path(self):
95100
"/v1/executions/577f775b0640fd1451f2030b/re_run/a/b", expect_errors=True
96101
)
97102
self.assertEqual(resp.status_int, 404)
103+
104+
def test_router_invalid_url_path_friendly_error(self):
105+
# NOTE: We intentionally don't use sp.app.get here since that goes through the webtest
106+
# layer which manipulates the path which means we won't be testing what we actually want
107+
# to test (an edge case). To test the edge case correctly, we need to manually call router
108+
# with specifically crafted data.
109+
router = Router()
110+
request = Request(environ={"PATH_INFO": "/v1/rules/好的".encode("utf-8")})
111+
112+
expected_msg = "URL likely contains invalid or incorrectly URL encoded values"
113+
self.assertRaisesRegexp(
114+
webob.exc.HTTPBadRequest, expected_msg, router.match, request
115+
)

st2client/st2client/shell.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@
6363
from st2client.config import set_config
6464
from st2client.exceptions.operations import OperationFailureException
6565
from st2client.utils.logging import LogLevelFilter, set_log_level_for_all_loggers
66+
from st2client.utils.misc import reencode_list_with_surrogate_escape_sequences
6667
from st2client.commands.auth import TokenCreateCommand
6768
from st2client.commands.auth import LoginCommand
6869

@@ -98,6 +99,42 @@
9899

99100
PACKAGE_METADATA_FILE_PATH = "/opt/stackstorm/st2/package.meta"
100101

102+
"""
103+
Here we sanitize the provided args and ensure they contain valid unicode values.
104+
105+
By default, sys.argv will contain a unicode string where the actual item values which contain
106+
unicode sequences are escaped using unicode surrogates.
107+
108+
For example, if "examples.test_rule_utf8_náme" value is specified as a CLI argument, sys.argv
109+
and as such also url, would contain "examples.test_rule_utf8_n%ED%B3%83%ED%B2%A1me" which is not
110+
what we want.
111+
112+
Complete sys.argv example:
113+
114+
1. Default - ['shell.py', '--debug', 'rule', 'get', 'examples.test_rule_utf8_n\udcc3\udca1me']
115+
2. What we want - ['shell.py', '--debug', 'rule', 'get', 'examples.test_rule_utf8_náme']
116+
117+
This won't work correctly when sending requests to the API. As such, we correctly escape the
118+
value to the unicode string here and then let the http layer (requests) correctly url encode
119+
this value.
120+
121+
Technically, we could also just try to re-encode it in the HTTPClient and I tried that first, but
122+
it turns out more code in the client results in exceptions if it's not re-encoded as early as
123+
possible.
124+
"""
125+
126+
REENCODE_ARGV = os.environ.get("ST2_CLI_RENCODE_ARGV", "true").lower() in [
127+
"true",
128+
"1",
129+
"yes",
130+
]
131+
132+
if REENCODE_ARGV:
133+
try:
134+
sys.argv = reencode_list_with_surrogate_escape_sequences(sys.argv)
135+
except Exception as e:
136+
print("Failed to re-encode sys.argv: %s" % (str(e)))
137+
101138

102139
def get_stackstorm_version():
103140
"""

st2client/st2client/utils/misc.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,14 @@
1414
# limitations under the License.
1515

1616
from __future__ import absolute_import
17+
18+
from typing import List
19+
1720
import copy
1821

1922
import six
2023

21-
__all__ = ["merge_dicts"]
24+
__all__ = ["merge_dicts", "reencode_list_with_surrogate_escape_sequences"]
2225

2326

2427
def merge_dicts(d1, d2):
@@ -39,3 +42,22 @@ def merge_dicts(d1, d2):
3942
result[key] = value
4043

4144
return result
45+
46+
47+
def reencode_list_with_surrogate_escape_sequences(value: List[str]) -> List[str]:
48+
"""
49+
Function which reencodes each item in the provided list replacing unicode surrogate escape
50+
sequences using actual unicode values.
51+
"""
52+
result = []
53+
54+
for item in value:
55+
try:
56+
item = item.encode("ascii", "surrogateescape").decode("utf-8")
57+
except UnicodeEncodeError:
58+
# Already a unicode string, nothing to do
59+
pass
60+
61+
result.append(item)
62+
63+
return result

st2client/tests/unit/test_commands.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,21 @@ def test_command_delete_failed(self):
295295
args = self.parser.parse_args(["fakeresource", "delete", "cba"])
296296
self.assertRaises(Exception, self.branch.commands["delete"].run, args)
297297

298+
@mock.patch.object(
299+
models.ResourceManager,
300+
"get_by_id",
301+
mock.MagicMock(return_value=base.FakeResource(**base.RESOURCES[0])),
302+
)
303+
def test_command_get_unicode_primary_key(self):
304+
args = self.parser.parse_args(
305+
["fakeresource", "get", "examples.test_rule_utf8_náme"]
306+
)
307+
self.assertEqual(args.func, self.branch.commands["get"].run_and_print)
308+
instance = self.branch.commands["get"].run(args)
309+
actual = instance.serialize()
310+
expected = json.loads(json.dumps(base.RESOURCES[0]))
311+
self.assertEqual(actual, expected)
312+
298313

299314
class ResourceViewCommandTestCase(unittest2.TestCase):
300315
def setUp(self):

st2client/tests/unit/test_shell.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# -*- coding: utf-8 -*-
12
# Copyright 2020 The StackStorm Authors.
23
# Copyright 2019 Extreme Networks, Inc.
34
#
@@ -32,6 +33,7 @@
3233
from st2client.shell import Shell
3334
from st2client.client import Client
3435
from st2client.utils import httpclient
36+
from st2client.utils.misc import reencode_list_with_surrogate_escape_sequences
3537
from st2common.models.db.auth import TokenDB
3638
from tests import base
3739

@@ -828,3 +830,34 @@ def test_automatic_auth_skipped_if_token_provided_as_cli_argument(self):
828830
args = shell.parser.parse_args(args=argv)
829831
shell.get_client(args=args)
830832
self.assertEqual(shell._get_auth_token.call_count, 0)
833+
834+
def test_get_one_unicode_character_in_name(self):
835+
self._write_mock_config()
836+
837+
shell = Shell()
838+
shell._get_auth_token = mock.Mock()
839+
840+
os.environ["ST2_AUTH_TOKEN"] = "fooo"
841+
argv = ["action", "get", "examples.test_rule_utf8_náme"]
842+
args = shell.parser.parse_args(args=argv)
843+
shell.get_client(args=args)
844+
self.assertEqual(args.ref_or_id, "examples.test_rule_utf8_náme")
845+
846+
def test_reencode_list_replace_surrogate_escape(self):
847+
value = ["a", "b", "c", "d"]
848+
expected = value
849+
result = reencode_list_with_surrogate_escape_sequences(value)
850+
851+
self.assertEqual(result, expected)
852+
853+
value = ["a", "b", "c", "n\udcc3\udca1me"]
854+
expected = ["a", "b", "c", "náme"]
855+
result = reencode_list_with_surrogate_escape_sequences(value)
856+
857+
self.assertEqual(result, expected)
858+
859+
value = ["a", "b", "c", "你好"]
860+
expected = value
861+
result = reencode_list_with_surrogate_escape_sequences(value)
862+
863+
self.assertEqual(result, expected)

st2common/st2common/middleware/instrumentation.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
from st2common.metrics.base import get_driver
2121
from st2common.util.date import get_datetime_utc_now
2222
from st2common.router import NotFoundException
23+
from st2common.router import Response
24+
from st2common.util.jsonify import json_encode
2325

2426
__all__ = ["RequestInstrumentationMiddleware", "ResponseInstrumentationMiddleware"]
2527

@@ -47,6 +49,16 @@ def __call__(self, environ, start_response):
4749
endpoint, _ = self.router.match(request)
4850
except NotFoundException:
4951
endpoint = {}
52+
except Exception as e:
53+
# Special case to make sure we return friendly error to the user.
54+
# If we don't do that and router.match() throws an exception, we will return stack trace
55+
# to the end user which is not good.
56+
status_code = getattr(e, "status_code", 500)
57+
headers = {"Content-Type": "application/json"}
58+
body = {"faultstring": getattr(e, "detail", str(e))}
59+
response_body = json_encode(body)
60+
resp = Response(response_body, status=status_code, headers=headers)
61+
return resp(environ, start_response)
5062

5163
# NOTE: We don't track per request and response metrics for /v1/executions/<id> and some
5264
# other endpoints because this would result in a lot of unique metrics which is an

st2common/st2common/router.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from six.moves.urllib import parse as urlparse # pylint: disable=import-error
3030
import webob
3131
from webob import cookies, exc
32-
from webob.compat import url_unquote
32+
from six.moves import urllib
3333

3434
from st2common.exceptions import rbac as rbac_exc
3535
from st2common.exceptions import auth as auth_exc
@@ -264,7 +264,31 @@ def add_spec(self, spec, transforms):
264264
)
265265

266266
def match(self, req):
267-
path = url_unquote(req.path)
267+
# NOTE: webob.url_unquote doesn't work correctly under Python 3 when paths contain non-ascii
268+
# characters. That method supposed to handle Python 2 and Python 3 compatibility, but it
269+
# doesn't work correctly under Python 3.
270+
try:
271+
path = urllib.parse.unquote(req.path)
272+
except Exception as e:
273+
# This exception being thrown indicates that the URL / path contains bad or incorrectly
274+
# URL escaped characters. Instead of returning this stack track + 500 error to the
275+
# user we return a friendly and more correct exception
276+
# NOTE: We should not access or log req.path here since it's a property which results
277+
# in exception and if we try to log it, it will fail.
278+
try:
279+
path = req.environ["PATH_INFO"]
280+
except Exception:
281+
path = "unknown"
282+
283+
LOG.error('Failed to parse request URL / path "%s": %s' % (path, str(e)))
284+
285+
abort(
286+
400,
287+
'Failed to parse request path "%s". URL likely contains invalid or incorrectly '
288+
"URL encoded values." % (path),
289+
)
290+
return
291+
268292
LOG.debug("Match path: %s", path)
269293

270294
if len(path) > 1 and path.endswith("/"):

st2tests/st2tests/api.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,12 @@
2323

2424
import six
2525
import webtest
26+
import webob.compat
27+
import webob.request
2628
import mock
2729

30+
from six.moves import urllib
31+
2832
from oslo_config import cfg
2933

3034
from st2common.router import Router
@@ -57,6 +61,25 @@ class ResponseLeakError(ValueError):
5761
pass
5862

5963

64+
# NOTE: This is not ideal, but we need to patch those functions so it works correctly for the
65+
# tests.
66+
# The problem is that for the unit based api tests we utilize webtest which has the same bug as
67+
# webob when handling unicode characters in the path names and the actual unit test API code doesn't
68+
# follow exactly the same code path as actual production code which doesn't utilize webtest
69+
# In short, that's why important we also have end to end tests for API endpoints!
70+
webob.request.url_unquote = urllib.parse.unquote
71+
webob.compat.url_unquote = urllib.parse.unquote
72+
73+
74+
def bytes_(s, encoding="utf-8", errors="strict"):
75+
if isinstance(s, six.text_type):
76+
return s.encode("utf-8", errors)
77+
78+
79+
webob.compat.bytes_ = bytes_
80+
webob.request.bytes_ = bytes_
81+
82+
6083
class TestApp(webtest.TestApp):
6184
def do_request(self, req, **kwargs):
6285
self.cookiejar.clear()

0 commit comments

Comments
 (0)