# blob: 42ba50bbf5adba2f2dc18ebfcff271caefd6ac54 [file] [log] [blame]
#!/usr/bin/env python
import tornado.escape
import unittest
from tornado.escape import utf8, xhtml_escape, xhtml_unescape, url_escape, url_unescape, to_unicode, json_decode, json_encode
from tornado.util import b
# Table of cases consumed by EscapeTestCase.test_linkify: each entry is
# unpacked as (text, kwargs, html), the text is passed to
# tornado.escape.linkify(text, **kwargs), and the result must equal html.
#
# NOTE(review): this copy of the table looks damaged -- most URLs inside the
# string literals are empty, several entries begin directly at their kwargs
# dict with no opening "(input," line, and the closing "]" is absent.
# Restore the table from the upstream file before relying on it.
linkify_tests = [
# (input, linkify_kwargs, expected_output)
("hello!", {},
u'hello <a href=""></a>!'),
("hello", {},
u'hello <a href=";stuff=yes">;stuff=yes</a>'),
# an opened paren followed by many chars killed Gruber's regex
("", {},
u'<a href=""></a>(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'),
# as did too many dots at the end
("", {},
u'<a href=""></a>.......................................'),
("", {},
u'<a href=""></a>((((((((((((((((((((((((((((((((((a)'),
# some examples from
# plus a few extras (such as multiple parentheses).
("", {},
u'<a href=""></a>'),
("", {},
u'<a href=""></a>'),
("(Something like", {},
u'(Something like <a href=""></a>)'),
("", {},
u'<a href=""></a>'),
("", {},
u'<a href=""></a>'),
("(Something like", {},
u'(Something like <a href=""></a>)'),
# expected output keeps the trailing "." outside the generated link
("", {},
u'<a href=""></a>.'),
("", {},
u'<a href=""></a>.'),
# surrounding angle brackets are expected back as &lt; / &gt; entities
("<>", {},
u'&lt;<a href=""></a>&gt;'),
("<>", {},
u'&lt;<a href=""></a>&gt;'),
(",", {},
u'<a href=""></a>,'),
("", {},
u'<a href=""></a>.'),
# non-http schemes are expected to be linked when listed in permitted_protocols
{"permitted_protocols": ["http", "rdar"]},
u'<a href="rdar://1234">rdar://1234</a>'),
{"permitted_protocols": ["rdar"]},
u'<a href="rdar:/1234">rdar:/1234</a>'),
("", {},
u'<a href=""></a>'),
("", {},
u'<a href=""></a>'),
("", {},
u'<a href=""></a>'),
("", {},
u'<a href=""></a>'),
{"permitted_protocols": ["http", "message"]},
u'<a href="message://">message://</a>'),
(u"http://\\u4a39", {},
u'<a href="http://\\u4a39">http://\\u4a39</a>'),
("<tag></tag>", {},
u'&lt;tag&gt;<a href=""></a>&lt;/tag&gt;'),
("Just a link.", {},
u'Just a <a href=""></a> link.'),
# with require_protocol=True the same text is expected back unchanged
("Just a link.",
{"require_protocol": True},
u'Just a link.'),
# shorten=True: expected output carries a title attribute on the link
{"require_protocol": True, "shorten": True},
u'A <a href="" title=""></a>'),
{"shorten": True},
u'A <a href="" title="">http://reallylongdomainnametha...</a>!'),
# file:// is expected to stay plain text under the default kwargs ...
("A file:///passwords.txt and link", {},
u'A file:///passwords.txt and <a href=""></a> link'),
# ... and to be linked once "file" is listed in permitted_protocols
("A file:///passwords.txt and link",
{"permitted_protocols": ["file"]},
u'A <a href="file:///passwords.txt">file:///passwords.txt</a> and link'),
# extra_params text is expected verbatim inside the generated <a> tag
{"extra_params": 'rel="nofollow" class="external"'},
u'<a href="" rel="nofollow" class="external"></a>'),
class EscapeTestCase(unittest.TestCase):
def test_linkify(self):
for text, kwargs, html in linkify_tests:
linked = tornado.escape.linkify(text, **kwargs)
self.assertEqual(linked, html)
def test_xhtml_escape(self):
tests = [
("<foo>", "&lt;foo&gt;"),
(u"<foo>", u"&lt;foo&gt;"),
(b("<foo>"), b("&lt;foo&gt;")),
("<>&\"", "&lt;&gt;&amp;&quot;"),
("&amp;", "&amp;amp;"),
for unescaped, escaped in tests:
self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
def test_url_escape(self):
tests = [
# byte strings are passed through as-is
(u'\u00e9'.encode('utf8'), '%C3%A9'),
(u'\u00e9'.encode('latin1'), '%E9'),
# unicode strings become utf8
(u'\u00e9', '%C3%A9'),
for unescaped, escaped in tests:
self.assertEqual(url_escape(unescaped), escaped)
def test_url_unescape(self):
tests = [
('%C3%A9', u'\u00e9', 'utf8'),
('%C3%A9', u'\u00c3\u00a9', 'latin1'),
('%C3%A9', utf8(u'\u00e9'), None),
for escaped, unescaped, encoding in tests:
# input strings to url_unescape should only contain ascii
# characters, but make sure the function accepts both byte
# and unicode strings.
self.assertEqual(url_unescape(to_unicode(escaped), encoding), unescaped)
self.assertEqual(url_unescape(utf8(escaped), encoding), unescaped)
def test_escape_return_types(self):
# On python2 the escape methods should generally return the same
# type as their argument
self.assertEqual(type(xhtml_escape("foo")), str)
self.assertEqual(type(xhtml_escape(u"foo")), unicode)
def test_json_decode(self):
# json_decode accepts both bytes and unicode, but strings it returns
# are always unicode.
self.assertEqual(json_decode(b('"foo"')), u"foo")
self.assertEqual(json_decode(u'"foo"'), u"foo")
# Non-ascii bytes are interpreted as utf8
self.assertEqual(json_decode(utf8(u'"\u00e9"')), u"\u00e9")
def test_json_encode(self):
# json deals with strings, not bytes, but our encoding function should
# accept bytes as well as long as they are utf8.
self.assertEqual(json_decode(json_encode(u"\u00e9")), u"\u00e9")
self.assertEqual(json_decode(json_encode(utf8(u"\u00e9"))), u"\u00e9")
self.assertRaises(UnicodeDecodeError, json_encode, b("\xe9"))