Package cherrypy :: Package test :: Module test_encoding
[hide private]
[frames] | [no frames]

Source Code for Module cherrypy.test.test_encoding

  1   
  2  import gzip 
  3  import sys 
  4   
  5  import cherrypy 
  6  from cherrypy._cpcompat import BytesIO, IncompleteRead, ntob, ntou 
  7   
# Two-character text fixture. The tests send it (UTF-8 encoded) as the
# ``param`` value in a query string and in a form body, and the ``index``
# handler asserts that it receives exactly this value back.
europoundUnicode = ntou('\x80\xa3')
# Text fixture that starts with three non-ASCII characters. The second
# argument 'escape' is passed straight to ntou -- presumably it makes ntou
# interpret the \u escapes on interpreters whose native str is bytes;
# confirm against cherrypy._cpcompat.
sing = ntou("\u6bdb\u6cfd\u4e1c: Sing, Little Birdie?", 'escape')
# Pre-encoded forms of ``sing``; testEncoding compares response bodies
# against these.
sing8 = sing.encode('utf-8')
sing16 = sing.encode('utf-16')
 12   
 13   
 14  from cherrypy.test import helper 
 15   
 16   
class EncodingTests(helper.CPWebCase):
    """Functional tests for request decoding and response encoding/gzip.

    ``setup_server`` mounts a small application; every test method issues
    requests against it with ``getPage`` and asserts on the status,
    headers, and body of the response.
    """

    @staticmethod
    def setup_server():
        # Build the handler tree used by the tests and mount it at the root.
        # The handler classes are local because nothing else refers to them.
        class Root:

            def index(self, param):
                # Echo the fixture back, but only if the request parameter
                # was decoded to exactly the expected text.
                assert param == europoundUnicode, "%r != %r" % (
                    param, europoundUnicode)
                yield europoundUnicode
            index.exposed = True

            def mao_zedong(self):
                return sing
            mao_zedong.exposed = True

            def utf8(self):
                # Returns already-encoded content; the encode tool is
                # configured below with a fixed 'utf-8' encoding.
                return sing8
            utf8.exposed = True
            utf8._cp_config = {'tools.encode.encoding': 'utf-8'}

            def cookies_and_headers(self):
                # if the headers have non-ascii characters and a cookie has
                # any part which is unicode (even ascii), the response
                # should not fail.
                cherrypy.response.cookie['candy'] = 'bar'
                cherrypy.response.cookie['candy']['domain'] = 'cherrypy.org'
                cherrypy.response.headers[
                    'Some-Header'] = 'My d\xc3\xb6g has fleas'
                return 'Any content'
            cookies_and_headers.exposed = True

            def reqparams(self, *args, **kwargs):
                # Render the decoded request parameters as "key: value"
                # pairs, sorted for a stable order and encoded as utf8 so
                # the tests can compare raw bodies.
                return ntob(', ').join(
                    [": ".join((k, v)).encode('utf8')
                     for k, v in sorted(cherrypy.request.params.items())]
                )
            reqparams.exposed = True

            def nontext(self, *args, **kwargs):
                # Non-text content type; the config below lets the encode
                # tool process it anyway and add a charset parameter.
                cherrypy.response.headers[
                    'Content-Type'] = 'application/binary'
                return '\x00\x01\x02\x03'
            nontext.exposed = True
            nontext._cp_config = {'tools.encode.text_only': False,
                                  'tools.encode.add_charset': True,
                                  }

        class GZIP:

            def index(self):
                yield "Hello, world"
            index.exposed = True

            def noshow(self):
                # Test for ticket #147, where yield showed no exceptions
                # (content-encoding was still gzip even though traceback
                # wasn't zipped).
                raise IndexError()
                # Never reached; its presence makes this handler a generator.
                yield "Here be dragons"
            noshow.exposed = True
            # Turn encoding off so the gzip tool is the one doing the collapse.
            noshow._cp_config = {'tools.encode.on': False}

            def noshow_stream(self):
                # Test for ticket #147, where yield showed no exceptions
                # (content-encoding was still gzip even though traceback
                # wasn't zipped).
                raise IndexError()
                # Never reached; its presence makes this handler a generator.
                yield "Here be dragons"
            noshow_stream.exposed = True
            noshow_stream._cp_config = {'response.stream': True}

        class Decode:

            def extra_charset(self, *args, **kwargs):
                return ', '.join([": ".join((k, v))
                                  for k, v in cherrypy.request.params.items()])
            extra_charset.exposed = True
            extra_charset._cp_config = {
                'tools.decode.on': True,
                'tools.decode.default_encoding': ['utf-16'],
            }

            def force_charset(self, *args, **kwargs):
                return ', '.join([": ".join((k, v))
                                  for k, v in cherrypy.request.params.items()])
            force_charset.exposed = True
            force_charset._cp_config = {
                'tools.decode.on': True,
                'tools.decode.encoding': 'utf-16',
            }

        root = Root()
        root.gzip = GZIP()
        root.decode = Decode()
        # The gzip tool is enabled only for the /gzip subtree.
        cherrypy.tree.mount(root, config={'/gzip': {'tools.gzip.on': True}})

    def _post_entity(self, url, body,
                     content_type="application/x-www-form-urlencoded"):
        # POST ``body`` to ``url`` with the given Content-Type and a
        # Content-Length computed from ``body``. Shared by all entity
        # decoding tests; the name deliberately does not start with "test"
        # so it is not collected as a test case.
        self.getPage(url, method='POST',
                     headers=[
                         ("Content-Type", content_type),
                         ("Content-Length", str(len(body))),
                     ],
                     body=body)

    def test_query_string_decoding(self):
        # A utf-8 encoded query string must round-trip through ``index``.
        europoundUtf8 = europoundUnicode.encode('utf-8')
        self.getPage(ntob('/?param=') + europoundUtf8)
        self.assertBody(europoundUtf8)

        # Encoded utf8 query strings MUST be parsed correctly.
        # Here, q is the POUND SIGN U+00A3 encoded in utf8 and then %HEX
        self.getPage("/reqparams?q=%C2%A3")
        # The return value will be encoded as utf8.
        self.assertBody(ntob("q: \xc2\xa3"))

        # Query strings that are incorrectly encoded MUST raise 404.
        # Here, q is the POUND SIGN U+00A3 encoded in latin1 and then %HEX
        self.getPage("/reqparams?q=%A3")
        self.assertStatus(404)
        self.assertErrorPage(
            404,
            "The given query string could not be processed. Query "
            "strings for this resource must be encoded with 'utf8'.")

    def test_urlencoded_decoding(self):
        # Test the decoding of an application/x-www-form-urlencoded entity.
        europoundUtf8 = europoundUnicode.encode('utf-8')
        self._post_entity('/', ntob("param=") + europoundUtf8)
        self.assertBody(europoundUtf8)

        # Encoded utf8 entities MUST be parsed and decoded correctly.
        # Here, q is the POUND SIGN U+00A3 encoded in utf8
        self._post_entity('/reqparams', ntob("q=\xc2\xa3"))
        self.assertBody(ntob("q: \xc2\xa3"))

        # ...and in utf16, which is not in the default attempt_charsets list:
        self._post_entity(
            '/reqparams',
            ntob("\xff\xfeq\x00=\xff\xfe\xa3\x00"),
            "application/x-www-form-urlencoded;charset=utf-16")
        self.assertBody(ntob("q: \xc2\xa3"))

        # Entities that are incorrectly encoded MUST raise 400.
        # Here, q is the POUND SIGN U+00A3 encoded in utf16, but
        # the Content-Type incorrectly labels it utf-8.
        self._post_entity(
            '/reqparams',
            ntob("\xff\xfeq\x00=\xff\xfe\xa3\x00"),
            "application/x-www-form-urlencoded;charset=utf-8")
        self.assertStatus(400)
        self.assertErrorPage(
            400,
            "The request entity could not be decoded. The following charsets "
            "were attempted: ['utf-8']")

    def test_decode_tool(self):
        # An extra charset should be tried first, and succeed if it matches.
        # Here, we add utf-16 as a charset and pass a utf-16 body.
        self._post_entity('/decode/extra_charset',
                          ntob("\xff\xfeq\x00=\xff\xfe\xa3\x00"))
        self.assertBody(ntob("q: \xc2\xa3"))

        # An extra charset should be tried first, and continue to other default
        # charsets if it doesn't match.
        # Here, we add utf-16 as a charset but still pass a utf-8 body.
        self._post_entity('/decode/extra_charset', ntob("q=\xc2\xa3"))
        self.assertBody(ntob("q: \xc2\xa3"))

        # An extra charset should error if force is True and it doesn't match.
        # Here, we force utf-16 as a charset but still pass a utf-8 body.
        self._post_entity('/decode/force_charset', ntob("q=\xc2\xa3"))
        self.assertErrorPage(
            400,
            "The request entity could not be decoded. The following charsets "
            "were attempted: ['utf-16']")

    def test_multipart_decoding(self):
        # Test the decoding of a multipart entity when the charset (utf16) is
        # explicitly given.
        body = ntob('\r\n'.join([
            '--X',
            'Content-Type: text/plain;charset=utf-16',
            'Content-Disposition: form-data; name="text"',
            '',
            '\xff\xfea\x00b\x00\x1c c\x00',
            '--X',
            'Content-Type: text/plain;charset=utf-16',
            'Content-Disposition: form-data; name="submit"',
            '',
            '\xff\xfeC\x00r\x00e\x00a\x00t\x00e\x00',
            '--X--'
        ]))
        self._post_entity('/reqparams', body,
                          "multipart/form-data;boundary=X")
        self.assertBody(ntob("submit: Create, text: ab\xe2\x80\x9cc"))

    def test_multipart_decoding_no_charset(self):
        # Test the decoding of a multipart entity when the charset (utf8) is
        # NOT explicitly given, but is in the list of charsets to attempt.
        body = ntob('\r\n'.join([
            '--X',
            'Content-Disposition: form-data; name="text"',
            '',
            '\xe2\x80\x9c',
            '--X',
            'Content-Disposition: form-data; name="submit"',
            '',
            'Create',
            '--X--'
        ]))
        self._post_entity('/reqparams', body,
                          "multipart/form-data;boundary=X")
        self.assertBody(ntob("submit: Create, text: \xe2\x80\x9c"))

    def test_multipart_decoding_no_successful_charset(self):
        # Test the decoding of a multipart entity when the charset (utf16) is
        # NOT explicitly given, and is NOT in the list of charsets to attempt.
        body = ntob('\r\n'.join([
            '--X',
            'Content-Disposition: form-data; name="text"',
            '',
            '\xff\xfea\x00b\x00\x1c c\x00',
            '--X',
            'Content-Disposition: form-data; name="submit"',
            '',
            '\xff\xfeC\x00r\x00e\x00a\x00t\x00e\x00',
            '--X--'
        ]))
        self._post_entity('/reqparams', body,
                          "multipart/form-data;boundary=X")
        self.assertStatus(400)
        self.assertErrorPage(
            400,
            "The request entity could not be decoded. The following charsets "
            "were attempted: ['us-ascii', 'utf-8']")

    def test_nontext(self):
        # The handler's config asks for a charset on a non-text type.
        self.getPage('/nontext')
        self.assertHeader('Content-Type', 'application/binary;charset=utf-8')
        self.assertBody('\x00\x01\x02\x03')

    def testEncoding(self):
        # Default encoding should be utf-8
        self.getPage('/mao_zedong')
        self.assertBody(sing8)

        # Ask for utf-16.
        self.getPage('/mao_zedong', [('Accept-Charset', 'utf-16')])
        self.assertHeader('Content-Type', 'text/html;charset=utf-16')
        self.assertBody(sing16)

        # Ask for multiple encodings. ISO-8859-1 should fail, and utf-16
        # should be produced.
        self.getPage('/mao_zedong', [('Accept-Charset',
                                      'iso-8859-1;q=1, utf-16;q=0.5')])
        self.assertBody(sing16)

        # The "*" value should default to our default_encoding, utf-8
        self.getPage('/mao_zedong', [('Accept-Charset', '*;q=1, utf-7;q=.2')])
        self.assertBody(sing8)

        # Only allow iso-8859-1, which should fail and raise 406.
        self.getPage('/mao_zedong', [('Accept-Charset', 'iso-8859-1, *;q=0')])
        self.assertStatus("406 Not Acceptable")
        self.assertInBody("Your client sent this Accept-Charset header: "
                          "iso-8859-1, *;q=0. We tried these charsets: "
                          "iso-8859-1.")

        # Ask for x-mac-ce, which should be unknown. See ticket #569.
        self.getPage('/mao_zedong', [('Accept-Charset',
                                      'us-ascii, ISO-8859-1, x-mac-ce')])
        self.assertStatus("406 Not Acceptable")
        self.assertInBody("Your client sent this Accept-Charset header: "
                          "us-ascii, ISO-8859-1, x-mac-ce. We tried these "
                          "charsets: ISO-8859-1, us-ascii, x-mac-ce.")

        # Test the 'encoding' arg to encode.
        self.getPage('/utf8')
        self.assertBody(sing8)
        self.getPage('/utf8', [('Accept-Charset', 'us-ascii, ISO-8859-1')])
        self.assertStatus("406 Not Acceptable")

    def testGzip(self):
        # Compress the expected payload locally; only its first bytes are
        # compared against the response body below.
        zbuf = BytesIO()
        zfile = gzip.GzipFile(mode='wb', fileobj=zbuf, compresslevel=9)
        zfile.write(ntob("Hello, world"))
        zfile.close()

        self.getPage('/gzip/', headers=[("Accept-Encoding", "gzip")])
        self.assertInBody(zbuf.getvalue()[:3])
        self.assertHeader("Vary", "Accept-Encoding")
        self.assertHeader("Content-Encoding", "gzip")

        # Test when gzip is denied.
        self.getPage('/gzip/', headers=[("Accept-Encoding", "identity")])
        self.assertHeader("Vary", "Accept-Encoding")
        self.assertNoHeader("Content-Encoding")
        self.assertBody("Hello, world")

        self.getPage('/gzip/', headers=[("Accept-Encoding", "gzip;q=0")])
        self.assertHeader("Vary", "Accept-Encoding")
        self.assertNoHeader("Content-Encoding")
        self.assertBody("Hello, world")

        self.getPage('/gzip/', headers=[("Accept-Encoding", "*;q=0")])
        self.assertStatus(406)
        self.assertNoHeader("Content-Encoding")
        self.assertErrorPage(406, "identity, gzip")

        # Test for ticket #147
        self.getPage('/gzip/noshow', headers=[("Accept-Encoding", "gzip")])
        self.assertNoHeader('Content-Encoding')
        self.assertStatus(500)
        self.assertErrorPage(500, pattern="IndexError\n")

        # In this case, there's nothing we can do to deliver a
        # readable page, since 1) the gzip header is already set,
        # and 2) we may have already written some of the body.
        # The fix is to never stream yields when using gzip.
        if (cherrypy.server.protocol_version == "HTTP/1.0" or
                getattr(cherrypy.server, "using_apache", False)):
            self.getPage('/gzip/noshow_stream',
                         headers=[("Accept-Encoding", "gzip")])
            self.assertHeader('Content-Encoding', 'gzip')
            self.assertInBody('\x1f\x8b\x08\x00')
        else:
            # The wsgiserver will simply stop sending data, and the HTTP client
            # will error due to an incomplete chunk-encoded stream.
            self.assertRaises((ValueError, IncompleteRead), self.getPage,
                              '/gzip/noshow_stream',
                              headers=[("Accept-Encoding", "gzip")])

    def test_UnicodeHeaders(self):
        # A response carrying a cookie plus a non-ascii header must succeed.
        self.getPage('/cookies_and_headers')
        self.assertBody('Any content')
398