122 lines
		
	
	
		
			3.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			122 lines
		
	
	
		
			3.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
|   | """
 | ||
|  | Tools to open .py files as Unicode, using the encoding specified within the file, | ||
|  | as per PEP 263. | ||
|  | 
 | ||
|  | Much of the code is taken from the tokenize module in Python 3.2. | ||
|  | """
 | ||
|  | 
 | ||
|  | import io | ||
|  | from io import TextIOWrapper, BytesIO | ||
|  | import re | ||
|  | from tokenize import open, detect_encoding | ||
|  | 
 | ||
|  | cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)", re.UNICODE) | ||
|  | cookie_comment_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", re.UNICODE) | ||
|  | 
 | ||
|  | def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True): | ||
|  |     """Converts a bytes string with python source code to unicode.
 | ||
|  | 
 | ||
|  |     Unicode strings are passed through unchanged. Byte strings are checked | ||
|  |     for the python source file encoding cookie to determine encoding. | ||
|  |     txt can be either a bytes buffer or a string containing the source | ||
|  |     code. | ||
|  |     """
 | ||
|  |     if isinstance(txt, str): | ||
|  |         return txt | ||
|  |     if isinstance(txt, bytes): | ||
|  |         buffer = BytesIO(txt) | ||
|  |     else: | ||
|  |         buffer = txt | ||
|  |     try: | ||
|  |         encoding, _ = detect_encoding(buffer.readline) | ||
|  |     except SyntaxError: | ||
|  |         encoding = "ascii" | ||
|  |     buffer.seek(0) | ||
|  |     text = TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True) | ||
|  |     text.mode = 'r' | ||
|  |     if skip_encoding_cookie: | ||
|  |         return u"".join(strip_encoding_cookie(text)) | ||
|  |     else: | ||
|  |         return text.read() | ||
|  | 
 | ||
|  | def strip_encoding_cookie(filelike): | ||
|  |     """Generator to pull lines from a text-mode file, skipping the encoding
 | ||
|  |     cookie if it is found in the first two lines. | ||
|  |     """
 | ||
|  |     it = iter(filelike) | ||
|  |     try: | ||
|  |         first = next(it) | ||
|  |         if not cookie_comment_re.match(first): | ||
|  |             yield first | ||
|  |         second = next(it) | ||
|  |         if not cookie_comment_re.match(second): | ||
|  |             yield second | ||
|  |     except StopIteration: | ||
|  |         return | ||
|  |      | ||
|  |     for line in it: | ||
|  |         yield line | ||
|  | 
 | ||
|  | def read_py_file(filename, skip_encoding_cookie=True): | ||
|  |     """Read a Python file, using the encoding declared inside the file.
 | ||
|  |      | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     filename : str | ||
|  |       The path to the file to read. | ||
|  |     skip_encoding_cookie : bool | ||
|  |       If True (the default), and the encoding declaration is found in the first | ||
|  |       two lines, that line will be excluded from the output - compiling a | ||
|  |       unicode string with an encoding declaration is a SyntaxError in Python 2. | ||
|  |      | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     A unicode string containing the contents of the file. | ||
|  |     """
 | ||
|  |     with open(filename) as f:   # the open function defined in this module. | ||
|  |         if skip_encoding_cookie: | ||
|  |             return "".join(strip_encoding_cookie(f)) | ||
|  |         else: | ||
|  |             return f.read() | ||
|  | 
 | ||
|  | def read_py_url(url, errors='replace', skip_encoding_cookie=True): | ||
|  |     """Read a Python file from a URL, using the encoding declared inside the file.
 | ||
|  |      | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     url : str | ||
|  |       The URL from which to fetch the file. | ||
|  |     errors : str | ||
|  |       How to handle decoding errors in the file. Options are the same as for | ||
|  |       bytes.decode(), but here 'replace' is the default. | ||
|  |     skip_encoding_cookie : bool | ||
|  |       If True (the default), and the encoding declaration is found in the first | ||
|  |       two lines, that line will be excluded from the output - compiling a | ||
|  |       unicode string with an encoding declaration is a SyntaxError in Python 2. | ||
|  |      | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     A unicode string containing the contents of the file. | ||
|  |     """
 | ||
|  |     # Deferred import for faster start | ||
|  |     from urllib.request import urlopen  | ||
|  |     response = urlopen(url) | ||
|  |     buffer = io.BytesIO(response.read()) | ||
|  |     return source_to_unicode(buffer, errors, skip_encoding_cookie) | ||
|  | 
 | ||
|  | def _list_readline(x): | ||
|  |     """Given a list, returns a readline() function that returns the next element
 | ||
|  |     with each call. | ||
|  |     """
 | ||
|  |     x = iter(x) | ||
|  |     def readline(): | ||
|  |         return next(x) | ||
|  |     return readline | ||
|  | 
 | ||
|  | # Code for going between .py files and cached .pyc files ---------------------- | ||
|  | try:  | ||
|  |     from importlib.util import source_from_cache, cache_from_source | ||
|  | except ImportError : | ||
|  |     ## deprecated since 3.4 | ||
|  |     from imp import source_from_cache, cache_from_source |