cornsnake.util_network
Download files or make a POST request without adding extra libraries.
"""
Download files or make a POST request without adding extra libraries.

[Documentation](http://docs.mrseanryan.cornsnake.s3-website-eu-west-1.amazonaws.com/cornsnake/util_network.html)
"""

from datetime import datetime
import os
import urllib.request

from . import util_log

logger = util_log.getLogger(__name__)


def get_file(
    url: str,
    local_path_to_write_file: str,
    timeout: int = 60,
    headers: dict[str, str] = {},
) -> None:
    """
    Download a file from a URL to a local file path, via an HTTP GET request.

    Args:
        url (str): The URL to download from, via a GET request.
        local_path_to_write_file (str): The local path the response body is written to. The file is opened in binary write mode.
        timeout (int): Optional timeout for the request in seconds.
        headers (dict[str, str]): Optional headers to send with the request.

    Raises:
        RuntimeError: If the response status is not 200.
    """
    # NOTE: the shared default `headers` dict is only passed through, never mutated here.
    req = urllib.request.Request(
        url=url,
        data=None,
        headers=headers,
        origin_req_host=None,
        unverifiable=False,
        method="GET",
    )
    with urllib.request.urlopen(req, timeout=timeout) as response:
        if response.status != 200:
            # The response object exposes `status` (there is no `status_code`),
            # so the message must be built from `status`.
            raise RuntimeError(
                f"File download failed. HTTP status code: {response.status}"
            )
        with open(local_path_to_write_file, "wb") as file:
            file.write(response.read())


def _get_file_name_from_url(url: str, text_file_extensions: list[str]) -> str:
    """
    Derive a local file name from the last path segment of a URL.

    If the segment does not end with one of `text_file_extensions`, '.html' is appended.
    """
    # credit to scottleibrand

    # Strip any trailing /'s from the end of the URL
    stripped_url = url.rstrip("/")

    # Get the base name of the URL
    base_name = stripped_url.split("/")[-1]

    for ext in text_file_extensions:
        if base_name.endswith(ext):
            return base_name

    return base_name + ".html"


def _get_timestamped_filename(filename: str) -> str:
    """
    Insert a timestamp before the final '.'-separated part of a file name.
    """
    # add timestamp to make unique filename, since URL content may have changed
    now = datetime.now()
    timestamp = now.strftime("%Y_%m_%d__%H%M%S")

    filename_parts = filename.split(".")
    extension = filename_parts[-1]
    filename_parts = filename_parts[:-1]
    filename_parts += [timestamp, extension]
    filename = ".".join(filename_parts)

    return filename


def get_file_timestamped(
    url: str,
    path_to_dir: str,
    prefix: str = "",
    text_file_extensions: list[str] = [".txt", ".html", ".md", ".yaml"],
    timeout: int = 60,
) -> str:
    """
    Download a *timestamped* file from a URL to an automatically generated local file path, via an HTTP GET request.
    This helps to ensure the latest copy of the URL is saved, in case there was a previous download.

    Args:
        url (str): The URL to download from, via a GET request.
        path_to_dir (str): The local directory to write the file to.
        prefix (str): Optional prefix for the new filename. It is joined to the generated name with '-'.
        text_file_extensions (list[str]): Optional file extensions to recognise - if the URL ending is not recognised, then the file is saved as '.html'.
        timeout (int): Optional timeout for the request in seconds.

    Returns:
        str: The path to the new local file.
    """
    filename = _get_file_name_from_url(url, text_file_extensions)

    filename = _get_timestamped_filename(filename)

    local_filepath = os.path.join(path_to_dir, f"{prefix}-{filename}")
    get_file(url, local_filepath, timeout)
    return local_filepath


def post_request(api_url: str, headers: dict[str, str], timeout: int = 60) -> bool:
    """
    Make a POST request (with no request body) to a specified API URL.

    Args:
        api_url (str): The URL to which the POST request will be made.
        headers (dict): The headers to be included in the request.
        timeout (int): The timeout for the request in seconds.

    Returns:
        bool: True if the POST request is successful (status code 200). Otherwise raises RuntimeError.
    """
    req = urllib.request.Request(
        url=api_url,
        data=None,
        headers=headers,
        origin_req_host=None,
        unverifiable=False,
        method="POST",
    )
    with urllib.request.urlopen(req, timeout=timeout) as response:
        if response.status == 200:
            return True
        raise RuntimeError(f"POST failed. HTTP status code: [{response.status}]")
def get_file(
    url: str,
    local_path_to_write_file: str,
    timeout: int = 60,
    headers: dict[str, str] = {},
) -> None:
    """
    Download a file from a URL to a local file path, via an HTTP GET request.

    Args:
        url (str): The URL to download from, via a GET request.
        local_path_to_write_file (str): The local path the response body is written to. The file is opened in binary write mode.
        timeout (int): Optional timeout for the request in seconds.
        headers (dict[str, str]): Optional headers to send with the request.

    Raises:
        RuntimeError: If the response status is not 200.
    """
    # NOTE: the shared default `headers` dict is only passed through, never mutated here.
    req = urllib.request.Request(
        url=url,
        data=None,
        headers=headers,
        origin_req_host=None,
        unverifiable=False,
        method="GET",
    )
    with urllib.request.urlopen(req, timeout=timeout) as response:
        if response.status != 200:
            # The response object exposes `status` (there is no `status_code`),
            # so the message must be built from `status`.
            raise RuntimeError(
                f"File download failed. HTTP status code: {response.status}"
            )
        with open(local_path_to_write_file, "wb") as file:
            file.write(response.read())
Function to download a file from a URL to a local file path, via an HTTP GET request.
Args: url (str): The URL to download from, via a GET request. timeout (int): Optional timeout for the request in seconds.
def get_file_timestamped(
    url: str,
    path_to_dir: str,
    prefix: str = "",
    text_file_extensions: list[str] = [".txt", ".html", ".md", ".yaml"],
    timeout: int = 60,
) -> str:
    """
    Download a URL, via an HTTP GET request, into a *timestamped* file whose local path is generated automatically.
    Because each download gets its own timestamped name, the latest copy of the URL is saved even if there was a previous download.

    Args:
        url (str): The URL to download from, via a GET request.
        path_to_dir (str): The local directory to write the file to.
        prefix (str): Optional prefix for the new filename.
        text_file_extensions (list[str]): Optional file extensions to recognise - if the URL ending is not recognised, then the file is saved as '.html'.
        timeout (int): Optional timeout for the request in seconds.

    Returns:
        str: The path to the new local file.
    """
    # Build the name in one step: URL-derived base name, then timestamp it.
    timestamped_name = _get_timestamped_filename(
        _get_file_name_from_url(url, text_file_extensions)
    )

    # NOTE: the prefix is always joined with '-', so the default empty prefix
    # yields a file name that starts with '-'.
    destination = os.path.join(path_to_dir, f"{prefix}-{timestamped_name}")

    get_file(url, destination, timeout)
    return destination
Function to download a timestamped file from a URL to an automatically generated local file path, via an HTTP GET request. This helps to ensure the latest copy of the URL is saved, in case there was a previous download.
Args: url (str): The URL to download from, via a GET request. path_to_dir (str): The local directory to write the file to. prefix (str): Optional prefix for the new filename. text_file_extensions (list[str]): Optional file extensions to recognise - if the URL ending is not recognised, then the file is saved as '.html'. timeout (int): Optional timeout for the request in seconds.
Returns: str: The path to the new local file.
def post_request(api_url: str, headers: dict[str, str], timeout: int = 60) -> bool:
    """
    Send a POST request, without a request body, to the given API URL.

    Args:
        api_url (str): The URL to which the POST request will be made.
        headers (dict): The headers to be included in the request.
        timeout (int): The timeout for the request in seconds.

    Returns:
        bool: True if the POST request is successful (status code 200). Otherwise raises RuntimeError.
    """
    request = urllib.request.Request(
        api_url,
        data=None,
        headers=headers,
        origin_req_host=None,
        unverifiable=False,
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=timeout) as reply:
        # Guard clause: anything other than 200 is reported as a failure.
        if reply.status != 200:
            raise RuntimeError(f"POST failed. HTTP status code: [{reply.status}]")
        return True
Function to make a POST request to a specified API URL.
Args: api_url (str): The URL to which the POST request will be made. headers (dict): The headers to be included in the request. timeout (int): The timeout for the request in seconds.
Returns: bool: True if the POST request is successful (status code 200). Otherwise raises RuntimeError.