cornsnake.util_network

Download files or make a POST request without adding extra libraries.

Documentation

  1"""
  2Download files or make a POST request without adding extra libraries.
  3
  4[Documentation](http://docs.mrseanryan.cornsnake.s3-website-eu-west-1.amazonaws.com/cornsnake/util_network.html)
  5"""
  6
  7from datetime import datetime
  8import os
  9import urllib.request
 10
 11from . import util_log
 12
 13logger = util_log.getLogger(__name__)
 14
 15
 16def get_file(
 17    url: str,
 18    local_path_to_write_file: str,
 19    timeout: int = 60,
 20    headers: dict[str, str] = {},
 21) -> None:
 22    """
 23    Function to download a file from a URL to a local file path, via a HTTP GET request.
 24
 25    Args:
 26    url (str): The URL to download from, via a GET request.
 27    timeout (int): Optional timeout for the request in seconds.
 28    """
 29    req = urllib.request.Request(
 30        url=url,
 31        data=None,
 32        headers=headers,
 33        origin_req_host=None,
 34        unverifiable=False,
 35        method="GET",
 36    )
 37    with urllib.request.urlopen(req, timeout=timeout) as response:
 38        if response.status == 200:
 39            with open(local_path_to_write_file, "wb") as file:
 40                file.write(response.read())
 41        else:
 42            raise RuntimeError(
 43                f"File download failed. HTTP status code: {response.status_code}"
 44            )
 45
 46
 47def _get_file_name_from_url(url: str, text_file_extensions: list[str]) -> str:
 48    # credit to scottleibrand
 49
 50    # Strip any trailing /'s from the end of the URL
 51    stripped_url = url.rstrip("/")
 52
 53    # Get the base name of the URL
 54    base_name = stripped_url.split("/")[-1]
 55
 56    for ext in text_file_extensions:
 57        if base_name.endswith(ext):
 58            return base_name
 59
 60    return base_name + ".html"
 61
 62
 63def _get_timestamped_filename(filename: str) -> str:
 64    # add timestamp to make unique filename, since URL content may have changed
 65    now = datetime.now()
 66    timestamp = now.strftime("%Y_%m_%d__%H%M%S")
 67
 68    filename_parts = filename.split(".")
 69    extension = filename_parts[-1]
 70    filename_parts = filename_parts[:-1]
 71    filename_parts += [timestamp, extension]
 72    filename = ".".join(filename_parts)
 73
 74    return filename
 75
 76
 77def get_file_timestamped(
 78    url: str,
 79    path_to_dir: str,
 80    prefix: str = "",
 81    text_file_extensions: list[str] = [".txt", ".html", ".md", ".yaml"],
 82    timeout: int = 60,
 83) -> str:
 84    """
 85    Function to download a *timestamped* file from a URL to an automatically generated local file path, via a HTTP GET request.
 86    This helps to ensure the latest copy of the URL is saved, in case there was a previous download.
 87
 88    Args:
 89    url (str): The URL to download from, via a GET request.
 90    path_to_dir (str): The local directory to write the file to.
 91    prefix (str): Optional prefix for the new filename.
 92    text_file_extensions (list[str]): Optional file extensions to recognise - if the URL ending is not recognised, then the file is saved as '.html'.
 93    timeout (int): Optional timeout for the request in seconds.
 94
 95    Returns:
 96    str: The path to the new local file.
 97    """
 98    filename = _get_file_name_from_url(url, text_file_extensions)
 99
100    filename = _get_timestamped_filename(filename)
101
102    local_filepath = os.path.join(path_to_dir, f"{prefix}-{filename}")
103    get_file(url, local_filepath, timeout)
104    return local_filepath
105
106
def post_request(api_url: str, headers: dict[str, str], timeout: int = 60) -> bool:
    """
    Send a body-less POST request to the given API URL.

    Args:
    api_url (str): The URL to which the POST request will be made.
    headers (dict): The headers to be included in the request.
    timeout (int): The timeout for the request in seconds.

    Returns:
    bool: True if the response status is 200.

    Raises:
    RuntimeError: If the response status is anything other than 200.
    """
    request = urllib.request.Request(
        url=api_url,
        data=None,
        headers=headers,
        origin_req_host=None,
        unverifiable=False,
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=timeout) as reply:
        # Guard clause: anything but 200 is reported as a failure.
        if reply.status != 200:
            raise RuntimeError(f"POST failed. HTTP status code: [{reply.status}]")
    return True
logger = <Logger cornsnake.util_network (DEBUG)>
def get_file( url: str, local_path_to_write_file: str, timeout: int = 60, headers: dict[str, str] = {}) -> None:
def get_file(
    url: str,
    local_path_to_write_file: str,
    timeout: int = 60,
    headers: dict[str, str] = {},
) -> None:
    """
    Function to download a file from a URL to a local file path, via an HTTP GET request.

    Args:
    url (str): The URL to download from, via a GET request.
    local_path_to_write_file (str): The local path the response body is written to. The file is opened in binary write mode, so an existing file is overwritten.
    timeout (int): Optional timeout for the request in seconds.
    headers (dict[str, str]): Optional HTTP headers to send with the request.

    Raises:
    RuntimeError: If the response status is not 200.
    """
    # NOTE: the shared `{}` default is only handed to Request; this function never mutates it.
    req = urllib.request.Request(
        url=url,
        data=None,
        headers=headers,
        origin_req_host=None,
        unverifiable=False,
        method="GET",
    )
    with urllib.request.urlopen(req, timeout=timeout) as response:
        if response.status == 200:
            with open(local_path_to_write_file, "wb") as file:
                file.write(response.read())
        else:
            # urllib responses expose `status`; there is no `status_code` attribute,
            # so reading it here would raise AttributeError instead of this error.
            raise RuntimeError(
                f"File download failed. HTTP status code: {response.status}"
            )

Function to download a file from a URL to a local file path, via an HTTP GET request.

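Args: url (str): The URL to download from, via a GET request. local_path_to_write_file (str): The local path to write the downloaded file to. timeout (int): Optional timeout for the request in seconds. headers (dict[str, str]): Optional HTTP headers to send with the request.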

def get_file_timestamped( url: str, path_to_dir: str, prefix: str = '', text_file_extensions: list[str] = ['.txt', '.html', '.md', '.yaml'], timeout: int = 60) -> str:
 78def get_file_timestamped(
 79    url: str,
 80    path_to_dir: str,
 81    prefix: str = "",
 82    text_file_extensions: list[str] = [".txt", ".html", ".md", ".yaml"],
 83    timeout: int = 60,
 84) -> str:
 85    """
 86    Function to download a *timestamped* file from a URL to an automatically generated local file path, via a HTTP GET request.
 87    This helps to ensure the latest copy of the URL is saved, in case there was a previous download.
 88
 89    Args:
 90    url (str): The URL to download from, via a GET request.
 91    path_to_dir (str): The local directory to write the file to.
 92    prefix (str): Optional prefix for the new filename.
 93    text_file_extensions (list[str]): Optional file extensions to recognise - if the URL ending is not recognised, then the file is saved as '.html'.
 94    timeout (int): Optional timeout for the request in seconds.
 95
 96    Returns:
 97    str: The path to the new local file.
 98    """
 99    filename = _get_file_name_from_url(url, text_file_extensions)
100
101    filename = _get_timestamped_filename(filename)
102
103    local_filepath = os.path.join(path_to_dir, f"{prefix}-{filename}")
104    get_file(url, local_filepath, timeout)
105    return local_filepath

Function to download a timestamped file from a URL to an automatically generated local file path, via an HTTP GET request. This helps to ensure the latest copy of the URL is saved, in case there was a previous download.

Args: url (str): The URL to download from, via a GET request. path_to_dir (str): The local directory to write the file to. prefix (str): Optional prefix for the new filename. text_file_extensions (list[str]): Optional file extensions to recognise - if the URL ending is not recognised, then the file is saved as '.html'. timeout (int): Optional timeout for the request in seconds.

Returns: str: The path to the new local file.

def post_request(api_url: str, headers: dict[str, str], timeout: int = 60) -> bool:
def post_request(api_url: str, headers: dict[str, str], timeout: int = 60) -> bool:
    """
    Send a body-less POST request to the given API URL.

    Args:
    api_url (str): The URL to which the POST request will be made.
    headers (dict): The headers to be included in the request.
    timeout (int): The timeout for the request in seconds.

    Returns:
    bool: True if the response status is 200.

    Raises:
    RuntimeError: If the response status is anything other than 200.
    """
    request = urllib.request.Request(
        url=api_url,
        data=None,
        headers=headers,
        origin_req_host=None,
        unverifiable=False,
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=timeout) as reply:
        # Guard clause: anything but 200 is reported as a failure.
        if reply.status != 200:
            raise RuntimeError(f"POST failed. HTTP status code: [{reply.status}]")
    return True

Function to make a POST request to a specified API URL.

Args: api_url (str): The URL to which the POST request will be made. headers (dict): The headers to be included in the request. timeout (int): The timeout for the request in seconds.

Returns: bool: True if the POST request is successful (status code 200). Otherwise raises RuntimeError.