cornsnake.util_file
File operations including copying, reading, and writing text to files.
"""
File operations including copying, reading, and writing text to files.

[Documentation](http://docs.mrseanryan.cornsnake.s3-website-eu-west-1.amazonaws.com/cornsnake/util_file.html)
"""

import datetime
import os
import shutil

from . import util_os
from . import util_pdf
from . import util_text
from . import util_string

# Prefix that lets Windows APIs accept paths longer than MAX_PATH.
_WINDOWS_LONG_PATH_PREFIX = "\\\\?\\"


def backup_file_by_copying(
    path_to_file: str, backup_dir: str, backup_filename: str
) -> str:
    """
    Backup the given file by copying it to a new uniquely named file.

    Args:
        path_to_file (str): The file to back up.
        backup_dir (str): The directory that receives the copy.
        backup_filename (str): The preferred name of the copy.

    Returns:
        str: The path the copy was written to. It differs from the preferred
        name (a numeric suffix is inserted) when that name was already taken.
    """
    path_to_backup = get_unique_filepath(os.path.join(backup_dir, backup_filename))
    copy_file(path_to_file, path_to_backup)
    return path_to_backup


def change_extension(input_filename: str, new_extension: str) -> str:
    """
    Change the extension of the given filename.

    Only the last path component is inspected, so a dot inside a directory
    name is never mistaken for the start of the extension.

    Examples:
    - ('input1.txt', '.yaml') -> 'input1.yaml'
    - ('input2', '.yaml.txt') -> 'input2.yaml.txt'
    - ('input3', '.xml') -> 'input3.xml'
    - ('input1.txt.zip', '.zip') -> 'input1.txt.zip'
    - ('input1.zip.txt', '.zip') -> 'input1.zip'
    - ('my.dir/input4', '.yaml') -> 'my.dir/input4.yaml'

    Args:
        input_filename (str): The filename (or path) to alter.
        new_extension (str): The new extension, including the leading '.'.

    Returns:
        str: The filename with its last extension replaced.

    Raises:
        ValueError: If new_extension does not start with a '.'.
    """
    if not new_extension.startswith("."):
        raise ValueError("new_extension must start with a '.'. For example: '.txt'")
    base_filename, _ = os.path.splitext(input_filename)
    # Dropping the old extension may already leave the wanted one in place.
    if base_filename.endswith(new_extension):
        return base_filename
    return base_filename + new_extension


def get_file_extension(filename: str, to_lower: bool = True) -> str:
    """
    Get the extension part of the filename - for example '.txt'.

    Args:
        filename (str): The filename (or path) to inspect.
        to_lower (bool): Whether to lower-case the extension. Default is True.

    Returns:
        str: The extension including the leading '.', or '' when there is none.
    """
    _, extension = os.path.splitext(filename)
    return extension.lower() if to_lower else extension


def remove_file_extension(filename: str) -> str:
    """Remove the extension part of the file name - for example 'my-file.txt' -> 'my-file'."""
    return filename.removesuffix(get_file_extension(filename=filename, to_lower=False))


def make_filename_valid(filename: str) -> str:
    """
    Return an altered filename so that it is valid.
    - the new filename is intended to only have alphanumerics, underscore and full-stop.

    NOTE(review): the actual filtering is done by
    util_string.filter_string_via_regex - confirm there how the regex and the
    '_' replacement character are applied.
    """
    return util_string.filter_string_via_regex(
        text=filename, regex="^[a-zA-Z0-9_\\.]+$", replacement_char="_"
    )


def copy_file(from_path: str, to_path: str) -> None:
    """
    Copy a file from one path to another.

    Args:
        from_path (str): The path of the file to copy.
        to_path (str): The destination path to copy the file to.
    """
    shutil.copyfile(from_path, to_path)


def delete_file(path_to_file: str) -> None:
    """
    Delete a file from the disk.

    Args:
        path_to_file (str): The path of the file to delete.
    """
    os.remove(path_to_file)


def get_modified_date(path_to_file: str) -> datetime.datetime:
    """
    Get the time the file was last modified.

    Args:
        path_to_file (str): The path of the file to inspect.

    Returns:
        datetime.datetime: The modification time as a naive local datetime.
    """
    return datetime.datetime.fromtimestamp(os.path.getmtime(path_to_file))


def get_unique_filepath(path_to_file: str) -> str:
    """
    Get a unique new filepath, similar to the given path.

    The given path is returned unchanged when nothing exists there. Otherwise
    '-02', '-03', ... is inserted before the extension until a free path is found.

    Args:
        path_to_file (str): The preferred path.

    Returns:
        str: A path at which nothing existed at the time of the check.
    """
    filename_no_extension, extension = os.path.splitext(path_to_file)

    suffix = 2
    while os.path.exists(path_to_file):
        path_to_file = f"{filename_no_extension}-{suffix:02}{extension}"
        suffix += 1
    return path_to_file


def get_this_script_dir(this_file: str) -> str:
    """
    Get the directory of the current script file.

    Args:
        this_file (str): The path of the current script file (__file__).

    Returns:
        str: The directory of the current script file.
    """
    return os.path.dirname(os.path.realpath(this_file))


def is_file_under_dir(path_to_file: str, path_to_dir: str) -> bool:
    """
    Is that file path located under that directory or one of its sub-directories.

    The check is done on the paths only - neither path needs to exist.
    Relative paths are resolved against the current working directory, so
    relative and absolute paths (and special directories such as '.' or the
    filesystem root) compare correctly.

    Args:
        path_to_file (str): The path of the file.
        path_to_dir (str): The path of the directory.

    Returns:
        bool: True if the file path is strictly inside the directory.
    """
    file_path = os.path.normcase(os.path.abspath(path_to_file))
    dir_path = os.path.normcase(os.path.abspath(path_to_dir))
    if file_path == dir_path:
        return False
    try:
        return os.path.commonpath([file_path, dir_path]) == dir_path
    except ValueError:
        # Raised on Windows when the two paths are on different drives.
        return False


def _get_long_file_path(path_to_file: str) -> str:
    """
    Get the long file path for Windows.

    On other platforms the path is returned unchanged.

    Args:
        path_to_file (str): The original file path.

    Returns:
        str: The long file path for Windows.
    """
    if not util_os.is_windows():
        return path_to_file
    if path_to_file.startswith(_WINDOWS_LONG_PATH_PREFIX):
        return path_to_file
    # The long-path prefix is only valid in front of an absolute path.
    absolute_path = os.path.abspath(path_to_file)
    if absolute_path.startswith("\\\\"):
        # UNC paths take the form \\?\UNC\server\share\...
        return _WINDOWS_LONG_PATH_PREFIX + "UNC\\" + absolute_path[2:]
    return _WINDOWS_LONG_PATH_PREFIX + absolute_path


def is_empty_directory_only_subdirectories(path_to_file: str) -> bool:
    """
    Check if a directory holds nothing but empty files and empty sub-directories.

    Args:
        path_to_file (str): The path to the directory to check. If it is a
            file, the result is whether that file is empty.

    Returns:
        bool: True if the directory is empty, False otherwise.
    """
    if os.path.isfile(path_to_file):
        return is_empty_file(path_to_file)
    for content in os.listdir(path_to_file):
        path_to_sub = os.path.join(path_to_file, content)
        if os.path.isfile(path_to_sub):
            # Keep scanning after an empty file: later entries may have content.
            if not is_empty_file(path_to_sub):
                return False
        elif os.path.isdir(path_to_sub):
            if not is_empty_directory_only_subdirectories(path_to_sub):
                return False
        else:
            # Neither file nor directory (e.g. a broken symlink) counts as content.
            return False
    return True


def is_empty_file(path_to_file: str) -> bool:
    """
    Check if a file is empty.

    Args:
        path_to_file (str): The path to the file to check.

    Returns:
        bool: True if the file is empty, False otherwise. Symbolic links and
        paths that are not regular files are never reported as empty.
    """
    if not os.path.isfile(path_to_file):
        return False
    if os.path.islink(path_to_file):
        return False
    fp_allow_long_path = _get_long_file_path(path_to_file)
    size = os.path.getsize(fp_allow_long_path)
    return size == 0


def move_file(from_filepath: str, to_filepath: str) -> None:
    """
    Recursively move a file or directory to another location. This is similar to the Unix "mv" command.

    If the destination is an existing directory or a symlink to a directory, the source is moved inside
    that directory. The destination path in that directory must not already exist.

    Args:
        from_filepath (str): The file or directory to move.
        to_filepath (str): The destination path.
    """
    shutil.move(from_filepath, to_filepath)


def read_lines_from_file(
    filepath: str, skip_comments: bool = False, encoding: str = "utf-8"
) -> list[str]:
    """
    Read lines from a text file.

    Args:
        filepath (str): The path to the text file.
        skip_comments (bool): Whether to skip lines starting with '#'. Default is False.
        encoding (str): The file encoding to apply - defaults to utf-8.

    Returns:
        list: A list of lines read from the file, each stripped of leading and trailing whitespace.
    """
    with open(filepath, encoding=encoding) as file:
        lines = [line.strip() for line in file]
    if skip_comments:
        lines = _remove_comments(lines)
    return lines


def read_text_from_file(filepath: str, encoding: str = "utf-8") -> str:
    """
    Read text from a text file.

    Args:
        filepath (str): The path to the text file.
        encoding (str): The file encoding to apply - defaults to utf-8.

    Returns:
        str: The text read from the file.
    """
    with open(filepath, encoding=encoding) as file:
        return file.read()


def _remove_comments(lines: list[str]) -> list[str]:
    """
    Remove lines starting with '#' from a list of lines.

    Args:
        lines (list): List of lines to filter.

    Returns:
        list: Filtered list of lines without comments.
    """
    return [line for line in lines if not line.startswith("#")]


def read_text_from_text_or_pdf_file_skipping_comments(filepath: str) -> str:
    """
    Read text from a text or PDF file, skipping comments.

    Comment lines (starting with '#') are only removed from text files; text
    extracted from a PDF is returned as-is.

    Args:
        filepath (str): The path to the text or PDF file.

    Returns:
        str: The text read from the file without comments.
    """
    if util_pdf.is_pdf(filepath):
        return util_pdf.extract_text_from_pdf(filepath)
    lines = read_lines_from_file(filepath, skip_comments=True)
    return util_text.LINE_END.join(lines)


def write_text_lines_to_file(
    lines: list[str], filepath: str, encoding: str = "utf-8"
) -> None:
    """
    Write lines of text to a text file.

    Args:
        lines (list): List of lines to write to the file.
        filepath (str): The path to the output text file.
        encoding (str): The file encoding to apply - defaults to utf-8.
    """
    with open(filepath, encoding=encoding, mode="w") as file:
        file.writelines(line + util_text.LINE_END for line in lines)


def write_array_to_file_skipping_empty(
    path_to_output_text_file: str, lines: list[str], encoding: str = "utf-8"
) -> None:
    """
    Write non-empty lines from an array to a file, skipping empty lines.

    Args:
        path_to_output_text_file (str): The path to the output text file.
        lines (list): List of lines to write to the file. None and '' entries are skipped.
        encoding (str): The file encoding to apply - defaults to utf-8.
    """
    with open(path_to_output_text_file, "w", encoding=encoding) as f:
        for line in lines:
            if line:
                f.write(line + "\n")


def write_text_to_file(text: str, filepath: str, encoding: str = "utf-8") -> None:
    """
    Write text to a text file.

    Args:
        text (str): The text to write to the file.
        filepath (str): The path to the output text file.
        encoding (str): The file encoding to apply - defaults to utf-8.
    """
    with open(filepath, "w", encoding=encoding) as f:
        f.write(text)


def _get_last_part_of_path(file_path: str, sep: str) -> str:
    """Return whatever follows the last occurrence of sep (the whole path if sep is absent)."""
    return file_path.split(sep)[-1]


def get_last_part_of_path(file_path: str) -> str:
    """
    Get the last part of a file path (filename).

    Args:
        file_path (str): The full file path.

    Returns:
        str: The last part of the file path (filename).
    """
    last_part = _get_last_part_of_path(file_path, os.sep)

    # Windows can sometimes use unix separators (e.g. from bash shell)
    if "/" in last_part:
        return _get_last_part_of_path(last_part, "/")
    return last_part
def backup_file_by_copying(
    path_to_file: str, backup_dir: str, backup_filename: str
) -> str:
    """
    Copy a file into a backup directory under a name that is not yet taken.

    Args:
        path_to_file (str): The file to back up.
        backup_dir (str): The directory that receives the copy.
        backup_filename (str): The preferred name of the copy.

    Returns:
        str: The path the copy was written to, as chosen by get_unique_filepath.
    """
    preferred_path = os.path.join(backup_dir, backup_filename)
    destination = get_unique_filepath(preferred_path)
    copy_file(path_to_file, destination)
    return destination
Backup the given file by copying it to a new uniquely named file.
def change_extension(input_filename: str, new_extension: str) -> str:
    """
    Change the extension of the given filename.

    Only the last path component is inspected, so a dot inside a directory
    name is never mistaken for the start of the extension.

    Examples:
    - ('input1.txt', '.yaml') -> 'input1.yaml'
    - ('input2', '.yaml.txt') -> 'input2.yaml.txt'
    - ('input3', '.xml') -> 'input3.xml'
    - ('input1.txt.zip', '.zip') -> 'input1.txt.zip'
    - ('input1.zip.txt', '.zip') -> 'input1.zip'
    - ('my.dir/input4', '.yaml') -> 'my.dir/input4.yaml'

    Args:
        input_filename (str): The filename (or path) to alter.
        new_extension (str): The new extension, including the leading '.'.

    Returns:
        str: The filename with its last extension replaced.

    Raises:
        ValueError: If new_extension does not start with a '.'.
    """
    if not new_extension.startswith("."):
        raise ValueError("new_extension must start with a '.'. For example: '.txt'")
    base_filename, _ = os.path.splitext(input_filename)
    # Dropping the old extension may already leave the wanted one in place.
    if base_filename.endswith(new_extension):
        return base_filename
    return base_filename + new_extension
Change the extension of the given filename.
Examples:
- ('input1.txt', '.yaml') -> 'input1.yaml'
- ('input2', '.yaml.txt') -> 'input2.yaml.txt'
- ('input3', '.xml') -> 'input3.xml'
- ('input1.txt.zip', '.zip') -> 'input1.txt.zip'
def get_file_extension(filename: str, to_lower: bool = True) -> str:
    """
    Return the extension of a filename, including its leading dot (e.g. '.txt').

    Args:
        filename (str): The filename (or path) to inspect.
        to_lower (bool): When True (the default) the extension is lower-cased.

    Returns:
        str: The extension, or '' when the filename has none.
    """
    extension = os.path.splitext(filename)[1]
    if to_lower:
        return extension.lower()
    return extension
Get the extension part of the filename - for example '.txt'.
def remove_file_extension(filename: str) -> str:
    """
    Strip the extension from a file name, e.g. 'my-file.txt' -> 'my-file'.

    Args:
        filename (str): The filename (or path) to shorten.

    Returns:
        str: The filename without its last extension; unchanged if it has none.
    """
    root, _ = os.path.splitext(filename)
    return root
Remove the extension part of the file name - for example 'my-file.txt' -> 'my-file'.
def make_filename_valid(filename: str) -> str:
    """
    Produce a sanitized variant of the filename.

    The result is intended to contain only alphanumerics, underscore and full-stop.
    NOTE(review): the filtering itself is performed by
    util_string.filter_string_via_regex - confirm there how the regex and the
    '_' replacement character are applied.

    Args:
        filename (str): The filename to sanitize.

    Returns:
        str: The value returned by util_string.filter_string_via_regex.
    """
    allowed_pattern = "^[a-zA-Z0-9_\\.]+$"
    sanitized = util_string.filter_string_via_regex(
        text=filename, regex=allowed_pattern, replacement_char="_"
    )
    return sanitized
Return an altered filename so that it is valid.
- the new filename will only have alphanumerics, underscore and full-stop.
def copy_file(from_path: str, to_path: str) -> None:
    """
    Duplicate the contents of one file at another path.

    Args:
        from_path (str): The file whose contents are copied.
        to_path (str): The path of the file that receives the contents.
    """
    shutil.copyfile(src=from_path, dst=to_path)
Copy a file from one path to another.
Args: from_path (str): The path of the file to copy. to_path (str): The destination path to copy the file to.
def delete_file(path_to_file: str) -> None:
    """
    Remove a single file from the disk.

    Args:
        path_to_file (str): The path of the file to remove.
    """
    os.unlink(path_to_file)
Delete a file from the disk.
def get_unique_filepath(path_to_file: str) -> str:
    """
    Find a path, based on the given one, at which nothing exists yet.

    The given path is returned as-is when it is free. Otherwise '-02', '-03',
    ... is inserted before the extension until a free path is found.

    Args:
        path_to_file (str): The preferred path.

    Returns:
        str: A path at which nothing existed at the time of the check.
    """
    stem, ext = os.path.splitext(path_to_file)
    candidate = path_to_file
    counter = 2
    while True:
        if not os.path.exists(candidate):
            return candidate
        candidate = f"{stem}-{counter:02}{ext}"
        counter += 1
Get a unique new filepath, similar to the given path.
def get_this_script_dir(this_file: str) -> str:
    """
    Resolve the directory that contains the given script file.

    Args:
        this_file (str): The path of the current script file (__file__).

    Returns:
        str: The directory part of the script's real (symlink-resolved) path.
    """
    resolved_script = os.path.realpath(this_file)
    script_dir = os.path.dirname(resolved_script)
    return script_dir
Get the directory of the current script file.
Args: this_file (str): The path of the current script file (__file__).
Returns: str: The directory of the current script file.
def is_file_under_dir(path_to_file: str, path_to_dir: str) -> bool:
    """
    Is that file path located under that directory or one of its sub-directories.

    The check is done on the paths only - neither path needs to exist.
    Relative paths are resolved against the current working directory, so
    relative and absolute paths (and special directories such as '.' or the
    filesystem root) compare correctly.

    Args:
        path_to_file (str): The path of the file.
        path_to_dir (str): The path of the directory.

    Returns:
        bool: True if the file path is strictly inside the directory.
    """
    file_path = os.path.normcase(os.path.abspath(path_to_file))
    dir_path = os.path.normcase(os.path.abspath(path_to_dir))
    if file_path == dir_path:
        return False
    try:
        return os.path.commonpath([file_path, dir_path]) == dir_path
    except ValueError:
        # Raised on Windows when the two paths are on different drives.
        return False
Check whether the file path lies under the given directory or one of its sub-directories. Only the paths are compared; the file is not checked for existence.
def is_empty_directory_only_subdirectories(path_to_file: str) -> bool:
    """
    Check if a directory holds nothing but empty files and empty sub-directories.

    Args:
        path_to_file (str): The path to the directory to check. If it is a
            file, the result is whether that file is empty.

    Returns:
        bool: True if the directory is empty, False otherwise.
    """
    if os.path.isfile(path_to_file):
        return is_empty_file(path_to_file)
    for content in os.listdir(path_to_file):
        path_to_sub = os.path.join(path_to_file, content)
        if os.path.isfile(path_to_sub):
            # Keep scanning after an empty file: later entries may have content.
            if not is_empty_file(path_to_sub):
                return False
        elif os.path.isdir(path_to_sub):
            if not is_empty_directory_only_subdirectories(path_to_sub):
                return False
        else:
            # Neither file nor directory (e.g. a broken symlink) counts as content.
            return False
    return True
Check if a directory is empty, i.e. it contains nothing but empty files and (recursively) empty subdirectories.
Args: path_to_file (str): The path to the directory to check.
Returns: bool: True if the directory is empty, False otherwise.
def is_empty_file(path_to_file: str) -> bool:
    """
    Tell whether the given path is a regular, zero-byte file.

    Args:
        path_to_file (str): The path to the file to check.

    Returns:
        bool: True if the file has a size of 0 bytes. False if it has content,
        is a symbolic link, or is not a file at all.
    """
    # Symlinks and non-files are never reported as empty.
    if os.path.islink(path_to_file) or not os.path.isfile(path_to_file):
        return False
    return os.path.getsize(_get_long_file_path(path_to_file)) == 0
Check if a file is empty.
Args: path_to_file (str): The path to the file to check.
Returns: bool: True if the file is empty, False otherwise.
def move_file(from_filepath: str, to_filepath: str) -> None:
    """
    Recursively move a file or directory to another location, similar to the Unix "mv" command.

    If the destination is an existing directory or a symlink to a directory, the source is moved
    inside that directory. The destination path in that directory must not already exist.

    Args:
        from_filepath (str): The file or directory to move.
        to_filepath (str): The destination path.
    """
    # The destination path returned by shutil.move is deliberately not passed on.
    shutil.move(src=from_filepath, dst=to_filepath)
Recursively move a file or directory to another location. This is similar to the Unix "mv" command. This function returns nothing.
If the destination is an existing directory or a symlink to a directory, the source is moved inside that directory. The destination path in that directory must not already exist.
def read_lines_from_file(
    filepath: str, skip_comments: bool = False, encoding: str = "utf-8"
) -> list[str]:
    """
    Load a text file as a list of whitespace-stripped lines.

    Args:
        filepath (str): The path to the text file.
        skip_comments (bool): Whether to skip lines starting with '#'. Default is False.
        encoding (str): The file encoding to apply - defaults to utf-8.

    Returns:
        list: The lines of the file, each stripped of leading and trailing whitespace.
    """
    with open(filepath, encoding=encoding) as handle:
        stripped = [raw.strip() for raw in handle]
    if not skip_comments:
        return stripped
    # Comment detection happens after stripping, so indented '#' lines are dropped too.
    return [entry for entry in stripped if not entry.startswith("#")]
Read lines from a text file.
Args: filepath (str): The path to the text file. skip_comments (bool): Whether to skip lines starting with '#'. Default is False. encoding (str): The file encoding to apply - defaults to utf-8.
Returns: list: A list of lines read from the file.
def read_text_from_file(filepath: str, encoding: str = "utf-8") -> str:
    """
    Load the entire contents of a text file as one string.

    Args:
        filepath (str): The path to the text file.
        encoding (str): The file encoding to apply - defaults to utf-8.

    Returns:
        str: The complete text of the file.
    """
    with open(filepath, encoding=encoding) as source:
        contents = source.read()
    return contents
Read text from a text file.
Args: filepath (str): The path to the text file. encoding (str): The file encoding to apply - defaults to utf-8.
Returns: str: The text read from the file.
def read_text_from_text_or_pdf_file_skipping_comments(filepath: str) -> str:
    """
    Load the text of a text file or a PDF file, leaving out comment lines.

    Comment lines (starting with '#') are only removed from text files; text
    extracted from a PDF is returned as-is.

    Args:
        filepath (str): The path to the text or PDF file.

    Returns:
        str: The extracted PDF text, or the non-comment lines of the text file
        joined with util_text.LINE_END.
    """
    if not util_pdf.is_pdf(filepath):
        kept_lines = _remove_comments(read_lines_from_file(filepath))
        return util_text.LINE_END.join(kept_lines)
    return util_pdf.extract_text_from_pdf(filepath)
Read text from a text or PDF file, skipping comments.
Args: filepath (str): The path to the text or PDF file.
Returns: str: The text read from the file without comments.
def write_text_lines_to_file(
    lines: list[str], filepath: str, encoding: str = "utf-8"
) -> None:
    """
    Save a list of lines to a text file, each followed by util_text.LINE_END.

    Args:
        lines (list): List of lines to write to the file.
        filepath (str): The path to the output text file.
        encoding (str): The file encoding to apply - defaults to utf-8.
    """
    with open(filepath, mode="w", encoding=encoding) as target:
        target.writelines(entry + util_text.LINE_END for entry in lines)
Write lines of text to a text file.
Args: lines (list): List of lines to write to the file. filepath (str): The path to the output text file. encoding (str): The file encoding to apply - defaults to utf-8.
def write_array_to_file_skipping_empty(
    path_to_output_text_file: str, lines: list[str], encoding: str = "utf-8"
) -> None:
    """
    Write non-empty lines from an array to a file, skipping empty lines.

    Args:
        path_to_output_text_file (str): The path to the output text file.
        lines (list): List of lines to write to the file. None and '' entries are skipped.
        encoding (str): The file encoding to apply - defaults to utf-8, matching
            the other read/write helpers in this module.
    """
    with open(path_to_output_text_file, "w", encoding=encoding) as f:
        for line in lines:
            if line:
                f.write(line + "\n")
Write non-empty lines from an array to a file, skipping empty lines.
Args: path_to_output_text_file (str): The path to the output text file. lines (list): List of lines to write to the file.
def write_text_to_file(text: str, filepath: str, encoding: str = "utf-8") -> None:
    """
    Save a string to a text file, replacing any previous contents.

    Args:
        text (str): The text to write to the file.
        filepath (str): The path to the output text file.
        encoding (str): The file encoding to apply - defaults to utf-8.
    """
    with open(filepath, mode="w", encoding=encoding) as target:
        target.write(text)
Write text to a text file.
Args: text (str): The text to write to the file. filepath (str): The path to the output text file. encoding (str): The file encoding to apply - defaults to utf-8.
def get_last_part_of_path(file_path: str) -> str:
    """
    Extract the final component (the filename) of a file path.

    Args:
        file_path (str): The full file path.

    Returns:
        str: Everything after the last path separator; the whole path when it
        contains no separator.
    """
    after_native_sep = file_path.rpartition(os.sep)[2]
    # Windows can sometimes use unix separators (e.g. from bash shell)
    return after_native_sep.rpartition("/")[2]
Get the last part of a file path (filename).
Args: file_path (str): The full file path.
Returns: str: The last part of the file path (filename).