cornsnake.util_file
File operations including copying, reading, and writing text to files.
"""
File operations including copying, reading, and writing text to files.

[Documentation](http://docs.mrseanryan.cornsnake.s3-website-eu-west-1.amazonaws.com/cornsnake/util_file.html)
"""

import datetime
import os
import shutil

from . import util_os
from . import util_pdf
from . import util_text
from . import util_string

# Prefix that lets Windows file APIs accept paths longer than MAX_PATH.
_WINDOWS_LONG_PATH_PREFIX = "\\\\?\\"


def backup_file_by_copying(
    path_to_file: str, backup_dir: str, backup_filename: str
) -> str:
    """
    Backup the given file by copying it to a new uniquely named file.

    Args:
        path_to_file (str): The path of the file to back up.
        backup_dir (str): The directory that receives the backup copy.
        backup_filename (str): The preferred filename of the backup copy.

    Returns:
        str: The path the backup was written to. It differs from the
        preferred path when that path already existed (see get_unique_filepath).
    """
    backup_target = get_unique_filepath(os.path.join(backup_dir, backup_filename))
    copy_file(path_to_file, backup_target)
    return backup_target


def change_extension(input_filename: str, new_extension: str) -> str:
    """
    Change the extension of the given filename.

    Only the last extension is replaced. Dots in directory names and a
    leading dot (as in '.bashrc') are not treated as the start of an
    extension.

    Examples:
    - ('input1.txt', '.yaml') -> 'input1.yaml'
    - ('input2', '.yaml.txt') -> 'input2.yaml.txt'
    - ('input3', '.xml') -> 'input3.xml'
    - ('input1.zip.txt', '.zip') -> 'input1.zip'

    Args:
        input_filename (str): The filename (or path) to alter.
        new_extension (str): The new extension, including the leading '.'.

    Returns:
        str: The filename with the new extension.

    Raises:
        ValueError: If new_extension does not start with a '.'.
    """
    if not new_extension.startswith("."):
        raise ValueError("new_extension must start with a '.'. For example: '.txt'")
    # splitext only looks at the final path component, unlike a plain split on "."
    base_filename, _old_extension = os.path.splitext(input_filename)
    if base_filename.endswith(new_extension):
        # Avoid doubling up, e.g. 'a.zip.txt' -> 'a.zip' rather than 'a.zip.zip'
        return base_filename
    return base_filename + new_extension


def make_filename_valid(filename: str) -> str:
    """
    Return an altered filename so that it is valid.
    - the new filename will only have alphanumerics, underscore and full-stop.
    - the filtering is delegated to util_string.filter_string_via_regex, with
      '_' as the replacement character.
    """
    return util_string.filter_string_via_regex(
        text=filename, regex="^[a-zA-Z0-9_\\.]+$", replacement_char="_"
    )


def copy_file(from_path: str, to_path: str) -> None:
    """
    Copy a file from one path to another.

    Args:
        from_path (str): The path of the file to copy.
        to_path (str): The destination path to copy the file to.
    """
    shutil.copyfile(from_path, to_path)


def delete_file(path_to_file: str) -> None:
    """
    Delete a file from the disk.

    Args:
        path_to_file (str): The path of the file to delete.
    """
    os.remove(path_to_file)


def get_modified_date(path_to_file: str) -> datetime.datetime:
    """
    Get the last-modified time of a file.

    Args:
        path_to_file (str): The path of the file to inspect.

    Returns:
        datetime.datetime: The modification time, as a naive datetime built
        from the file's mtime via datetime.fromtimestamp.
    """
    return datetime.datetime.fromtimestamp(os.path.getmtime(path_to_file))


def get_unique_filepath(path_to_file: str) -> str:
    """
    Get a unique new filepath, similar to the given path.

    The given path is returned unchanged when nothing exists there yet.
    Otherwise a counter is inserted before the extension ('name-02.ext',
    'name-03.ext', ...) until a path is found that does not exist.

    Args:
        path_to_file (str): The preferred path.

    Returns:
        str: A path at which nothing currently exists.
    """
    filename_no_extension, extension = os.path.splitext(path_to_file)

    suffix = 2
    while os.path.exists(path_to_file):
        path_to_file = f"{filename_no_extension}-{suffix:02}{extension}"
        suffix += 1
    return path_to_file


def get_this_script_dir(this_file: str) -> str:
    """
    Get the directory of the current script file.

    Args:
        this_file (str): The path of the current script file (__file__).

    Returns:
        str: The directory of the current script file, with symlinks resolved.
    """
    return os.path.dirname(os.path.realpath(this_file))


def is_file_under_dir(path_to_file: str, path_to_dir: str) -> bool:
    """
    Check whether a file path lies under a directory or one of its sub-directories.

    The check is purely lexical: both paths are normalised with
    os.path.normpath and compared as strings. The filesystem is not
    consulted, so neither path has to exist, symlinks are not resolved and
    relative paths are not made absolute.

    Args:
        path_to_file (str): The path of the file.
        path_to_dir (str): The path of the directory.

    Returns:
        bool: True if the file path starts with the directory path, False
        otherwise (including when both paths are the same).
    """
    normalized_file = os.path.normpath(path_to_file)
    dir_prefix = os.path.normpath(path_to_dir)
    # A root directory keeps its trailing separator after normpath; adding a
    # second one would build a prefix that no path starts with.
    if not dir_prefix.endswith(os.sep):
        dir_prefix += os.sep
    return normalized_file != dir_prefix and normalized_file.startswith(dir_prefix)


def _get_long_file_path(path_to_file: str) -> str:
    """
    Get the long file path for Windows.

    Args:
        path_to_file (str): The original file path.

    Returns:
        str: On Windows, the absolute path with the long-path prefix; on
        other platforms the path unchanged.
    """
    if not util_os.is_windows():
        return path_to_file
    if path_to_file.startswith(_WINDOWS_LONG_PATH_PREFIX):
        # Already prefixed - a second prefix would make the path invalid
        return path_to_file
    # The prefix is only valid in front of a fully qualified path
    return _WINDOWS_LONG_PATH_PREFIX + os.path.abspath(path_to_file)


def is_empty_directory_only_subdirectories(path_to_file: str) -> bool:
    """
    Check if a directory is empty, where a directory that holds only empty
    files and (recursively) empty subdirectories still counts as empty.

    Args:
        path_to_file (str): The path to the directory to check. If a file is
            given, the result says whether that file is empty.

    Returns:
        bool: True if the directory is empty, False otherwise.
    """
    if os.path.isfile(path_to_file):
        return is_empty_file(path_to_file)
    for entry_name in os.listdir(path_to_file):
        path_to_sub = os.path.join(path_to_file, entry_name)
        if os.path.isfile(path_to_sub):
            # Keep scanning after an empty file: os.listdir order is
            # arbitrary, so the remaining entries must still be checked.
            if not is_empty_file(path_to_sub):
                return False
        elif not is_empty_directory_only_subdirectories(path_to_sub):
            return False
    return True


def is_empty_file(path_to_file: str) -> bool:
    """
    Check if a file is empty.

    Args:
        path_to_file (str): The path to the file to check.

    Returns:
        bool: True if the file is empty, False otherwise. Paths that are not
        regular files, and symbolic links, always give False.
    """
    if not os.path.isfile(path_to_file):
        return False
    if os.path.islink(path_to_file):
        return False
    fp_allow_long_path = _get_long_file_path(path_to_file)
    return os.path.getsize(fp_allow_long_path) == 0


def move_file(from_filepath: str, to_filepath: str) -> None:
    """
    Recursively move a file or directory to another location. This is similar to the Unix "mv" command.

    The move is delegated to shutil.move. The destination is not returned.

    If the destination is an existing directory or a symlink to a directory, the source is moved inside that directory. The destination path in that directory must not already exist.

    Args:
        from_filepath (str): The path of the file or directory to move.
        to_filepath (str): The destination path.
    """
    shutil.move(from_filepath, to_filepath)


def read_lines_from_file(
    filepath: str, skip_comments: bool = False, encoding: str = "utf-8"
) -> list[str]:
    """
    Read lines from a text file.

    Leading and trailing whitespace (including the line ending) is stripped
    from every line.

    Args:
        filepath (str): The path to the text file.
        skip_comments (bool): Whether to skip lines starting with '#'. Default is False.
        encoding (str): The file encoding to apply - defaults to utf-8.

    Returns:
        list: A list of lines read from the file.
    """
    with open(filepath, encoding=encoding) as file:
        lines = [line.strip() for line in file]
    if skip_comments:
        lines = _remove_comments(lines)
    return lines


def read_text_from_file(filepath: str, encoding: str = "utf-8") -> str:
    """
    Read text from a text file.

    Args:
        filepath (str): The path to the text file.
        encoding (str): The file encoding to apply - defaults to utf-8.

    Returns:
        str: The text read from the file.
    """
    with open(filepath, encoding=encoding) as file:
        return file.read()


def _remove_comments(lines: list[str]) -> list[str]:
    """
    Remove lines starting with '#' from a list of lines.

    Args:
        lines (list): List of lines to filter.

    Returns:
        list: Filtered list of lines without comments.
    """
    return [line for line in lines if not line.startswith("#")]


def read_text_from_text_or_pdf_file_skipping_comments(filepath: str) -> str:
    """
    Read text from a text or PDF file, skipping comments.

    Comment lines (starting with '#') are only removed from text files; text
    extracted from a PDF is returned as-is.

    Args:
        filepath (str): The path to the text or PDF file.

    Returns:
        str: The text read from the file without comments.
    """
    if util_pdf.is_pdf(filepath):
        return util_pdf.extract_text_from_pdf(filepath)
    lines = read_lines_from_file(filepath, skip_comments=True)
    return util_text.LINE_END.join(lines)


def write_text_lines_to_file(
    lines: list[str], filepath: str, encoding: str = "utf-8"
) -> None:
    """
    Write lines of text to a text file.

    Each line is followed by util_text.LINE_END. An existing file is overwritten.

    Args:
        lines (list): List of lines to write to the file.
        filepath (str): The path to the output text file.
        encoding (str): The file encoding to apply - defaults to utf-8.
    """
    with open(filepath, encoding=encoding, mode="w") as file:
        for line in lines:
            file.write(line + util_text.LINE_END)


def write_array_to_file_skipping_empty(
    path_to_output_text_file: str, lines: list[str], encoding: str = "utf-8"
) -> None:
    """
    Write non-empty lines from an array to a file, skipping empty lines.

    Entries that are None or the empty string are skipped. Every written
    line is followed by a newline. An existing file is overwritten.

    Args:
        path_to_output_text_file (str): The path to the output text file.
        lines (list): List of lines to write to the file.
        encoding (str): The file encoding to apply - defaults to utf-8, so
            that the file can be read back with this module's read functions.
    """
    with open(path_to_output_text_file, "w", encoding=encoding) as f:
        f.writelines(line + "\n" for line in lines if line)


def write_text_to_file(text: str, filepath: str, encoding: str = "utf-8") -> None:
    """
    Write text to a text file.

    An existing file is overwritten.

    Args:
        text (str): The text to write to the file.
        filepath (str): The path to the output text file.
        encoding (str): The file encoding to apply - defaults to utf-8.
    """
    with open(filepath, "w", encoding=encoding) as f:
        f.write(text)


def _get_last_part_of_path(file_path: str, sep: str) -> str:
    """
    Return the text after the last occurrence of sep (the whole text if sep is absent).
    """
    return file_path.split(sep)[-1]


def get_last_part_of_path(file_path: str) -> str:
    """
    Get the last part of a file path (filename).

    Args:
        file_path (str): The full file path.

    Returns:
        str: The last part of the file path (filename). This is empty when
        the path ends with a separator.
    """
    last_part = _get_last_part_of_path(file_path, os.sep)

    # Windows can sometimes use unix separators (e.g. from bash shell)
    if "/" in last_part:
        return _get_last_part_of_path(last_part, "/")
    return last_part
def backup_file_by_copying(
    path_to_file: str, backup_dir: str, backup_filename: str
) -> str:
    """
    Backup the given file by copying it to a new uniquely named file.

    Args:
        path_to_file (str): The path of the file to back up.
        backup_dir (str): The directory that receives the backup copy.
        backup_filename (str): The preferred filename of the backup copy.

    Returns:
        str: The path the backup was written to. It differs from the
        preferred path when that path already existed (see get_unique_filepath).
    """
    preferred_target = os.path.join(backup_dir, backup_filename)
    # Never overwrite an earlier backup: pick a path that is still free
    backup_target = get_unique_filepath(preferred_target)
    copy_file(path_to_file, backup_target)
    return backup_target
Backup the given file by copying it to a new uniquely named file.
def change_extension(input_filename: str, new_extension: str) -> str:
    """
    Change the extension of the given filename.

    Only the last extension is replaced. Dots in directory names and a
    leading dot (as in '.bashrc') are not treated as the start of an
    extension.

    Examples:
    - ('input1.txt', '.yaml') -> 'input1.yaml'
    - ('input2', '.yaml.txt') -> 'input2.yaml.txt'
    - ('input3', '.xml') -> 'input3.xml'
    - ('input1.zip.txt', '.zip') -> 'input1.zip'

    Args:
        input_filename (str): The filename (or path) to alter.
        new_extension (str): The new extension, including the leading '.'.

    Returns:
        str: The filename with the new extension.

    Raises:
        ValueError: If new_extension does not start with a '.'.
    """
    if not new_extension.startswith("."):
        raise ValueError("new_extension must start with a '.'. For example: '.txt'")
    # splitext only looks at the final path component, unlike a plain split on "."
    base_filename, _old_extension = os.path.splitext(input_filename)
    if base_filename.endswith(new_extension):
        # Avoid doubling up, e.g. 'a.zip.txt' -> 'a.zip' rather than 'a.zip.zip'
        return base_filename
    return base_filename + new_extension
Change the extension of the given filename.
Examples:
- ('input1.txt', '.yaml') -> 'input1.yaml'
- ('input2', '.yaml.txt') -> 'input2.yaml.txt'
- ('input3', '.xml') -> 'input3.xml'
- ('input1.zip.txt', '.zip') -> 'input1.zip'
def make_filename_valid(filename: str) -> str:
    """
    Return an altered filename so that it is valid.
    - the new filename will only have alphanumerics, underscore and full-stop.
    - the filtering is delegated to util_string.filter_string_via_regex, with
      '_' as the replacement character (presumably applied to each character
      that fails the pattern - confirm against that helper).
    """
    allowed_pattern = "^[a-zA-Z0-9_\\.]+$"
    substitute = "_"
    return util_string.filter_string_via_regex(
        text=filename, regex=allowed_pattern, replacement_char=substitute
    )
Return an altered filename so that it is valid.
- the new filename will only have alphanumerics, underscore and full-stop.
def copy_file(from_path: str, to_path: str) -> None:
    """
    Copy the contents of one file to another path.

    This is a thin wrapper around shutil.copyfile.

    Args:
        from_path (str): The path of the file to copy.
        to_path (str): The destination path to copy the file to.
    """
    shutil.copyfile(src=from_path, dst=to_path)
Copy a file from one path to another.
Args: from_path (str): The path of the file to copy. to_path (str): The destination path to copy the file to.
def delete_file(path_to_file: str) -> None:
    """
    Remove a single file from the disk.

    Args:
        path_to_file (str): The path of the file to delete.
    """
    # os.unlink is the same operation as os.remove, under its Unix name
    os.unlink(path_to_file)
Delete a file from the disk.
def get_unique_filepath(path_to_file: str) -> str:
    """
    Get a unique new filepath, similar to the given path.

    The given path is returned unchanged when nothing exists there yet.
    Otherwise a counter is inserted before the extension ('name-02.ext',
    'name-03.ext', ...) until a path is found that does not exist.

    Args:
        path_to_file (str): The preferred path.

    Returns:
        str: A path at which nothing currently exists.
    """
    stem, ext = os.path.splitext(path_to_file)
    candidate = path_to_file
    counter = 2  # the original file counts as number one
    while True:
        if not os.path.exists(candidate):
            return candidate
        candidate = f"{stem}-{counter:02}{ext}"
        counter += 1
Get a unique new filepath, similar to the given path.
def get_this_script_dir(this_file: str) -> str:
    """
    Get the directory that holds the current script file.

    Args:
        this_file (str): The path of the current script file (__file__).

    Returns:
        str: The directory of the current script file, with symlinks resolved.
    """
    resolved_script = os.path.realpath(this_file)
    script_dir, _script_name = os.path.split(resolved_script)
    return script_dir
Get the directory of the current script file.
Args: this_file (str): The path of the current script file (__file__).
Returns: str: The directory of the current script file.
def is_file_under_dir(path_to_file: str, path_to_dir: str) -> bool:
    """
    Check whether a file path lies under a directory or one of its sub-directories.

    The check is purely lexical: both paths are normalised with
    os.path.normpath and compared as strings. The filesystem is not
    consulted, so neither path has to exist, symlinks are not resolved and
    relative paths are not made absolute.

    Args:
        path_to_file (str): The path of the file.
        path_to_dir (str): The path of the directory.

    Returns:
        bool: True if the file path starts with the directory path, False
        otherwise (including when both paths are the same).
    """
    normalized_file = os.path.normpath(path_to_file)
    dir_prefix = os.path.normpath(path_to_dir)
    # A root directory keeps its trailing separator after normpath; adding a
    # second one would build a prefix that no path starts with.
    if not dir_prefix.endswith(os.sep):
        dir_prefix += os.sep
    return normalized_file != dir_prefix and normalized_file.startswith(dir_prefix)
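Check whether the given file path is located under the given directory or one of its sub-directories. The paths are compared lexically after normalisation; the filesystem is not consulted.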
def is_empty_directory_only_subdirectories(path_to_file: str) -> bool:
    """
    Check if a directory is empty, where a directory that holds only empty
    files and (recursively) empty subdirectories still counts as empty.

    Args:
        path_to_file (str): The path to the directory to check. If a file is
            given, the result says whether that file is empty.

    Returns:
        bool: True if the directory is empty, False otherwise.
    """
    if os.path.isfile(path_to_file):
        return is_empty_file(path_to_file)
    for entry_name in os.listdir(path_to_file):
        path_to_sub = os.path.join(path_to_file, entry_name)
        if os.path.isfile(path_to_sub):
            # Keep scanning after an empty file: os.listdir order is
            # arbitrary, so the remaining entries must still be checked.
            if not is_empty_file(path_to_sub):
                return False
        elif not is_empty_directory_only_subdirectories(path_to_sub):
            return False
    return True
Check if a directory is empty, where a directory that contains only empty subdirectories also counts as empty.
Args: path_to_file (str): The path to the directory to check.
Returns: bool: True if the directory is empty, False otherwise.
def is_empty_file(path_to_file: str) -> bool:
    """
    Check whether a file has a size of zero bytes.

    Args:
        path_to_file (str): The path to the file to check.

    Returns:
        bool: True if the file is empty, False otherwise. Paths that are not
        regular files, and symbolic links, always give False.
    """
    is_plain_file = os.path.isfile(path_to_file) and not os.path.islink(path_to_file)
    if not is_plain_file:
        return False
    # Go through the long-path form so that very long Windows paths can be sized
    return os.path.getsize(_get_long_file_path(path_to_file)) == 0
Check if a file is empty.
Args: path_to_file (str): The path to the file to check.
Returns: bool: True if the file is empty, False otherwise.
def move_file(from_filepath: str, to_filepath: str) -> None:
    """
    Recursively move a file or directory to another location. This is similar to the Unix "mv" command.

    The move is delegated to shutil.move. The destination is not returned.

    If the destination is an existing directory or a symlink to a directory, the source is moved inside that directory. The destination path in that directory must not already exist.

    Args:
        from_filepath (str): The path of the file or directory to move.
        to_filepath (str): The destination path.
    """
    shutil.move(src=from_filepath, dst=to_filepath)
Recursively move a file or directory to another location. This is similar to the Unix "mv" command. The destination is not returned: this function returns None.
If the destination is an existing directory or a symlink to a directory, the source is moved inside that directory. The destination path in that directory must not already exist.
def read_lines_from_file(
    filepath: str, skip_comments: bool = False, encoding: str = "utf-8"
) -> list[str]:
    """
    Read a text file into a list of lines.

    Leading and trailing whitespace (including the line ending) is stripped
    from every line.

    Args:
        filepath (str): The path to the text file.
        skip_comments (bool): Whether to skip lines starting with '#'. Default is False.
        encoding (str): The file encoding to apply - defaults to utf-8.

    Returns:
        list: A list of lines read from the file.
    """
    with open(filepath, encoding=encoding) as text_file:
        stripped_lines = [raw_line.strip() for raw_line in text_file]
    # Comments are detected after stripping, so indented '#' lines are dropped too
    return _remove_comments(stripped_lines) if skip_comments else stripped_lines
Read lines from a text file.
Args: filepath (str): The path to the text file. skip_comments (bool): Whether to skip lines starting with '#'. Default is False. encoding (str): The file encoding to apply - defaults to utf-8.
Returns: list: A list of lines read from the file.
def read_text_from_file(filepath: str, encoding: str = "utf-8") -> str:
    """
    Read the whole content of a text file.

    Args:
        filepath (str): The path to the text file.
        encoding (str): The file encoding to apply - defaults to utf-8.

    Returns:
        str: The text read from the file.
    """
    with open(filepath, encoding=encoding) as text_file:
        contents = text_file.read()
    return contents
Read text from a text file.
Args: filepath (str): The path to the text file. encoding (str): The file encoding to apply - defaults to utf-8.
Returns: str: The text read from the file.
def read_text_from_text_or_pdf_file_skipping_comments(filepath: str) -> str:
    """
    Read text from a text or PDF file, skipping comments.

    Comment lines (starting with '#') are only removed from text files; text
    extracted from a PDF is returned as-is.

    Args:
        filepath (str): The path to the text or PDF file.

    Returns:
        str: The text read from the file without comments.
    """
    if not util_pdf.is_pdf(filepath):
        text_lines = read_lines_from_file(filepath, skip_comments=True)
        return util_text.LINE_END.join(text_lines)
    return util_pdf.extract_text_from_pdf(filepath)
Read text from a text or PDF file, skipping comments.
Args: filepath (str): The path to the text or PDF file.
Returns: str: The text read from the file without comments.
def write_text_lines_to_file(
    lines: list[str], filepath: str, encoding: str = "utf-8"
) -> None:
    """
    Write lines of text to a text file.

    Each line is followed by util_text.LINE_END. An existing file is overwritten.

    Args:
        lines (list): List of lines to write to the file.
        filepath (str): The path to the output text file.
        encoding (str): The file encoding to apply - defaults to utf-8.
    """
    with open(filepath, mode="w", encoding=encoding) as out_file:
        out_file.writelines(line + util_text.LINE_END for line in lines)
Write lines of text to a text file.
Args: lines (list): List of lines to write to the file. filepath (str): The path to the output text file. encoding (str): The file encoding to apply - defaults to utf-8.
def write_array_to_file_skipping_empty(
    path_to_output_text_file: str, lines: list[str], encoding: str = "utf-8"
) -> None:
    """
    Write non-empty lines from an array to a file, skipping empty lines.

    Entries that are None or the empty string are skipped. Every written
    line is followed by a newline. An existing file is overwritten.

    Args:
        path_to_output_text_file (str): The path to the output text file.
        lines (list): List of lines to write to the file.
        encoding (str): The file encoding to apply - defaults to utf-8, so
            that the file can be read back with this module's read functions.
    """
    with open(path_to_output_text_file, "w", encoding=encoding) as f:
        f.writelines(line + "\n" for line in lines if line)
Write non-empty lines from an array to a file, skipping empty lines.
Args: path_to_output_text_file (str): The path to the output text file. lines (list): List of lines to write to the file.
def write_text_to_file(text: str, filepath: str, encoding: str = "utf-8") -> None:
    """
    Write a string to a text file.

    An existing file is overwritten.

    Args:
        text (str): The text to write to the file.
        filepath (str): The path to the output text file.
        encoding (str): The file encoding to apply - defaults to utf-8.
    """
    with open(filepath, mode="w", encoding=encoding) as out_file:
        out_file.write(text)
Write text to a text file.
Args: text (str): The text to write to the file. filepath (str): The path to the output text file. encoding (str): The file encoding to apply - defaults to utf-8.
def get_last_part_of_path(file_path: str) -> str:
    """
    Get the last part of a file path (filename).

    Args:
        file_path (str): The full file path.

    Returns:
        str: The last part of the file path (filename). This is empty when
        the path ends with a separator.
    """
    after_native_sep = file_path.rsplit(os.sep, 1)[-1]

    # Windows can sometimes use unix separators (e.g. from bash shell);
    # without a "/" this second split leaves the text unchanged
    return after_native_sep.rsplit("/", 1)[-1]
Get the last part of a file path (filename).
Args: file_path (str): The full file path.
Returns: str: The last part of the file path (filename).