cornsnake.util_file

File operations including copying, reading, and writing text to files.

Documentation

  1"""
  2File operations including copying, reading, and writing text to files.
  3
  4[Documentation](http://docs.mrseanryan.cornsnake.s3-website-eu-west-1.amazonaws.com/cornsnake/util_file.html)
  5"""
  6
  7import datetime
  8import os
  9import shutil
 10
 11from . import util_os
 12from . import util_pdf
 13from . import util_text
 14from . import util_string
 15
 16
 17def backup_file_by_copying(
 18    path_to_file: str, backup_dir: str, backup_filename: str
 19) -> str:
 20    """
 21    Backup the given file by copying it to a new uniquely named file.
 22    """
 23    path_to_backup = os.path.join(backup_dir, backup_filename)
 24    path_to_backup = get_unique_filepath(path_to_backup)
 25    copy_file(path_to_file, path_to_backup)
 26    return path_to_backup
 27
 28
 29def change_extension(input_filename: str, new_extension: str) -> str:
 30    """
 31    Change the extension of the given filename.
 32
 33    Examples:
 34    - ('input1.txt', '.yaml') -> 'input1.yaml')
 35    - ('input2', '.yaml.txt') -> 'input2.yaml.txt')
 36    - ('input3', '.xml') -> 'input3.xml')
 37    - ('input1.txt.zip', '.zip') -> 'input1.zip')
 38    """
 39    if not new_extension.startswith("."):
 40        raise ValueError("new_extension must start with a '.'. For example: '.txt'")
 41    base_filename = input_filename
 42    if "." in input_filename:
 43        parts = input_filename.split(".")
 44        base_filename = ".".join(parts[:-1])
 45    if base_filename.endswith(new_extension):
 46        return base_filename
 47    return base_filename + new_extension
 48
 49
 50def get_file_extension(filename: str, to_lower: bool = True) -> str:
 51    """Get the extension part of the filename - for example '.txt'."""
 52    _, extension = os.path.splitext(filename)
 53    return extension.lower() if to_lower else extension
 54
 55
 56def remove_file_extension(filename: str) -> str:
 57    """Remove the extension part of the file name - for example 'my-file.txt' -> 'my-file'."""
 58    return filename.removesuffix(get_file_extension(filename=filename, to_lower=False))
 59
 60
 61def make_filename_valid(filename: str) -> str:
 62    """
 63    Return an altered filename so that it is valid.
 64    - the new filename will only have alphanumerics, underscore and full-stop.
 65    """
 66    return util_string.filter_string_via_regex(
 67        text=filename, regex="^[a-zA-Z0-9_\\.]+$", replacement_char="_"
 68    )
 69
 70
 71def copy_file(from_path: str, to_path: str) -> None:
 72    """
 73    Copy a file from one path to another.
 74
 75    Args:
 76    from_path (str): The path of the file to copy.
 77    to_path (str): The destination path to copy the file to.
 78    """
 79    shutil.copyfile(from_path, to_path)
 80
 81
 82def delete_file(path_to_file: str) -> None:
 83    """
 84    Delete a file from the disk.
 85    """
 86    os.remove(path_to_file)
 87
 88
 89def get_modified_date(path_to_file: str) -> datetime.datetime:
 90    return datetime.datetime.fromtimestamp(os.path.getmtime(path_to_file))
 91
 92
 93def get_unique_filepath(path_to_file: str) -> str:
 94    """
 95    Get a unique new filepath, similar to the given path.
 96    """
 97    filename_no_extension, extension = os.path.splitext(path_to_file)
 98
 99    suffix = 2
100    while os.path.exists(path_to_file):
101        path_to_file = f"{filename_no_extension}-{suffix:02}{extension}"
102        suffix += 1
103    return path_to_file
104
105
def get_this_script_dir(this_file: str) -> str:
    """
    Return the directory that contains the given script file.

    Args:
    this_file (str): The path of the current script file (__file__).

    Returns:
    str: The directory part of the path after it has been resolved with os.path.realpath.
    """
    resolved_script_path = os.path.realpath(this_file)
    return os.path.dirname(resolved_script_path)
117
118
def is_file_under_dir(path_to_file: str, path_to_dir: str) -> bool:
    """
    Check whether a file path lies under a directory or one of its sub-directories.

    The check is purely textual: both paths are normalized with os.path.normpath
    and compared as strings. The file system is not accessed, so neither path
    needs to exist, and both paths should be given in the same form (for example
    both absolute).

    Args:
    path_to_file (str): The path of the file.
    path_to_dir (str): The path of the directory.

    Returns:
    bool: True if the normalized file path starts with the normalized directory
    path followed by a separator. The directory itself is not 'under' itself.
    """
    normalized_file = os.path.normpath(path_to_file)
    dir_prefix = os.path.normpath(path_to_dir)
    # normpath keeps the trailing separator of a root directory (for example '/'),
    # so only append one when it is missing - otherwise the prefix becomes '//'
    # and no file would ever match.
    if not dir_prefix.endswith(os.sep):
        dir_prefix += os.sep
    return normalized_file.startswith(dir_prefix)
126
127
def _get_long_file_path(path_to_file: str) -> str:
    """
    Get the extended-length ('\\\\?\\' prefixed) form of a path on Windows.

    On other platforms the path is returned unchanged.
    NOTE(review): the platform check is util_os.is_windows(), which is not
    visible here - presumably True only on Windows.

    Args:
    path_to_file (str): The original file path.

    Returns:
    str: The long file path for Windows, or the original path elsewhere.
    """
    if not util_os.is_windows():
        return path_to_file

    long_prefix = "\\\\?\\"
    device_prefix = "\\\\.\\"
    if path_to_file.startswith((long_prefix, device_prefix)):
        # Already an extended-length or device path - prefixing it again would
        # make it invalid.
        return path_to_file

    # The prefix switches off Windows path normalization, so the path behind it
    # must be absolute and use backslashes only; abspath provides both.
    absolute_path = os.path.abspath(path_to_file)
    if absolute_path.startswith((long_prefix, device_prefix)):
        return absolute_path
    if absolute_path.startswith("\\\\"):
        # UNC share: \\server\share\... has to become \\?\UNC\server\share\...
        return long_prefix + "UNC\\" + absolute_path[2:]
    return long_prefix + absolute_path
139
140
def is_empty_directory_only_subdirectories(path_to_file: str) -> bool:
    """
    Check if a directory is empty, looking through all of its subdirectories.

    A directory counts as empty when every file found anywhere below it is
    empty according to is_empty_file, and every subdirectory is itself empty by
    the same rule. A directory with no entries at all is empty.

    Args:
    path_to_file (str): The path to the directory to check. If it is a file,
    the result of is_empty_file for that file is returned.

    Returns:
    bool: True if the directory is empty, False otherwise.
    """
    if os.path.isfile(path_to_file):
        return is_empty_file(path_to_file)

    for entry in os.listdir(path_to_file):
        path_to_sub = os.path.join(path_to_file, entry)
        if os.path.isfile(path_to_sub):
            # Keep scanning after an empty file: returning its verdict right
            # away would skip the remaining entries and make the result depend
            # on the order in which os.listdir reports them.
            if not is_empty_file(path_to_sub):
                return False
        elif not is_empty_directory_only_subdirectories(path_to_sub):
            return False
    return True
163
164
def is_empty_file(path_to_file: str) -> bool:
    """
    Tell whether a regular file has a size of zero bytes.

    Args:
    path_to_file (str): The path to the file to check.

    Returns:
    bool: True if the path is a file, is not a symbolic link, and its size is 0.
    False in every other case, including when the path is not a file.
    """
    if not os.path.isfile(path_to_file) or os.path.islink(path_to_file):
        return False
    # The size is read through the long-path form so that long Windows paths work.
    return os.path.getsize(_get_long_file_path(path_to_file)) == 0
182
183
def move_file(from_filepath: str, to_filepath: str) -> None:
    """
    Move a file or directory to another location by delegating to shutil.move.

    shutil.move returns the destination path; this wrapper discards it and
    returns nothing. See the shutil.move documentation for how an existing
    destination is handled.

    Args:
    from_filepath (str): The path of the file or directory to move.
    to_filepath (str): The path to move it to.
    """
    shutil.move(src=from_filepath, dst=to_filepath)
191
192
def read_lines_from_file(
    filepath: str, skip_comments: bool = False, encoding: str = "utf-8"
) -> list[str]:
    """
    Read a text file into a list of lines, each stripped of surrounding whitespace.

    Args:
    filepath (str): The path to the text file.
    skip_comments (bool): Whether to leave out lines that start with '#' after stripping. Default is False.
    encoding (str): The file encoding to apply - defaults to utf-8.

    Returns:
    list: The stripped lines of the file, in file order. Blank lines are kept as ''.
    """
    with open(filepath, encoding=encoding) as file:
        stripped_lines = [raw_line.strip() for raw_line in file]
    if not skip_comments:
        return stripped_lines
    return [line for line in stripped_lines if not line.startswith("#")]
213
214
def read_text_from_file(filepath: str, encoding: str = "utf-8") -> str:
    """
    Read the whole content of a text file as one string.

    Args:
    filepath (str): The path to the text file.
    encoding (str): The file encoding to apply - defaults to utf-8.

    Returns:
    str: The complete text of the file.
    """
    with open(filepath, mode="r", encoding=encoding) as text_file:
        contents = text_file.read()
    return contents
228
229
230def _remove_comments(lines: list[str]) -> list[str]:
231    """
232    Remove lines starting with '#' from a list of lines.
233
234    Args:
235    lines (list): List of lines to filter.
236
237    Returns:
238    list: Filtered list of lines without comments.
239    """
240    filtered_lines = []
241    for line in lines:
242        if not line.startswith("#"):
243            filtered_lines.append(line)
244    return filtered_lines
245
246
def read_text_from_text_or_pdf_file_skipping_comments(filepath: str) -> str:
    """
    Read the text of a PDF file or of a plain text file.

    For a PDF (as decided by util_pdf.is_pdf) the text extracted by
    util_pdf.extract_text_from_pdf is returned as-is. For any other file the
    stripped lines are read, lines starting with '#' are dropped, and the rest
    is joined with util_text.LINE_END.

    Args:
    filepath (str): The path to the text or PDF file.

    Returns:
    str: The text read from the file. Comment lines are only removed for non-PDF files.
    """
    if not util_pdf.is_pdf(filepath):
        non_comment_lines = read_lines_from_file(filepath, skip_comments=True)
        return util_text.LINE_END.join(non_comment_lines)
    return util_pdf.extract_text_from_pdf(filepath)
262
263
def write_text_lines_to_file(
    lines: list[str], filepath: str, encoding: str = "utf-8"
) -> None:
    """
    Write each given line to a text file, followed by util_text.LINE_END.

    The file is opened in 'w' mode, so existing content is replaced.

    Args:
    lines (list): List of lines to write to the file.
    filepath (str): The path to the output text file.
    encoding (str): The file encoding to apply - defaults to utf-8.
    """
    with open(filepath, mode="w", encoding=encoding) as file:
        file.writelines(line + util_text.LINE_END for line in lines)
278
279
def write_array_to_file_skipping_empty(
    path_to_output_text_file: str, lines: list[str], encoding: str = "utf-8"
) -> None:
    """
    Write the non-empty lines of an array to a text file, one per line.

    Entries that are None or '' are skipped. Every written line is followed by
    a newline. The file is opened in 'w' mode, so existing content is replaced.

    Args:
    path_to_output_text_file (str): The path to the output text file.
    lines (list): List of lines to write to the file.
    encoding (str): The file encoding to apply - defaults to utf-8, matching the
    other writers in this module instead of the platform default.
    """
    with open(path_to_output_text_file, "w", encoding=encoding) as f:
        # 'if line' skips both None and the empty string.
        f.writelines(line + "\n" for line in lines if line)
294
295
def write_text_to_file(text: str, filepath: str, encoding: str = "utf-8") -> None:
    """
    Write a string to a text file, replacing any existing content.

    Args:
    text (str): The text to write to the file.
    filepath (str): The path to the output text file.
    encoding (str): The file encoding to apply - defaults to utf-8.
    """
    with open(filepath, mode="w", encoding=encoding) as output_file:
        output_file.write(text)
307
308
309def _get_last_part_of_path(file_path: str, sep: str) -> str:
310    return file_path.split(sep)[-1]
311
312
def get_last_part_of_path(file_path: str) -> str:
    """
    Return the final component (the filename) of a file path.

    Args:
    file_path (str): The full file path.

    Returns:
    str: The text after the last path separator, where both os.sep and '/'
    count as separators. A path that ends with a separator yields ''.
    """
    after_os_separator = file_path.split(os.sep)[-1]

    # Windows can sometimes use unix separators (e.g. from bash shell), so the
    # remainder is split on '/' as well; without a '/' this changes nothing.
    return after_os_separator.split("/")[-1]
def backup_file_by_copying(path_to_file: str, backup_dir: str, backup_filename: str) -> str:
18def backup_file_by_copying(
19    path_to_file: str, backup_dir: str, backup_filename: str
20) -> str:
21    """
22    Backup the given file by copying it to a new uniquely named file.
23    """
24    path_to_backup = os.path.join(backup_dir, backup_filename)
25    path_to_backup = get_unique_filepath(path_to_backup)
26    copy_file(path_to_file, path_to_backup)
27    return path_to_backup

Back up the given file by copying it to a new, uniquely named file.

def change_extension(input_filename: str, new_extension: str) -> str:
30def change_extension(input_filename: str, new_extension: str) -> str:
31    """
32    Change the extension of the given filename.
33
34    Examples:
35    - ('input1.txt', '.yaml') -> 'input1.yaml')
36    - ('input2', '.yaml.txt') -> 'input2.yaml.txt')
37    - ('input3', '.xml') -> 'input3.xml')
38    - ('input1.txt.zip', '.zip') -> 'input1.zip')
39    """
40    if not new_extension.startswith("."):
41        raise ValueError("new_extension must start with a '.'. For example: '.txt'")
42    base_filename = input_filename
43    if "." in input_filename:
44        parts = input_filename.split(".")
45        base_filename = ".".join(parts[:-1])
46    if base_filename.endswith(new_extension):
47        return base_filename
48    return base_filename + new_extension

Change the extension of the given filename.

Examples:

  • ('input1.txt', '.yaml') -> 'input1.yaml'
  • ('input2', '.yaml.txt') -> 'input2.yaml.txt'
  • ('input3', '.xml') -> 'input3.xml'
  • ('input1.txt.zip', '.zip') -> 'input1.txt.zip'
def get_file_extension(filename: str, to_lower: bool = True) -> str:
51def get_file_extension(filename: str, to_lower: bool = True) -> str:
52    """Get the extension part of the filename - for example '.txt'."""
53    _, extension = os.path.splitext(filename)
54    return extension.lower() if to_lower else extension

Get the extension part of the filename - for example '.txt'.

def remove_file_extension(filename: str) -> str:
57def remove_file_extension(filename: str) -> str:
58    """Remove the extension part of the file name - for example 'my-file.txt' -> 'my-file'."""
59    return filename.removesuffix(get_file_extension(filename=filename, to_lower=False))

Remove the extension part of the file name - for example 'my-file.txt' -> 'my-file'.

def make_filename_valid(filename: str) -> str:
62def make_filename_valid(filename: str) -> str:
63    """
64    Return an altered filename so that it is valid.
65    - the new filename will only have alphanumerics, underscore and full-stop.
66    """
67    return util_string.filter_string_via_regex(
68        text=filename, regex="^[a-zA-Z0-9_\\.]+$", replacement_char="_"
69    )

Return an altered filename so that it is valid.

  • the new filename will only have alphanumerics, underscore and full-stop.
def copy_file(from_path: str, to_path: str) -> None:
72def copy_file(from_path: str, to_path: str) -> None:
73    """
74    Copy a file from one path to another.
75
76    Args:
77    from_path (str): The path of the file to copy.
78    to_path (str): The destination path to copy the file to.
79    """
80    shutil.copyfile(from_path, to_path)

Copy a file from one path to another.

Args: from_path (str): The path of the file to copy. to_path (str): The destination path to copy the file to.

def delete_file(path_to_file: str) -> None:
83def delete_file(path_to_file: str) -> None:
84    """
85    Delete a file from the disk.
86    """
87    os.remove(path_to_file)

Delete a file from the disk.

def get_modified_date(path_to_file: str) -> datetime.datetime:
90def get_modified_date(path_to_file: str) -> datetime.datetime:
91    return datetime.datetime.fromtimestamp(os.path.getmtime(path_to_file))
def get_unique_filepath(path_to_file: str) -> str:
 94def get_unique_filepath(path_to_file: str) -> str:
 95    """
 96    Get a unique new filepath, similar to the given path.
 97    """
 98    filename_no_extension, extension = os.path.splitext(path_to_file)
 99
100    suffix = 2
101    while os.path.exists(path_to_file):
102        path_to_file = f"{filename_no_extension}-{suffix:02}{extension}"
103        suffix += 1
104    return path_to_file

Get a unique new filepath, similar to the given path.

def get_this_script_dir(this_file: str) -> str:
107def get_this_script_dir(this_file: str) -> str:
108    """
109    Get the directory of the current script file.
110
111    Args:
112    this_file (str): The path of the current script file (__file__).
113
114    Returns:
115    str: The directory of the current script file.
116    """
117    return os.path.dirname(os.path.realpath(this_file))

Get the directory of the current script file.

Args: this_file (str): The path of the current script file (__file__).

Returns: str: The directory of the current script file.

def is_file_under_dir(path_to_file: str, path_to_dir: str) -> bool:
120def is_file_under_dir(path_to_file: str, path_to_dir: str) -> bool:
121    """
122    Does that file exist under that directory or a sub-directory.
123    """
124    path_to_file = os.path.normpath(path_to_file)
125    path_to_dir = os.path.normpath(path_to_dir) + os.sep
126    return path_to_file.startswith(path_to_dir)

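Check whether the file path lies under the given directory or one of its sub-directories. The check compares the normalized path strings only; it does not access the file system.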

def is_empty_directory_only_subdirectories(path_to_file: str) -> bool:
142def is_empty_directory_only_subdirectories(path_to_file: str) -> bool:
143    """
144    Check if a directory is empty (only subdirectories are empty).
145
146    Args:
147    path_to_file (str): The path to the directory to check.
148
149    Returns:
150    bool: True if the directory is empty, False otherwise.
151    """
152    if os.path.isfile(path_to_file):
153        return is_empty_file(path_to_file)
154    contents = os.listdir(path_to_file)
155    for content in contents:
156        path_to_sub = os.path.join(path_to_file, content)
157        if os.path.isfile(path_to_sub):
158            return is_empty_file(path_to_sub)
159        if not os.path.isfile(path_to_sub):
160            is_empty = is_empty_directory_only_subdirectories(path_to_sub)
161            if not is_empty:
162                return False
163    return True

Check if a directory is empty, where a directory that contains only empty subdirectories also counts as empty.

Args: path_to_file (str): The path to the directory to check.

Returns: bool: True if the directory is empty, False otherwise.

def is_empty_file(path_to_file: str) -> bool:
166def is_empty_file(path_to_file: str) -> bool:
167    """
168    Check if a file is empty.
169
170    Args:
171    path_to_file (str): The path to the file to check.
172
173    Returns:
174    bool: True if the file is empty, False otherwise.
175    """
176    if not os.path.isfile(path_to_file):
177        return False
178    if os.path.islink(path_to_file):
179        return False
180    fp_allow_long_path = _get_long_file_path(path_to_file)
181    size = os.path.getsize(fp_allow_long_path)
182    return size == 0

Check if a file is empty.

Args: path_to_file (str): The path to the file to check.

Returns: bool: True if the file is empty, False otherwise.

def move_file(from_filepath: str, to_filepath: str) -> None:
185def move_file(from_filepath: str, to_filepath: str) -> None:
186    """
187    Recursively move a file or directory to another location. This is similar to the Unix "mv" command. Return the file or directory's destination.
188
189    If the destination is a directory or a symlink to a directory, the source is moved inside the directory. The destination path must not already exist.
190    """
191    shutil.move(from_filepath, to_filepath)

Recursively move a file or directory to another location. This is similar to the Unix "mv" command. The function itself returns nothing (None).

If the destination is a directory or a symlink to a directory, the source is moved inside the directory. The destination path must not already exist.

def read_lines_from_file( filepath: str, skip_comments: bool = False, encoding: str = 'utf-8') -> list[str]:
194def read_lines_from_file(
195    filepath: str, skip_comments: bool = False, encoding: str = "utf-8"
196) -> list[str]:
197    """
198    Read lines from a text file.
199
200    Args:
201    filepath (str): The path to the text file.
202    skip_comments (bool): Whether to skip lines starting with '#'. Default is False.
203    encoding (str): The file encoding to apply - defaults to utf-8.
204
205    Returns:
206    list: A list of lines read from the file.
207    """
208    lines = []
209    with open(filepath, encoding=encoding) as file:
210        lines = [line.strip() for line in file]
211    if skip_comments:
212        lines = _remove_comments(lines)
213    return lines

Read lines from a text file.

Args: filepath (str): The path to the text file. skip_comments (bool): Whether to skip lines starting with '#'. Default is False. encoding (str): The file encoding to apply - defaults to utf-8.

Returns: list: A list of lines read from the file.

def read_text_from_file(filepath: str, encoding: str = 'utf-8') -> str:
216def read_text_from_file(filepath: str, encoding: str = "utf-8") -> str:
217    """
218    Read text from a text file.
219
220    Args:
221    filepath (str): The path to the text file.
222    encoding (str): The file encoding to apply - defaults to utf-8.
223
224    Returns:
225    str: The text read from the file.
226    """
227    with open(filepath, encoding=encoding) as file:
228        return file.read()

Read text from a text file.

Args: filepath (str): The path to the text file. encoding (str): The file encoding to apply - defaults to utf-8.

Returns: str: The text read from the file.

def read_text_from_text_or_pdf_file_skipping_comments(filepath: str) -> str:
248def read_text_from_text_or_pdf_file_skipping_comments(filepath: str) -> str:
249    """
250    Read text from a text or PDF file, skipping comments.
251
252    Args:
253    filepath (str): The path to the text or PDF file.
254
255    Returns:
256    str: The text read from the file without comments.
257    """
258    if util_pdf.is_pdf(filepath):
259        return util_pdf.extract_text_from_pdf(filepath)
260    lines = read_lines_from_file(filepath)
261    filtered_lines = _remove_comments(lines)
262    return util_text.LINE_END.join(filtered_lines)

Read text from a text or PDF file, skipping comments.

Args: filepath (str): The path to the text or PDF file.

Returns: str: The text read from the file without comments.

def write_text_lines_to_file(lines: list[str], filepath: str, encoding: str = 'utf-8') -> None:
265def write_text_lines_to_file(
266    lines: list[str], filepath: str, encoding: str = "utf-8"
267) -> None:
268    """
269    Write lines of text to a text file.
270
271    Args:
272    lines (list): List of lines to write to the file.
273    filepath (str): The path to the output text file.
274    encoding (str): The file encoding to apply - defaults to utf-8.
275    """
276    with open(filepath, encoding=encoding, mode="w") as file:
277        for line in lines:
278            file.write(line + util_text.LINE_END)

Write lines of text to a text file.

Args: lines (list): List of lines to write to the file. filepath (str): The path to the output text file. encoding (str): The file encoding to apply - defaults to utf-8.

def write_array_to_file_skipping_empty(path_to_output_text_file: str, lines: list[str]) -> None:
281def write_array_to_file_skipping_empty(
282    path_to_output_text_file: str, lines: list[str]
283) -> None:
284    """
285    Write non-empty lines from an array to a file, skipping empty lines.
286
287    Args:
288    PATH_TO_OUTPUT_TEXT_FILE (str): The path to the output text file.
289    lines (list): List of lines to write to the file.
290    """
291    with open(path_to_output_text_file, "w") as f:
292        for line in lines:
293            if line is not None and len(line) > 0:
294                f.write(line + "\n")

Write non-empty lines from an array to a file, skipping empty lines.

Args: path_to_output_text_file (str): The path to the output text file. lines (list): List of lines to write to the file.

def write_text_to_file(text: str, filepath: str, encoding: str = 'utf-8') -> None:
297def write_text_to_file(text: str, filepath: str, encoding: str = "utf-8") -> None:
298    """
299    Write text to a text file.
300
301    Args:
302    text (str): The text to write to the file.
303    filepath (str): The path to the output text file.
304    encoding (str): The file encoding to apply - defaults to utf-8.
305    """
306    with open(filepath, "w", encoding=encoding) as f:
307        f.write(text)

Write text to a text file.

Args: text (str): The text to write to the file. filepath (str): The path to the output text file. encoding (str): The file encoding to apply - defaults to utf-8.

def get_last_part_of_path(file_path: str) -> str:
314def get_last_part_of_path(file_path: str) -> str:
315    """
316    Get the last part of a file path (filename).
317
318    Args:
319    file_path (str): The full file path.
320
321    Returns:
322    str: The last part of the file path (filename).
323    """
324    last_part = _get_last_part_of_path(file_path, os.sep)
325
326    # Windows can sometimes use unix separators (e.g. from bash shell)
327    if "/" in last_part:
328        return _get_last_part_of_path(last_part, "/")
329    return last_part

Get the last part of a file path (filename).

Args: file_path (str): The full file path.

Returns: str: The last part of the file path (filename).