cornsnake.util_file

File operations including copying, reading, and writing text to files.

Documentation

  1"""
  2File operations including copying, reading, and writing text to files.
  3
  4[Documentation](http://docs.mrseanryan.cornsnake.s3-website-eu-west-1.amazonaws.com/cornsnake/util_file.html)
  5"""
  6
  7import datetime
  8import os
  9import shutil
 10
 11from . import util_os
 12from . import util_pdf
 13from . import util_text
 14from . import util_string
 15
 16
 17def backup_file_by_copying(
 18    path_to_file: str, backup_dir: str, backup_filename: str
 19) -> str:
 20    """
 21    Backup the given file by copying it to a new uniquely named file.
 22    """
 23    path_to_backup = os.path.join(backup_dir, backup_filename)
 24    path_to_backup = get_unique_filepath(path_to_backup)
 25    copy_file(path_to_file, path_to_backup)
 26    return path_to_backup
 27
 28
 29def change_extension(input_filename: str, new_extension: str) -> str:
 30    """
 31    Change the extension of the given filename.
 32
 33    Examples:
 34    - ('input1.txt', '.yaml') -> 'input1.yaml')
 35    - ('input2', '.yaml.txt') -> 'input2.yaml.txt')
 36    - ('input3', '.xml') -> 'input3.xml')
 37    - ('input1.txt.zip', '.zip') -> 'input1.zip')
 38    """
 39    if not new_extension.startswith("."):
 40        raise ValueError("new_extension must start with a '.'. For example: '.txt'")
 41    base_filename = input_filename
 42    if "." in input_filename:
 43        parts = input_filename.split(".")
 44        base_filename = ".".join(parts[:-1])
 45    if base_filename.endswith(new_extension):
 46        return base_filename
 47    return base_filename + new_extension
 48
 49
 50def make_filename_valid(filename: str) -> str:
 51    """
 52    Return an altered filename so that it is valid.
 53    - the new filename will only have alphanumerics, underscore and full-stop.
 54    """
 55    return util_string.filter_string_via_regex(
 56        text=filename, regex="^[a-zA-Z0-9_\\.]+$", replacement_char="_"
 57    )
 58
 59
 60def copy_file(from_path: str, to_path: str) -> None:
 61    """
 62    Copy a file from one path to another.
 63
 64    Args:
 65    from_path (str): The path of the file to copy.
 66    to_path (str): The destination path to copy the file to.
 67    """
 68    shutil.copyfile(from_path, to_path)
 69
 70
 71def delete_file(path_to_file: str) -> None:
 72    """
 73    Delete a file from the disk.
 74    """
 75    os.remove(path_to_file)
 76
 77
 78def get_modified_date(path_to_file: str) -> datetime.datetime:
 79    return datetime.datetime.fromtimestamp(os.path.getmtime(path_to_file))
 80
 81
 82def get_unique_filepath(path_to_file: str) -> str:
 83    """
 84    Get a unique new filepath, similar to the given path.
 85    """
 86    filename_no_extension, extension = os.path.splitext(path_to_file)
 87
 88    suffix = 2
 89    while os.path.exists(path_to_file):
 90        path_to_file = f"{filename_no_extension}-{suffix:02}{extension}"
 91        suffix += 1
 92    return path_to_file
 93
 94
 95def get_this_script_dir(this_file: str) -> str:
 96    """
 97    Get the directory of the current script file.
 98
 99    Args:
100    this_file (str): The path of the current script file (__file__).
101
102    Returns:
103    str: The directory of the current script file.
104    """
105    return os.path.dirname(os.path.realpath(this_file))
106
107
def is_file_under_dir(path_to_file: str, path_to_dir: str) -> bool:
    """
    Check whether a file path lies under a directory or one of its sub-directories.

    The check is purely lexical: both paths are normalized with os.path.normpath
    and then compared as strings. The file system is not consulted, so neither
    path has to exist, and relative paths are not made absolute.

    Args:
    path_to_file (str): The path of the file.
    path_to_dir (str): The path of the directory.

    Returns:
    bool: True if the normalized file path starts with the normalized directory path, False otherwise (including when both paths are the same).
    """
    normalized_file = os.path.normpath(path_to_file)
    dir_prefix = os.path.normpath(path_to_dir)
    # A root directory such as '/' already ends with a separator. Appending a
    # second one would give '//', which no normalized file path starts with.
    if not dir_prefix.endswith(os.sep):
        dir_prefix += os.sep
    return normalized_file.startswith(dir_prefix)
115
116
def _get_long_file_path(path_to_file: str) -> str:
    """
    Get the extended-length ("long") form of a file path for Windows.

    When util_os.is_windows() is false, the path is returned unchanged.
    Otherwise the path is made absolute and prefixed with '\\\\?\\', the Windows
    prefix that lifts the MAX_PATH length limit.

    Args:
    path_to_file (str): The original file path.

    Returns:
    str: The long file path for Windows, or the original path elsewhere.
    """
    if not util_os.is_windows():
        return path_to_file
    long_path_prefix = "\\\\?\\"
    if path_to_file.startswith((long_path_prefix, "\\\\.\\")):
        # Already an extended-length or device path - prefixing again would break it.
        return path_to_file
    # The prefix switches off Windows' own path normalization, so the path must
    # be absolute and normalized (no '.', '..' or '/') before it is prefixed.
    absolute_path = os.path.abspath(path_to_file)
    if absolute_path.startswith("\\\\"):
        # UNC share: \\server\share\... becomes \\?\UNC\server\share\...
        return long_path_prefix + "UNC\\" + absolute_path[2:]
    return long_path_prefix + absolute_path
128
129
def is_empty_directory_only_subdirectories(path_to_file: str) -> bool:
    """
    Check if a directory is empty, where a directory that holds only empty
    sub-directories and empty files still counts as empty.

    The directory is walked recursively: every sub-directory must itself be
    empty by this definition, and every file must be empty according to
    is_empty_file.

    Args:
    path_to_file (str): The path to the directory to check. If this is a file, the result of is_empty_file is returned.

    Returns:
    bool: True if the directory is empty, False otherwise.
    """
    if os.path.isfile(path_to_file):
        return is_empty_file(path_to_file)
    for content in os.listdir(path_to_file):
        path_to_sub = os.path.join(path_to_file, content)
        if os.path.isfile(path_to_sub):
            # Only a non-empty file decides the answer. After an empty file,
            # keep scanning - a later entry may still be non-empty.
            if not is_empty_file(path_to_sub):
                return False
        elif not is_empty_directory_only_subdirectories(path_to_sub):
            return False
    return True
152
153
def is_empty_file(path_to_file: str) -> bool:
    """
    Tell whether a file has a size of zero bytes.

    Args:
    path_to_file (str): The path to the file to check.

    Returns:
    bool: True if the path is a file, is not a symbolic link, and has size 0. False otherwise.
    """
    # A path that is not a file, or that is a symbolic link, is reported as not empty.
    is_plain_file = os.path.isfile(path_to_file) and not os.path.islink(path_to_file)
    if not is_plain_file:
        return False
    return os.path.getsize(_get_long_file_path(path_to_file)) == 0
171
172
def move_file(from_filepath: str, to_filepath: str) -> None:
    """
    Move a file or directory to another location (delegates to shutil.move).

    Nothing is returned: the destination path that shutil.move reports is discarded.
    See shutil.move for how an existing destination is handled.

    Args:
    from_filepath (str): The path of the file or directory to move.
    to_filepath (str): The path to move it to.
    """
    shutil.move(src=from_filepath, dst=to_filepath)
180
181
def read_lines_from_file(
    filepath: str, skip_comments: bool = False, encoding: str = "utf-8"
) -> list[str]:
    """
    Load a text file as a list of lines, each stripped of surrounding whitespace.

    Args:
    filepath (str): The path to the text file.
    skip_comments (bool): Whether to leave out lines that start with '#' (after stripping). Default is False.
    encoding (str): The file encoding to apply - defaults to utf-8.

    Returns:
    list: The stripped lines of the file, in file order.
    """
    with open(filepath, encoding=encoding) as text_file:
        stripped_lines = list(map(str.strip, text_file))
    if not skip_comments:
        return stripped_lines
    # Comments are detected on the stripped line, so indented '#' lines are dropped too.
    return [entry for entry in stripped_lines if not entry.startswith("#")]
202
203
def read_text_from_file(filepath: str, encoding: str = "utf-8") -> str:
    """
    Load the complete contents of a text file as one string.

    Args:
    filepath (str): The path to the text file.
    encoding (str): The file encoding to apply - defaults to utf-8.

    Returns:
    str: The full text of the file.
    """
    with open(filepath, encoding=encoding) as text_file:
        contents = text_file.read()
    return contents
217
218
def _remove_comments(lines: list[str]) -> list[str]:
    """
    Build a new list that leaves out every line starting with '#'.

    Args:
    lines (list): List of lines to filter.

    Returns:
    list: The lines that do not start with '#', in their original order.
    """
    return [entry for entry in lines if not entry.startswith("#")]
234
235
def read_text_from_text_or_pdf_file_skipping_comments(filepath: str) -> str:
    """
    Load the text of a text file or a PDF file, leaving out comment lines.

    For a PDF (as decided by util_pdf.is_pdf) the text extracted by
    util_pdf.extract_text_from_pdf is returned unchanged. For any other file,
    the lines are read, lines starting with '#' are dropped, and the rest is
    joined with util_text.LINE_END.

    Args:
    filepath (str): The path to the text or PDF file.

    Returns:
    str: The text read from the file without comments.
    """
    if not util_pdf.is_pdf(filepath):
        non_comment_lines = read_lines_from_file(filepath, skip_comments=True)
        return util_text.LINE_END.join(non_comment_lines)
    return util_pdf.extract_text_from_pdf(filepath)
251
252
def write_text_lines_to_file(
    lines: list[str], filepath: str, encoding: str = "utf-8"
) -> None:
    """
    Save a list of lines to a text file, replacing any previous contents.

    Each line is written followed by util_text.LINE_END.

    Args:
    lines (list): List of lines to write to the file.
    filepath (str): The path to the output text file.
    encoding (str): The file encoding to apply - defaults to utf-8.
    """
    with open(filepath, mode="w", encoding=encoding) as text_file:
        text_file.writelines(entry + util_text.LINE_END for entry in lines)
267
268
def write_array_to_file_skipping_empty(
    path_to_output_text_file: str, lines: list[str], encoding: str = "utf-8"
) -> None:
    """
    Write the non-empty lines from an array to a file, replacing any previous contents.

    Entries that are None or an empty string are skipped. Every line that is
    written is followed by a newline.

    Args:
    path_to_output_text_file (str): The path to the output text file.
    lines (list): List of lines to write to the file.
    encoding (str): The file encoding to apply - defaults to utf-8, like the other readers and writers in this module.
    """
    # An explicit encoding keeps the output independent of the platform default,
    # so the file can be read back with this module's readers.
    with open(path_to_output_text_file, "w", encoding=encoding) as f:
        for line in lines:
            if line:
                f.write(line + "\n")
283
284
def write_text_to_file(text: str, filepath: str, encoding: str = "utf-8") -> None:
    """
    Save a string to a text file, replacing any previous contents.

    Args:
    text (str): The text to write to the file.
    filepath (str): The path to the output text file.
    encoding (str): The file encoding to apply - defaults to utf-8.
    """
    with open(filepath, mode="w", encoding=encoding) as text_file:
        text_file.write(text)
296
297
def _get_last_part_of_path(file_path: str, sep: str) -> str:
    """
    Get the text that follows the last occurrence of a separator.

    Args:
    file_path (str): The full file path.
    sep (str): The separator to split the path on.

    Returns:
    str: The part after the last separator, or the whole path if the separator does not occur.
    """
    return file_path.rpartition(sep)[2]
300
301
def get_last_part_of_path(file_path: str) -> str:
    """
    Extract the final component (the filename) of a file path.

    The path is split on the platform separator (os.sep) first, and then on '/'.

    Args:
    file_path (str): The full file path.

    Returns:
    str: The text after the last separator, or the whole path if it has no separator.
    """
    tail = file_path.split(os.sep)[-1]

    # Windows can sometimes use unix separators (e.g. from bash shell)
    return tail.split("/")[-1]
def backup_file_by_copying(path_to_file: str, backup_dir: str, backup_filename: str) -> str:
18def backup_file_by_copying(
19    path_to_file: str, backup_dir: str, backup_filename: str
20) -> str:
21    """
22    Backup the given file by copying it to a new uniquely named file.
23    """
24    path_to_backup = os.path.join(backup_dir, backup_filename)
25    path_to_backup = get_unique_filepath(path_to_backup)
26    copy_file(path_to_file, path_to_backup)
27    return path_to_backup

Backup the given file by copying it to a new uniquely named file.

def change_extension(input_filename: str, new_extension: str) -> str:
30def change_extension(input_filename: str, new_extension: str) -> str:
31    """
32    Change the extension of the given filename.
33
34    Examples:
35    - ('input1.txt', '.yaml') -> 'input1.yaml')
36    - ('input2', '.yaml.txt') -> 'input2.yaml.txt')
37    - ('input3', '.xml') -> 'input3.xml')
38    - ('input1.txt.zip', '.zip') -> 'input1.zip')
39    """
40    if not new_extension.startswith("."):
41        raise ValueError("new_extension must start with a '.'. For example: '.txt'")
42    base_filename = input_filename
43    if "." in input_filename:
44        parts = input_filename.split(".")
45        base_filename = ".".join(parts[:-1])
46    if base_filename.endswith(new_extension):
47        return base_filename
48    return base_filename + new_extension

Change the extension of the given filename.

Examples:

  • ('input1.txt', '.yaml') -> 'input1.yaml'
  • ('input2', '.yaml.txt') -> 'input2.yaml.txt'
  • ('input3', '.xml') -> 'input3.xml'
  • ('input1.txt.zip', '.zip') -> 'input1.txt.zip' (only the last extension is replaced)
def make_filename_valid(filename: str) -> str:
51def make_filename_valid(filename: str) -> str:
52    """
53    Return an altered filename so that it is valid.
54    - the new filename will only have alphanumerics, underscore and full-stop.
55    """
56    return util_string.filter_string_via_regex(
57        text=filename, regex="^[a-zA-Z0-9_\\.]+$", replacement_char="_"
58    )

Return an altered filename so that it is valid.

  • the new filename will only have alphanumerics, underscore and full-stop.
def copy_file(from_path: str, to_path: str) -> None:
61def copy_file(from_path: str, to_path: str) -> None:
62    """
63    Copy a file from one path to another.
64
65    Args:
66    from_path (str): The path of the file to copy.
67    to_path (str): The destination path to copy the file to.
68    """
69    shutil.copyfile(from_path, to_path)

Copy a file from one path to another.

Args: from_path (str): The path of the file to copy. to_path (str): The destination path to copy the file to.

def delete_file(path_to_file: str) -> None:
72def delete_file(path_to_file: str) -> None:
73    """
74    Delete a file from the disk.
75    """
76    os.remove(path_to_file)

Delete a file from the disk.

def get_modified_date(path_to_file: str) -> datetime.datetime:
79def get_modified_date(path_to_file: str) -> datetime.datetime:
80    return datetime.datetime.fromtimestamp(os.path.getmtime(path_to_file))
def get_unique_filepath(path_to_file: str) -> str:
83def get_unique_filepath(path_to_file: str) -> str:
84    """
85    Get a unique new filepath, similar to the given path.
86    """
87    filename_no_extension, extension = os.path.splitext(path_to_file)
88
89    suffix = 2
90    while os.path.exists(path_to_file):
91        path_to_file = f"{filename_no_extension}-{suffix:02}{extension}"
92        suffix += 1
93    return path_to_file

Get a unique new filepath, similar to the given path.

def get_this_script_dir(this_file: str) -> str:
 96def get_this_script_dir(this_file: str) -> str:
 97    """
 98    Get the directory of the current script file.
 99
100    Args:
101    this_file (str): The path of the current script file (__file__).
102
103    Returns:
104    str: The directory of the current script file.
105    """
106    return os.path.dirname(os.path.realpath(this_file))

Get the directory of the current script file.

Args: this_file (str): The path of the current script file (__file__).

Returns: str: The directory of the current script file.

def is_file_under_dir(path_to_file: str, path_to_dir: str) -> bool:
109def is_file_under_dir(path_to_file: str, path_to_dir: str) -> bool:
110    """
111    Does that file exist under that directory or a sub-directory.
112    """
113    path_to_file = os.path.normpath(path_to_file)
114    path_to_dir = os.path.normpath(path_to_dir) + os.sep
115    return path_to_file.startswith(path_to_dir)

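Check whether the file path lies under that directory or one of its sub-directories. This is a lexical comparison of the normalized paths; it does not check that the file actually exists.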

def is_empty_directory_only_subdirectories(path_to_file: str) -> bool:
131def is_empty_directory_only_subdirectories(path_to_file: str) -> bool:
132    """
133    Check if a directory is empty (only subdirectories are empty).
134
135    Args:
136    path_to_file (str): The path to the directory to check.
137
138    Returns:
139    bool: True if the directory is empty, False otherwise.
140    """
141    if os.path.isfile(path_to_file):
142        return is_empty_file(path_to_file)
143    contents = os.listdir(path_to_file)
144    for content in contents:
145        path_to_sub = os.path.join(path_to_file, content)
146        if os.path.isfile(path_to_sub):
147            return is_empty_file(path_to_sub)
148        if not os.path.isfile(path_to_sub):
149            is_empty = is_empty_directory_only_subdirectories(path_to_sub)
150            if not is_empty:
151                return False
152    return True

Check if a directory is empty, where a directory that contains only empty sub-directories (checked recursively) is also considered empty.

Args: path_to_file (str): The path to the directory to check.

Returns: bool: True if the directory is empty, False otherwise.

def is_empty_file(path_to_file: str) -> bool:
155def is_empty_file(path_to_file: str) -> bool:
156    """
157    Check if a file is empty.
158
159    Args:
160    path_to_file (str): The path to the file to check.
161
162    Returns:
163    bool: True if the file is empty, False otherwise.
164    """
165    if not os.path.isfile(path_to_file):
166        return False
167    if os.path.islink(path_to_file):
168        return False
169    fp_allow_long_path = _get_long_file_path(path_to_file)
170    size = os.path.getsize(fp_allow_long_path)
171    return size == 0

Check if a file is empty.

Args: path_to_file (str): The path to the file to check.

Returns: bool: True if the file is empty, False otherwise.

def move_file(from_filepath: str, to_filepath: str) -> None:
174def move_file(from_filepath: str, to_filepath: str) -> None:
175    """
176    Recursively move a file or directory to another location. This is similar to the Unix "mv" command. Return the file or directory's destination.
177
178    If the destination is a directory or a symlink to a directory, the source is moved inside the directory. The destination path must not already exist.
179    """
180    shutil.move(from_filepath, to_filepath)

Recursively move a file or directory to another location. This is similar to the Unix "mv" command. This function returns nothing; the destination path reported by shutil.move is discarded.

If the destination is a directory or a symlink to a directory, the source is moved inside the directory. The destination path must not already exist.

def read_lines_from_file( filepath: str, skip_comments: bool = False, encoding: str = 'utf-8') -> list[str]:
183def read_lines_from_file(
184    filepath: str, skip_comments: bool = False, encoding: str = "utf-8"
185) -> list[str]:
186    """
187    Read lines from a text file.
188
189    Args:
190    filepath (str): The path to the text file.
191    skip_comments (bool): Whether to skip lines starting with '#'. Default is False.
192    encoding (str): The file encoding to apply - defaults to utf-8.
193
194    Returns:
195    list: A list of lines read from the file.
196    """
197    lines = []
198    with open(filepath, encoding=encoding) as file:
199        lines = [line.strip() for line in file]
200    if skip_comments:
201        lines = _remove_comments(lines)
202    return lines

Read lines from a text file.

Args: filepath (str): The path to the text file. skip_comments (bool): Whether to skip lines starting with '#'. Default is False. encoding (str): The file encoding to apply - defaults to utf-8.

Returns: list: A list of lines read from the file.

def read_text_from_file(filepath: str, encoding: str = 'utf-8') -> str:
205def read_text_from_file(filepath: str, encoding: str = "utf-8") -> str:
206    """
207    Read text from a text file.
208
209    Args:
210    filepath (str): The path to the text file.
211    encoding (str): The file encoding to apply - defaults to utf-8.
212
213    Returns:
214    str: The text read from the file.
215    """
216    with open(filepath, encoding=encoding) as file:
217        return file.read()

Read text from a text file.

Args: filepath (str): The path to the text file. encoding (str): The file encoding to apply - defaults to utf-8.

Returns: str: The text read from the file.

def read_text_from_text_or_pdf_file_skipping_comments(filepath: str) -> str:
237def read_text_from_text_or_pdf_file_skipping_comments(filepath: str) -> str:
238    """
239    Read text from a text or PDF file, skipping comments.
240
241    Args:
242    filepath (str): The path to the text or PDF file.
243
244    Returns:
245    str: The text read from the file without comments.
246    """
247    if util_pdf.is_pdf(filepath):
248        return util_pdf.extract_text_from_pdf(filepath)
249    lines = read_lines_from_file(filepath)
250    filtered_lines = _remove_comments(lines)
251    return util_text.LINE_END.join(filtered_lines)

Read text from a text or PDF file, skipping comments.

Args: filepath (str): The path to the text or PDF file.

Returns: str: The text read from the file without comments.

def write_text_lines_to_file(lines: list[str], filepath: str, encoding: str = 'utf-8') -> None:
254def write_text_lines_to_file(
255    lines: list[str], filepath: str, encoding: str = "utf-8"
256) -> None:
257    """
258    Write lines of text to a text file.
259
260    Args:
261    lines (list): List of lines to write to the file.
262    filepath (str): The path to the output text file.
263    encoding (str): The file encoding to apply - defaults to utf-8.
264    """
265    with open(filepath, encoding=encoding, mode="w") as file:
266        for line in lines:
267            file.write(line + util_text.LINE_END)

Write lines of text to a text file.

Args: lines (list): List of lines to write to the file. filepath (str): The path to the output text file. encoding (str): The file encoding to apply - defaults to utf-8.

def write_array_to_file_skipping_empty(path_to_output_text_file: str, lines: list[str]) -> None:
270def write_array_to_file_skipping_empty(
271    path_to_output_text_file: str, lines: list[str]
272) -> None:
273    """
274    Write non-empty lines from an array to a file, skipping empty lines.
275
276    Args:
277    PATH_TO_OUTPUT_TEXT_FILE (str): The path to the output text file.
278    lines (list): List of lines to write to the file.
279    """
280    with open(path_to_output_text_file, "w") as f:
281        for line in lines:
282            if line is not None and len(line) > 0:
283                f.write(line + "\n")

Write non-empty lines from an array to a file, skipping empty lines.

Args: path_to_output_text_file (str): The path to the output text file. lines (list): List of lines to write to the file.

def write_text_to_file(text: str, filepath: str, encoding: str = 'utf-8') -> None:
286def write_text_to_file(text: str, filepath: str, encoding: str = "utf-8") -> None:
287    """
288    Write text to a text file.
289
290    Args:
291    text (str): The text to write to the file.
292    filepath (str): The path to the output text file.
293    encoding (str): The file encoding to apply - defaults to utf-8.
294    """
295    with open(filepath, "w", encoding=encoding) as f:
296        f.write(text)

Write text to a text file.

Args: text (str): The text to write to the file. filepath (str): The path to the output text file. encoding (str): The file encoding to apply - defaults to utf-8.

def get_last_part_of_path(file_path: str) -> str:
303def get_last_part_of_path(file_path: str) -> str:
304    """
305    Get the last part of a file path (filename).
306
307    Args:
308    file_path (str): The full file path.
309
310    Returns:
311    str: The last part of the file path (filename).
312    """
313    last_part = _get_last_part_of_path(file_path, os.sep)
314
315    # Windows can sometimes use unix separators (e.g. from bash shell)
316    if "/" in last_part:
317        return _get_last_part_of_path(last_part, "/")
318    return last_part

Get the last part of a file path (filename).

Args: file_path (str): The full file path.

Returns: str: The last part of the file path (filename).