cornsnake.util_string
Functions for working with strings: filtering by regex, checking if is mostly empty, replacing whilst maintaining casing, splitting into lines, counting words.
"""
Functions for working with strings: filtering by regex, checking if is mostly empty, replacing whilst maintaining casing, splitting into lines, counting words.

[Documentation](http://docs.mrseanryan.cornsnake.s3-website-eu-west-1.amazonaws.com/cornsnake/util_string.html)
"""

import re
import string


def count_words(text: str) -> int:
    """
    Count the whitespace-separated words in the given text.

    ASCII punctuation (string.punctuation) is deleted before splitting, so a
    token made only of punctuation is not counted, and "don't" counts as one word.

    Args:
    text (str): The text to count words in.

    Returns:
    int: The number of words.
    """
    # Remove punctuation
    cleaned = text.translate(str.maketrans("", "", string.punctuation))
    # Split by whitespace
    return len(cleaned.split())


def filter_string_via_regex(text: str, regex: str, replacement_char: str) -> str:
    """
    Filter the given string, to only characters that match the given regex.
    - characters that do not match are replaced with 'replacement_char'
    - each character is tested on its own, via a match anchored at its start

    Example: filter_string_via_regex("this is a test 123 !@#", "[a-zA-Z0-9]", "_") -> "this_is_a_test_123____"
    """
    # Compile once, rather than looking the pattern up again for every character.
    pattern = re.compile(regex)
    return "".join(c if pattern.match(c) else replacement_char for c in text)


def is_empty(text: str) -> bool:
    """
    Function to check if a text string is empty or '-'.

    Args:
    text (str): The text string to check.

    Returns:
    bool: True if the text is empty (None or contains only whitespace or a hyphen), False otherwise.
    """
    if text is None:
        return True
    return text.strip() in ("", "-")


def replace_keep_case(word__for_regex: str, replacement: str, text: str) -> str:
    """
    Replace occurrences of 'word__for_regex' in 'text', with 'replacement', trying to maintain the same casing.

    - supports lower, title and upper casing
    - matching is case-insensitive; a match with any other casing gets 'replacement' unchanged
    """

    def func(match: re.Match[str]) -> str:
        g = match.group()
        if g.islower():
            return replacement.lower()
        if g.istitle():
            return replacement.title()
        if g.isupper():
            return replacement.upper()
        return replacement

    return re.sub(word__for_regex, func, text, flags=re.I)


def shorten(s: str, max_length: int = 40) -> str:
    """
    Shorten a string to max_length by removing characters from the end. Add ellipsis if truncated.

    The ellipsis counts towards max_length. If max_length is zero or negative,
    an empty string is returned, since no ellipsis can fit.
    """
    if len(s) <= max_length:
        return s
    if max_length <= 0:
        # A non-positive limit would make the slice below wrap around.
        return ""
    return s[: max_length - 1] + "…"


def shorten_at_end(s: str, max_length: int = 40) -> str:
    """[alias for shorten()] Shorten a string to max_length by removing characters from the end. Add ellipsis if truncated."""
    return shorten(s, max_length)


def shorten_at_middle(s: str, max_length: int = 40) -> str:
    """
    Shorten a string to max_length by removing characters from the middle. Add ellipsis if truncated.

    The ellipsis counts towards max_length. When the kept characters cannot be
    split evenly, the extra character is kept at the end. If max_length is zero
    or negative, an empty string is returned.
    """
    if len(s) <= max_length:
        return s
    if max_length <= 0:
        # A non-positive limit would make the slice indexes below wrap around.
        return ""
    # One slot is reserved for the ellipsis
    num_chars_to_keep = max_length - 1
    head = num_chars_to_keep // 2  # // rounds down
    tail = num_chars_to_keep - head
    return s[:head] + "…" + s[len(s) - tail :]


def shorten_at_start(s: str, max_length: int = 40) -> str:
    """
    Shorten a string to max_length by removing characters from the start. Add ellipsis if truncated.

    The ellipsis counts towards max_length. If max_length is zero or negative,
    an empty string is returned, since no ellipsis can fit.
    """
    if len(s) <= max_length:
        return s
    if max_length <= 0:
        return ""
    # Calculate the number of characters to remove (one extra, to make room for the ellipsis)
    num_chars_to_remove = len(s) - max_length + 1
    return "…" + s[num_chars_to_remove:]


def split_into_lines(text: str, max_length: int = 200) -> list[str]:
    """
    Split text into lines of maximum length at word boundaries.

    Words are separated on any whitespace and re-joined with single spaces.
    A single word longer than max_length is never broken: it is placed on a
    line of its own, which then exceeds max_length.

    Args:
    text (str): Text to split
    max_length (int): Maximum line length (default: 200)

    Returns:
    list: List of lines
    """
    if not text:
        return []

    result: list[str] = []
    current_words: list[str] = []
    current_length = 0

    for word in text.split():
        # A joining space is only needed when the line already has a word
        needed = len(word) + (1 if current_words else 0)
        if current_length + needed <= max_length:
            current_words.append(word)
            current_length += needed
            continue
        # Line would be too long, start a new one.
        # Only flush a line that has content, so that an over-long first word
        # does not produce an empty leading line.
        if current_words:
            result.append(" ".join(current_words))
        current_words = [word]
        current_length = len(word)

    # Add the last line if it has content
    if current_words:
        result.append(" ".join(current_words))

    return result
def filter_string_via_regex(text: str, regex: str, replacement_char: str) -> str:
    """
    Filter the given string, to only characters that match the given regex.
    - characters that do not match are replaced with 'replacement_char'
    - each character is tested on its own, via a match anchored at its start

    Args:
    text (str): The text to filter.
    regex (str): Pattern that an individual character must match to be kept.
    replacement_char (str): Text substituted for each character that does not match.

    Returns:
    str: The filtered text.

    Example: filter_string_via_regex("this is a test 123 !@#", "[a-zA-Z0-9]", "_") -> "this_is_a_test_123____"
    """
    # Compile once, rather than looking the pattern up again for every character.
    pattern = re.compile(regex)
    return "".join(c if pattern.match(c) else replacement_char for c in text)
Filter the given string, to only characters that match the given regex.
- characters that do not match are replaced with 'replacement_char'
Example: filter_string_via_regex("this is a test 123 !@#", "[a-zA-Z0-9]", "_") -> "this_is_a_test_123____"
def is_empty(text: str) -> bool:
    """
    Tell whether a text string carries no content.

    Args:
    text (str): The text string to check.

    Returns:
    bool: True if the text is None, or is blank or a single hyphen once surrounding whitespace is stripped. False otherwise.
    """
    return text is None or text.strip() in ("", "-")
Function to check if a text string is empty or '-'.
Args: text (str): The text string to check.
Returns: bool: True if the text is empty (None or contains only whitespace or a hyphen), False otherwise.
def replace_keep_case(word__for_regex: str, replacement: str, text: str) -> str:
    """
    Replace occurrences of 'word__for_regex' in 'text', with 'replacement', trying to maintain the same casing.

    - matching is case-insensitive
    - supports lower, title and upper casing (checked in that order)
    - a match with any other casing gets 'replacement' unchanged
    """
    # (casing test, casing transform) pairs, tried in order; the first hit wins.
    casing_rules = (
        (str.islower, str.lower),
        (str.istitle, str.title),
        (str.isupper, str.upper),
    )

    def _recase(found: re.Match[str]) -> str:
        matched_text = found.group()
        for has_casing, apply_casing in casing_rules:
            if has_casing(matched_text):
                return apply_casing(replacement)
        return replacement

    return re.compile(word__for_regex, flags=re.I).sub(_recase, text)
Replace occurrences of 'word__for_regex' in 'text', with 'replacement', trying to maintain the same casing.
- supports lower, title and upper casing
def shorten(s: str, max_length: int = 40) -> str:
    """
    Shorten a string to max_length by removing characters from the end. Add ellipsis if truncated.

    The ellipsis counts towards max_length. If max_length is zero or negative,
    an empty string is returned, since no ellipsis can fit.

    Args:
    s (str): The string to shorten.
    max_length (int): Maximum length of the result (default: 40)

    Returns:
    str: 's' unchanged if it already fits, else the truncated text ending in an ellipsis.
    """
    if len(s) <= max_length:
        return s
    if max_length <= 0:
        # A non-positive limit would make the slice below wrap around.
        return ""
    return s[: max_length - 1] + "…"
Shorten a string to max_length by removing characters from the end. Add ellipsis if truncated.
def shorten_at_end(s: str, max_length: int = 40) -> str:
    """
    [alias for shorten()] Trim a string from its end so that it fits max_length.

    Delegates directly to shorten(), so the result is whatever shorten() returns
    for the same arguments.
    """
    return shorten(s, max_length=max_length)
[alias for shorten()] Shorten a string to max_length by removing characters from the end. Add ellipsis if truncated.
def shorten_at_middle(s: str, max_length: int = 40) -> str:
    """
    Shorten a string to max_length by removing characters from the middle. Add ellipsis if truncated.

    The ellipsis counts towards max_length. When the kept characters cannot be
    split evenly, the extra character is kept at the end. If max_length is zero
    or negative, an empty string is returned.

    Args:
    s (str): The string to shorten.
    max_length (int): Maximum length of the result (default: 40)

    Returns:
    str: 's' unchanged if it already fits, else its start and end joined by an ellipsis.
    """
    if len(s) <= max_length:
        return s
    if max_length <= 0:
        # A non-positive limit would make the slice indexes below wrap around.
        return ""
    # One slot is reserved for the ellipsis
    num_chars_to_keep = max_length - 1
    head = num_chars_to_keep // 2  # // rounds down
    tail = num_chars_to_keep - head
    return s[:head] + "…" + s[len(s) - tail :]
Shorten a string to max_length by removing characters from the middle. Add ellipsis if truncated.
def shorten_at_start(s: str, max_length: int = 40) -> str:
    """
    Shorten a string to max_length by removing characters from the start. Add ellipsis if truncated.

    The ellipsis counts towards max_length. If max_length is zero or negative,
    an empty string is returned, since no ellipsis can fit.

    Args:
    s (str): The string to shorten.
    max_length (int): Maximum length of the result (default: 40)

    Returns:
    str: 's' unchanged if it already fits, else an ellipsis followed by the end of 's'.
    """
    if len(s) <= max_length:
        return s
    if max_length <= 0:
        return ""
    # Calculate the number of characters to remove (one extra, to make room for the ellipsis)
    num_chars_to_remove = len(s) - max_length + 1
    return "…" + s[num_chars_to_remove:]
Shorten a string to max_length by removing characters from the start. Add ellipsis if truncated.
def split_into_lines(text: str, max_length: int = 200) -> list[str]:
    """
    Split text into lines of maximum length at word boundaries.

    Words are separated on any whitespace and re-joined with single spaces.
    A single word longer than max_length is never broken: it is placed on a
    line of its own, which then exceeds max_length.

    Args:
    text (str): Text to split
    max_length (int): Maximum line length (default: 200)

    Returns:
    list: List of lines
    """
    if not text:
        return []

    result: list[str] = []
    current_words: list[str] = []
    current_length = 0

    for word in text.split():
        # A joining space is only needed when the line already has a word
        needed = len(word) + (1 if current_words else 0)
        if current_length + needed <= max_length:
            current_words.append(word)
            current_length += needed
            continue
        # Line would be too long, start a new one.
        # Only flush a line that has content, so that an over-long first word
        # does not produce an empty leading line.
        if current_words:
            result.append(" ".join(current_words))
        current_words = [word]
        current_length = len(word)

    # Add the last line if it has content
    if current_words:
        result.append(" ".join(current_words))

    return result
Split text into lines of maximum length at word boundaries.
Args: text (str): Text to split max_length (int): Maximum line length (default: 200)
Returns: list: List of lines