cornsnake.util_string

Functions for working with strings: filtering by regex, checking whether a string is effectively empty, replacing while preserving casing, splitting into lines, and counting words.

Documentation

  1"""
  2Functions for working with strings: filtering by regex, checking if is mostly empty, replacing whilst maintaining casing, splitting into lines, counting words.
  3
  4[Documentation](http://docs.mrseanryan.cornsnake.s3-website-eu-west-1.amazonaws.com/cornsnake/util_string.html)
  5"""
  6
  7import re
  8import string
  9
 10
 11def count_words(text: str) -> int:
 12    # Remove punctuation
 13    cleaned = text.translate(str.maketrans("", "", string.punctuation))
 14    # Split by whitespace
 15    words = cleaned.split()
 16    return len(words)
 17
 18
 19def filter_string_via_regex(text: str, regex: str, replacement_char: str) -> str:
 20    """
 21    Filter the given string, to only characters that match the given regex.
 22      - characters that do not match are replaced with 'replacement_char'
 23
 24    Example: filter_string_via_regex("this is a test 123 !@#", "_") -> "this_is_a_test_123____"
 25    """
 26
 27    def _is_ok(c: str) -> bool:
 28        return True if re.match(regex, c) else False
 29
 30    def _process_char(c: str) -> str:
 31        return c if _is_ok(c) else replacement_char
 32
 33    return "".join([_process_char(c) for c in text])
 34
 35
 36def is_empty(text: str) -> bool:
 37    """
 38    Function to check if a text string is empty or '-'.
 39
 40    Args:
 41    text (str): The text string to check.
 42
 43    Returns:
 44    bool: True if the text is empty (None or contains only whitespace or a hyphen), False otherwise.
 45    """
 46    if text is None:
 47        return True
 48    stripped = text.strip()
 49    return stripped == "" or stripped == "-"
 50
 51
 52def replace_keep_case(word__for_regex: str, replacement: str, text: str) -> str:
 53    """
 54    Replace ocurrences of 'word__for_regex' in 'text', with 'replacement', trying to maintain the same casing.
 55
 56    - supports lower, titla and upper casing
 57    """
 58
 59    def func(match: re.Match[str]) -> str:
 60        g = match.group()
 61        if g.islower():
 62            return replacement.lower()
 63        if g.istitle():
 64            return replacement.title()
 65        if g.isupper():
 66            return replacement.upper()
 67        return replacement
 68
 69    return re.sub(word__for_regex, func, text, flags=re.I)
 70
 71
 72def shorten(s: str, max_length: int = 40) -> str:
 73    """Shorten a string to max_length by removing characters from the end. Add ellipsis if truncated."""
 74    if len(s) <= max_length:
 75        return s
 76    return s[: max_length - 1] + "…"
 77
 78
 79def shorten_at_end(s: str, max_length: int = 40) -> str:
 80    """[alias for shorten()] Shorten a string to max_length by removing characters from the end. Add ellipsis if truncated."""
 81    return shorten(s, max_length)
 82
 83
 84def shorten_at_middle(s: str, max_length: int = 40) -> str:
 85    """Shorten a string to max_length by removing characters from the middle. Add ellipsis if truncated."""
 86    if len(s) <= max_length:
 87        return s
 88    # Calculate the number of characters to remove
 89    num_chars_to_remove = len(s) - max_length + 1
 90    # Remove characters from the middle
 91    start = (len(s) - num_chars_to_remove) // 2  # // also rounds down
 92    end = start + num_chars_to_remove
 93    return s[:start] + "…" + s[end:]
 94
 95
 96def shorten_at_start(s: str, max_length: int = 40) -> str:
 97    """Shorten a string to max_length by removing characters from the start. Add ellipsis if truncated."""
 98    if len(s) <= max_length:
 99        return s
100    # Calculate the number of characters to remove
101    num_chars_to_remove = len(s) - max_length + 1
102    # Remove characters from the start
103    return "…" + s[num_chars_to_remove:]
104
105
def split_into_lines(text: str, max_length: int = 200) -> list[str]:
    """
    Split text into lines of maximum length at word boundaries.

    Words are never broken: a single word longer than max_length is placed on a
    line of its own, and that line exceeds max_length. Runs of whitespace in the
    input collapse to single spaces.

    Args:
        text (str): Text to split
        max_length (int): Maximum line length (default: 200)

    Returns:
        list: List of lines (empty if the text is empty or only whitespace)
    """
    if not text:
        return []

    lines: list[str] = []
    current_line = ""

    for word in text.split():
        if not current_line:
            # First word of a line is always accepted, even if over-long;
            # otherwise an empty line would be emitted ahead of it.
            current_line = word
        elif len(current_line) + 1 + len(word) <= max_length:
            # Word plus its separating space still fits
            current_line += " " + word
        else:
            # Line would be too long, start a new one
            lines.append(current_line)
            current_line = word

    # Add the last line if it has content
    if current_line:
        lines.append(current_line)

    return lines
def count_words(text: str) -> int:
    """
    Count the words in the given text.

    Every character listed in string.punctuation is deleted first, then the
    remaining text is split on runs of whitespace.

    Args:
    text (str): The text to count words in.

    Returns:
    int: The number of whitespace-separated tokens left after punctuation removal.
    """
    # Mapping a character to None in a translation table deletes it.
    drop_punctuation = str.maketrans(dict.fromkeys(string.punctuation))
    return len(text.translate(drop_punctuation).split())
def filter_string_via_regex(text: str, regex: str, replacement_char: str) -> str:
    """
    Filter the given string, to only characters that match the given regex.
      - the regex is tested against each character on its own (match anchored at the start)
      - characters that do not match are replaced with 'replacement_char'

    Args:
    text (str): The string to filter.
    regex (str): Pattern that an individual character must match to be kept.
    replacement_char (str): Text substituted for each character that does not match.

    Returns:
    str: The filtered string.

    Raises:
    re.error: If 'regex' is not a valid pattern.

    Example: filter_string_via_regex("this is a test 123 !@#", "[a-zA-Z0-9]", "_") -> "this_is_a_test_123____"
    """
    # Compile once, rather than looking the pattern up again for every character.
    pattern = re.compile(regex)
    return "".join(c if pattern.match(c) else replacement_char for c in text)

Filter the given string, to only characters that match the given regex.

  • characters that do not match are replaced with 'replacement_char'

Example: filter_string_via_regex("this is a test 123 !@#", "[a-zA-Z0-9]", "_") -> "this_is_a_test_123____"

def is_empty(text: str) -> bool:
    """
    Report whether a text string carries no real content.

    Args:
    text (str): The text string to check.

    Returns:
    bool: True if the text is None, or is blank or a lone hyphen once surrounding whitespace is stripped; False otherwise.
    """
    return text is None or text.strip() in ("", "-")

Function to check if a text string is empty or '-'.

Args: text (str): The text string to check.

Returns: bool: True if the text is empty (None or contains only whitespace or a hyphen), False otherwise.

def replace_keep_case(word__for_regex: str, replacement: str, text: str) -> str:
    """
    Replace occurrences of 'word__for_regex' in 'text' with 'replacement', trying to maintain the same casing.

    - matching is case-insensitive
    - supports lower, title and upper casing
    - a match in any other casing receives 'replacement' unchanged
    """
    # (test on the matched text, transform applied to the replacement), tried in order.
    casings = (
        (str.islower, str.lower),
        (str.istitle, str.title),
        (str.isupper, str.upper),
    )

    def _recase(found: re.Match[str]) -> str:
        matched = found.group()
        for has_casing, apply_casing in casings:
            if has_casing(matched):
                return apply_casing(replacement)
        return replacement

    return re.sub(word__for_regex, _recase, text, flags=re.I)

Replace occurrences of 'word__for_regex' in 'text' with 'replacement', trying to maintain the same casing.

  • supports lower, title and upper casing
def shorten(s: str, max_length: int = 40) -> str:
    """
    Shorten a string to max_length by removing characters from the end. Add ellipsis if truncated.

    Args:
        s (str): The string to shorten.
        max_length (int): Maximum length of the result, ellipsis included (default: 40)

    Returns:
        str: 's' unchanged if it already fits, otherwise its first max_length - 1
        characters followed by "…". A non-positive max_length leaves no room even
        for the ellipsis, so a string that does not fit becomes "".
    """
    if len(s) <= max_length:
        return s
    if max_length <= 0:
        # Without this guard the slice end below would be negative and keep almost all of 's'.
        return ""
    return s[: max_length - 1] + "…"

Shorten a string to max_length by removing characters from the end. Add ellipsis if truncated.

def shorten_at_end(s: str, max_length: int = 40) -> str:
    """[alias for shorten()] Trim a string from the end so it fits in max_length, marking any truncation with an ellipsis."""
    return shorten(s, max_length=max_length)

[alias for shorten()] Shorten a string to max_length by removing characters from the end. Add ellipsis if truncated.

def shorten_at_middle(s: str, max_length: int = 40) -> str:
    """
    Shorten a string to max_length by removing characters from the middle. Add ellipsis if truncated.

    Args:
        s (str): The string to shorten.
        max_length (int): Maximum length of the result, ellipsis included (default: 40)

    Returns:
        str: 's' unchanged if it already fits, otherwise the start and end of 's'
        joined by "…". A non-positive max_length leaves no room even for the
        ellipsis, so a string that does not fit becomes "".
    """
    if len(s) <= max_length:
        return s
    if max_length <= 0:
        # Without this guard the slice indices go negative and nothing is actually shortened.
        return ""
    # One slot is taken by the ellipsis; the rest is shared between head and tail.
    kept = max_length - 1
    head = kept // 2  # // rounds down, so any odd character goes to the tail
    tail = kept - head
    return s[:head] + "…" + s[len(s) - tail :]

Shorten a string to max_length by removing characters from the middle. Add ellipsis if truncated.

def shorten_at_start(s: str, max_length: int = 40) -> str:
    """
    Shorten a string to max_length by removing characters from the start. Add ellipsis if truncated.

    Args:
        s (str): The string to shorten.
        max_length (int): Maximum length of the result, ellipsis included (default: 40)

    Returns:
        str: 's' unchanged if it already fits, otherwise "…" followed by its last
        max_length - 1 characters. A non-positive max_length leaves no room even
        for the ellipsis, so a string that does not fit becomes "".
    """
    if len(s) <= max_length:
        return s
    if max_length <= 0:
        # Without this guard a lone "…" would be returned, which is longer than the limit.
        return ""
    # Calculate the number of characters to remove (one extra to make room for the ellipsis)
    num_chars_to_remove = len(s) - max_length + 1
    return "…" + s[num_chars_to_remove:]

Shorten a string to max_length by removing characters from the start. Add ellipsis if truncated.

def split_into_lines(text: str, max_length: int = 200) -> list[str]:
    """
    Split text into lines of maximum length at word boundaries.

    Words are never broken: a single word longer than max_length is placed on a
    line of its own, and that line exceeds max_length. Runs of whitespace in the
    input collapse to single spaces.

    Args:
        text (str): Text to split
        max_length (int): Maximum line length (default: 200)

    Returns:
        list: List of lines (empty if the text is empty or only whitespace)
    """
    if not text:
        return []

    lines: list[str] = []
    current_line = ""

    for word in text.split():
        if not current_line:
            # First word of a line is always accepted, even if over-long;
            # otherwise an empty line would be emitted ahead of it.
            current_line = word
        elif len(current_line) + 1 + len(word) <= max_length:
            # Word plus its separating space still fits
            current_line += " " + word
        else:
            # Line would be too long, start a new one
            lines.append(current_line)
            current_line = word

    # Add the last line if it has content
    if current_line:
        lines.append(current_line)

    return lines

Split text into lines of maximum length at word boundaries.

Args: text (str): Text to split max_length (int): Maximum line length (default: 200)

Returns: list: List of lines