Source code for absfuyu.dxt.strext

"""
Absfuyu: Data Extension
-----------------------
str extension

Version: 5.1.0
Date updated: 10/03/2025 (dd/mm/yyyy)
"""

# Module Package
# ---------------------------------------------------------------------------
__all__ = ["Text", "TextAnalyzeDictFormat"]


# Library
# ---------------------------------------------------------------------------
import random
from typing import NotRequired, Self, TypedDict

from absfuyu.core import ShowAllMethodsMixin, deprecated, versionadded, versionchanged
from absfuyu.logger import logger
from absfuyu.tools.generator import Charset, Generator
from absfuyu.util import set_min_max


# Class
# ---------------------------------------------------------------------------
[docs] class TextAnalyzeDictFormat(TypedDict): """ Dict format for ``Text.analyze()`` method Parameters ---------- digit : int Number of digit characters uppercase : int Number of uppercase characters lowercase : int Number of lowercase characters other : int Number of other printable characters is_pangram : NotRequired[bool] Is a pangram (Not required) is_palindrome : NotRequired[bool] Is a palindrome (Not required) """ digit: int uppercase: int lowercase: int other: int is_pangram: NotRequired[bool] is_palindrome: NotRequired[bool]
class Text(ShowAllMethodsMixin, str):
    """
    ``str`` extension

    Subclass of ``str`` that adds helper methods for dividing,
    analyzing, and transforming text
    """
def divide(self, string_split_size: int = 60) -> list[str]:
    """
    Divide long string into smaller size

    Parameters
    ----------
    string_split_size : int
        Divide string every ``x`` character.
        Values smaller than ``1`` are treated as ``1``
        (Default: ``60``)

    Returns
    -------
    list[str]
        A list in which each item is a smaller string
        with the size of ``string_split_size``
        (need to be concatenated later).
        The last item can be shorter, and an empty string
        gives an empty list


    Example:
    --------
    >>> test = Text("This is an extremely long line of text!")
    >>> test.divide(string_split_size=20)
    ['This is an extremely', ' long line of text!']
    """
    # A chunk size of 0 or below would never consume the text (the old
    # slicing loop spun forever), so fall back to the smallest valid size.
    chunk_size = max(1, string_split_size)

    text = str(self)
    return [
        text[start : start + chunk_size]
        for start in range(0, len(text), chunk_size)
    ]
def divide_with_variable(
    self,
    split_size: int = 60,
    split_var_len: int = 12,
    custom_var_name: str | None = None,
) -> list[str]:
    """
    Divide long string into smaller size,
    then assign a variable to each piece for later use

    Parameters
    ----------
    split_size : int
        Divide string every ``x`` character
        (Default: ``60``)

    split_var_len : int
        Length of the random variable name assigned to each item.
        Only used when ``custom_var_name`` is ``None``
        (Default: ``12``)

    custom_var_name : str | None
        Custom variable name when joining the pieces.
        Pieces are named ``<name>1``, ``<name>2``, ...
        (Default: ``None`` - random names)

    Returns
    -------
    list[str]
        One assignment per piece, followed by an assignment that
        concatenates every piece into the final variable, followed by
        the final variable name itself (useful with ``print()``).
        Pieces are quoted with ``repr()`` so quotes, backslashes, and
        newlines inside the text stay valid string literals.
        An empty text yields ``["<name>=''", "<name>"]``


    Example:
    --------
    >>> test = Text("This is an extremely long line of text!")
    >>> test.divide_with_variable(split_size=20)
    [
        "qNTCnmkFPTJg='This is an extremely'",
        "vkmLBUykYYDG=' long line of text!'",
        'sBoSwEfoxBIH=qNTCnmkFPTJg+vkmLBUykYYDG',
        'sBoSwEfoxBIH'
    ]

    >>> test = Text("This is an extremely long line of text!")
    >>> test.divide_with_variable(split_size=20, custom_var_name="test")
    [
        "test1='This is an extremely'",
        "test2=' long line of text!'",
        'test=test1+test2',
        'test'
    ]
    """
    pieces = self.divide(split_size)
    piece_count = len(pieces)

    if custom_var_name is None:
        # One random name per piece plus a last one for the joined result
        random_names = Generator.generate_string(
            charset=Charset.ALPHABET, size=split_var_len, times=piece_count + 1
        )
        piece_names = [random_names[i] for i in range(piece_count)]
        joined_name = random_names[-1]
    else:
        piece_names = [f"{custom_var_name}{i}" for i in range(1, piece_count + 1)]
        joined_name = custom_var_name

    # repr() escapes quotes/backslashes/newlines; wrapping the raw text in
    # '...' by hand produced broken assignments for such pieces.
    output = [f"{name}={piece!r}" for name, piece in zip(piece_names, pieces)]

    # With no pieces at all the joined value is simply the empty string
    joined_value = "+".join(piece_names) or "''"
    output.append(f"{joined_name}={joined_value}")
    output.append(joined_name)
    return output
@versionchanged("3.3.0", reason="Updated functionality")
def analyze(self, full: bool = False) -> TextAnalyzeDictFormat:
    """
    Count how many characters of each kind the string contains

    Only ASCII digits (``0-9``) and ASCII letters (``A-Z``, ``a-z``)
    are recognised; every other character is counted as ``other``.

    Parameters
    ----------
    full : bool
        Also report ``is_palindrome`` and ``is_pangram`` when ``True``
        (Default: ``False``)

    Returns
    -------
    TextAnalyzeDictFormat
        A dictionary with the number of digit, uppercase, lowercase,
        and other characters (plus the two flags when ``full=True``)


    Example:
    --------
    >>> test = Text("Random T3xt!")
    >>> test.analyze()
    {'digit': 1, 'uppercase': 2, 'lowercase': 7, 'other': 2}
    """
    digits = uppers = lowers = others = 0

    for char in self:
        if "0" <= char <= "9":
            digits += 1
        elif "A" <= char <= "Z":
            uppers += 1
        elif "a" <= char <= "z":
            lowers += 1
        else:
            others += 1

    result: TextAnalyzeDictFormat = {
        "digit": digits,
        "uppercase": uppers,
        "lowercase": lowers,
        "other": others,
    }
    if not full:
        return result

    result["is_palindrome"] = self.is_palindrome()
    result["is_pangram"] = self.is_pangram()
    return result
[docs] def reverse(self) -> Self: """ Reverse the string Returns ------- Text Reversed string Example: -------- >>> test = Text("Hello, World!") >>> test.reverse() '!dlroW ,olleH' """ return self.__class__(self[::-1])
@versionchanged("5.0.0", reason="Add ``custom_alphabet`` parameter")
def is_pangram(self, custom_alphabet: set[str] | None = None) -> bool:
    """
    Check if string is a pangram

        A pangram is a unique sentence in which
        every letter of the alphabet is used at least once

    The check ignores letter case for both the text and the alphabet.

    Parameters
    ----------
    custom_alphabet : set[str] | None, optional
        Custom alphabet to use
        (Default: ``None`` - the 26 letters ``a`` to ``z``)

    Returns
    -------
    bool
        | ``True`` if string is a pangram
        | ``False`` if string is not a pangram
    """
    if custom_alphabet is None:
        required = set("abcdefghijklmnopqrstuvwxyz")
    else:
        # The text is lowercased before the comparison, so the alphabet
        # must be lowercased too; otherwise an uppercase entry could
        # never be found and the result would always be False.
        required = {letter.lower() for letter in custom_alphabet}

    return required <= set(self.lower())
def is_palindrome(self) -> bool:
    """
    Check if string is a palindrome

        A palindrome is a word, verse, or sentence
        or a number that reads the same backward or forward

    The comparison is exact: letter case, spaces, and punctuation
    all take part in it.

    Returns
    -------
    bool
        | ``True`` if string is a palindrome
        | ``False`` if string is not a palindrome
    """
    length = len(self)
    # Compare each character of the first half with its mirror position
    return all(self[i] == self[length - 1 - i] for i in range(length // 2))
def to_hex(self, raw: bool = False) -> str:
    r"""
    Convert string to hex form

    The string is encoded as UTF-8 and every byte is written
    as two lowercase hex digits.

    Parameters
    ----------
    raw : bool
        | ``False``: hex string in the form of ``\x`` (default)
        | ``True``: normal hex string

    Returns
    -------
    str
        Hexed string (empty when the string itself is empty)


    Example:
    --------
    >>> test = Text("Hello, World!")
    >>> test.to_hex()
    '\\x48\\x65\\x6c\\x6c\\x6f\\x2c\\x20\\x57\\x6f\\x72\\x6c\\x64\\x21'
    """
    encoded = self.encode("utf-8")

    if raw:
        return encoded.hex()

    # Prefix each byte individually so an empty string yields "" rather
    # than a dangling "\x" with no digits after it.
    return "".join(f"\\x{byte:02x}" for byte in encoded)
def random_capslock(self, probability: int = 50) -> Self:
    """
    Randomly switch letters of the string to uppercase

    The text is lowercased first, then each character is
    uppercased independently with the given probability.

    Parameters
    ----------
    probability : int
        Probability in range [0, 100]
        (Default: ``50``)

    Returns
    -------
    Text
        Random capslocked text


    Example:
    --------
    >>> test = Text("This is an extremely long line of text!")
    >>> test.random_capslock()
    'tHis iS An ExtREmELY loNg liNE oF tExT!'
    """
    # NOTE(review): set_min_max presumably clamps the value into the
    # allowed range - confirm against absfuyu.util
    threshold = int(set_min_max(probability))

    # One random draw per character, taken in order
    letters = [
        char.upper() if random.randint(1, 100) <= threshold else char
        for char in self.lower()
    ]
    logger.debug(letters)
    return type(self)("".join(letters))
@versionchanged("5.0.0", reason="Use ``str.swapcase()``")
def reverse_capslock(self) -> Self:
    """
    Swap the case of every letter in the string

    Returns
    -------
    Text
        ``Text`` with uppercase letters lowered and lowercase letters raised


    Example:
    --------
    >>> test = Text("Foo")
    >>> test.reverse_capslock()
    'fOO'
    """
    swapped = self.swapcase()
    return type(self)(swapped)
def to_list(self) -> list[str]:
    """
    Split the string into a list of single characters

    Returns
    -------
    list[str]
        One item per character, in order


    Example:
    --------
    >>> test = Text("test")
    >>> test.to_list()
    ['t', 'e', 's', 't']
    """
    return [*self]
@deprecated("5.0.0", reason="Unused")
def to_listext(self) -> None:
    """
    Deprecated, will be removed soon

    Raises
    ------
    NotImplementedError
        Always
    """
    message = "Deprecated, will be removed soon"
    raise NotImplementedError(message)
@versionadded("3.3.0")
def count_pattern(self, pattern: str, ignore_capslock: bool = False) -> int:
    """
    Returns how many times ``pattern`` appears in text

    Overlapping occurrences are counted, so ``"aa"`` is found
    twice in ``"aaa"``.

    Parameters
    ----------
    pattern : str
        Pattern to count

    ignore_capslock : bool
        Ignore the pattern uppercase or lowercase
        (Default: ``False`` - Exact match)

    Returns
    -------
    int
        How many times pattern appeared

    Raises
    ------
    ValueError
        When ``pattern`` is longer than the text


    Example:
    --------
    >>> Text("test").count_pattern("t")
    2
    """
    if len(pattern) > len(self):
        raise ValueError(f"len(<pattern>) must not larger than {len(self)}")

    haystack = str(self)
    needle = pattern
    if ignore_capslock:
        haystack, needle = haystack.lower(), needle.lower()

    # Try every start offset at which the pattern could still fit
    last_start = len(haystack) - len(needle)
    return sum(haystack.startswith(needle, start) for start in range(last_start + 1))
@versionadded("3.3.0")
def hapax(self, strict: bool = False) -> list[str]:
    """
    A hapax legomenon (often abbreviated to hapax)
    is a word which occurs only once in either
    the written record of a language, the works of an author,
    or in a single text.

    This function returns a list of hapaxes (if any)
    (Lettercase is ignored)

    Parameters
    ----------
    strict : bool
        Remove all special characters before checking for hapax
        (Default: ``False``)

    Returns
    -------
    list[str]
        A list of hapaxes (lowercased), in order of appearance


    Example:
    --------
    >>> test = Text("A a. a, b c c= C| d d")
    >>> test.hapax()
    ['a', 'a.', 'a,', 'b', 'c', 'c=', 'c|']

    >>> test.hapax(strict=True)
    ['b']
    """
    text = str(self)
    if strict:
        special_characters = r"\"'.,:;|()[]{}\/!@#$%^&*-_=+?<>`~"
        # A single translate pass drops every special character at once
        text = text.translate(str.maketrans("", "", special_characters))

    words = text.lower().split()

    # Tally each word once up front instead of rescanning the whole
    # list for every word (which was quadratic in the number of words).
    occurrences: dict[str, int] = {}
    for word in words:
        occurrences[word] = occurrences.get(word, 0) + 1

    return [word for word in words if occurrences[word] == 1]
@versionadded("5.0.0")
def shorten(self, shorten_size: int = 60) -> str:
    """
    Render long text as a parenthesised block of short string literals

    Parameters
    ----------
    shorten_size : int, optional
        How many characters per line.
        Minimum is ``1``, by default ``60``

    Returns
    -------
    str
        Shortened text: ``(`` on the first line, one ``repr()`` of each
        piece per line, and ``)`` on the last line


    Example:
    --------
    >>> test = Text("a" * 200)
    >>> test.shorten()
    (
    'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
    'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
    'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
    'aaaaaaaaaaaaaaaaaaaa'
    )
    """
    line_width = max(1, shorten_size)
    pieces = self.divide(string_split_size=line_width)
    body = "\n".join(repr(piece) for piece in pieces)
    return f"(\n{body}\n)"