"""
Absfuyu: Data Extension
-----------------------
str extension
Version: 5.1.0
Date updated: 10/03/2025 (dd/mm/yyyy)
"""
# Module Package
# ---------------------------------------------------------------------------
__all__ = ["Text", "TextAnalyzeDictFormat"]
# Library
# ---------------------------------------------------------------------------
import random
from typing import NotRequired, Self, TypedDict
from absfuyu.core import ShowAllMethodsMixin, deprecated, versionadded, versionchanged
from absfuyu.logger import logger
from absfuyu.tools.generator import Charset, Generator
from absfuyu.util import set_min_max
# Class
# ---------------------------------------------------------------------------
[docs]
class TextAnalyzeDictFormat(TypedDict):
"""
Dict format for ``Text.analyze()`` method
Parameters
----------
digit : int
Number of digit characters
uppercase : int
Number of uppercase characters
lowercase : int
Number of lowercase characters
other : int
Number of other printable characters
is_pangram : NotRequired[bool]
Is a pangram (Not required)
is_palindrome : NotRequired[bool]
Is a palindrome (Not required)
"""
digit: int
uppercase: int
lowercase: int
other: int
is_pangram: NotRequired[bool]
is_palindrome: NotRequired[bool]
[docs]
class Text(ShowAllMethodsMixin, str):
    """
    ``str`` extension

    Subclass of ``str`` (mixed with ``ShowAllMethodsMixin``) that adds
    helper methods for splitting, analysing and transforming text.
    """
[docs]
def divide(self, string_split_size: int = 60) -> list[str]:
    """
    Divide a long string into consecutive smaller pieces

    Parameters
    ----------
    string_split_size : int
        Maximum number of characters per piece.
        Values below ``1`` are treated as ``1``
        (Default: ``60``)

    Returns
    -------
    list[str]
        The pieces in their original order. Every piece holds
        ``string_split_size`` characters except possibly the last one,
        which holds the remainder. Empty text gives an empty list.
        Concatenating the pieces restores the original text.

    Example:
    --------
    >>> test = Text("This is an extremely long line of text!")
    >>> test.divide(string_split_size=20)
    ['This is an extremely', ' long line of text!']
    """
    text = str(self)
    # A non-positive size can never consume the text (a slicing loop would
    # spin forever), so clamp it to at least one character per piece.
    size = max(1, string_split_size)
    # Stepping through start offsets slices each piece exactly once.
    return [text[start : start + size] for start in range(0, len(text), size)]
[docs]
def divide_with_variable(
self,
split_size: int = 60,
split_var_len: int = 12,
custom_var_name: str | None = None,
) -> list[str]:
"""
Divide long string into smaller size,
then assign a random variable to splited
string for later use
Parameters
----------
split_size : int
Divide string every ``x`` character
(Default: ``60``)
split_var_len : int
Length of variable name assigned to each item
(Default: ``12``)
custom_var_name : str
Custom variable name when join string
Returns
-------
list[str]
A list in which each item is a smaller
string with the size of ``split_size``
and a way to concaternate them (when using ``print()``)
Example:
--------
>>> test = Text("This is an extremely long line of text!")
>>> test.divide_with_variable(split_size=20)
[
"qNTCnmkFPTJg='This is an extremely'",
"vkmLBUykYYDG=' long line of text!'",
'sBoSwEfoxBIH=qNTCnmkFPTJg+vkmLBUykYYDG',
'sBoSwEfoxBIH'
]
>>> test = Text("This is an extremely long line of text!")
>>> test.divide_with_variable(split_size=20, custom_var_name="test")
[
"test1='This is an extremely'",
"test2=' long line of text!'",
'test=test1+test2',
'test'
]
"""
temp = self.divide(split_size)
output = []
# split variable
splt_len = len(temp)
if custom_var_name is None:
splt_name = Generator.generate_string(
charset=Charset.ALPHABET, size=split_var_len, times=splt_len + 1
)
for i in range(splt_len):
output.append(f"{splt_name[i]}='{temp[i]}'")
else:
for i in range(splt_len):
output.append(f"{custom_var_name}{i + 1}='{temp[i]}'")
# joined variable
temp = []
if custom_var_name is None:
for i in range(splt_len):
if i == 0:
temp.append(f"{splt_name[-1]}=")
if i == splt_len - 1:
temp.append(f"{splt_name[i]}")
else:
temp.append(f"{splt_name[i]}+")
else:
for i in range(splt_len):
if i == 0:
temp.append(f"{custom_var_name}=")
if i == splt_len - 1:
temp.append(f"{custom_var_name}{i + 1}")
else:
temp.append(f"{custom_var_name}{i + 1}+")
output.append("".join(temp))
if custom_var_name is None:
output.append(splt_name[-1])
else:
output.append(custom_var_name)
return output
[docs]
@versionchanged("3.3.0", reason="Updated functionality")
def analyze(self, full: bool = False) -> TextAnalyzeDictFormat:
    """
    Count characters of the text by category

    Only ASCII ``0-9``, ``A-Z`` and ``a-z`` are recognised as digit,
    uppercase and lowercase; every other character (whitespace,
    punctuation, non-ASCII, ...) is counted as ``other``.

    Parameters
    ----------
    full : bool
        When ``True``, also add the ``is_palindrome``
        and ``is_pangram`` flags to the result
        (Default: ``False``)

    Returns
    -------
    dict | TextAnalyzeDictFormat
        Counters for ``digit``, ``uppercase``, ``lowercase``
        and ``other`` characters (plus the two flags when ``full``)

    Example:
    --------
    >>> test = Text("Random T3xt!")
    >>> test.analyze()
    {'digit': 1, 'uppercase': 2, 'lowercase': 7, 'other': 2}
    """
    stats: TextAnalyzeDictFormat = {
        "digit": 0,
        "uppercase": 0,
        "lowercase": 0,
        "other": 0,
    }

    for char in self:
        code_point = ord(char)
        if 48 <= code_point <= 57:  # "0" .. "9"
            stats["digit"] += 1
        elif 65 <= code_point <= 90:  # "A" .. "Z"
            stats["uppercase"] += 1
        elif 97 <= code_point <= 122:  # "a" .. "z"
            stats["lowercase"] += 1
        else:
            stats["other"] += 1

    if not full:
        return stats

    stats["is_palindrome"] = self.is_palindrome()
    stats["is_pangram"] = self.is_pangram()
    return stats
[docs]
def reverse(self) -> Self:
"""
Reverse the string
Returns
-------
Text
Reversed string
Example:
--------
>>> test = Text("Hello, World!")
>>> test.reverse()
'!dlroW ,olleH'
"""
return self.__class__(self[::-1])
[docs]
@versionchanged("5.0.0", reason="Add ``custom_alphabet`` parameter")
def is_pangram(self, custom_alphabet: set[str] | None = None) -> bool:
    """
    Check if string is a pangram

    A pangram is a sentence in which every letter
    of the alphabet is used at least once.
    The check ignores letter case.

    Parameters
    ----------
    custom_alphabet : set[str] | None, optional
        Letters that must all appear in the text.
        When ``None``, the 26 letters ``a-z`` are used
        (Default: ``None``)

    Returns
    -------
    bool
        | ``True`` if string is a pangram
        | ``False`` if string is not a pangram

    Example:
    --------
    >>> Text("The quick brown fox jumps over the lazy dog").is_pangram()
    True
    >>> Text("Hello").is_pangram()
    False
    """
    if custom_alphabet is None:
        required = set("abcdefghijklmnopqrstuvwxyz")
    else:
        # The text is lower-cased below, so the alphabet must be normalised
        # the same way; otherwise an uppercase letter could never match.
        required = {letter.lower() for letter in custom_alphabet}

    # Pangram <=> every required letter occurs somewhere in the text
    return required <= set(self.lower())
[docs]
def is_palindrome(self) -> bool:
    """
    Check if string is a palindrome

    A palindrome reads the same backward and forward.
    The comparison is exact: case, spaces and punctuation all count.

    Returns
    -------
    bool
        | ``True`` if string is a palindrome
        | ``False`` if string is not a palindrome
    """
    # Walk the text from both ends at once; any mismatch disqualifies it
    return all(front == back for front, back in zip(self, reversed(self)))
[docs]
def to_hex(self, raw: bool = False) -> str:
    r"""
    Convert string to hex form (of its UTF-8 bytes)

    Parameters
    ----------
    raw : bool
        | ``False``: every byte is written as ``\x`` followed by two hex digits (default)
        | ``True``: plain hex digits with no prefix

    Returns
    -------
    str
        Hexed string; empty text gives an empty string in both modes

    Example:
    --------
    >>> test = Text("Hello, World!")
    >>> test.to_hex()
    '\\x48\\x65\\x6c\\x6c\\x6f\\x2c\\x20\\x57\\x6f\\x72\\x6c\\x64\\x21'
    """
    encoded = self.encode("utf-8")

    if raw:
        return encoded.hex()

    # Prefix each byte individually so empty text yields "" rather than
    # a dangling "\x" with no digits after it.
    return "".join(f"\\x{byte:02x}" for byte in encoded)
[docs]
def random_capslock(self, probability: int = 50) -> Self:
    """
    Randomly turn letters of the string into uppercase

    The text is lower-cased first, then each character is
    independently upper-cased with the given chance.

    Parameters
    ----------
    probability : int
        Chance (in percent) for each character to be upper-cased.
        Expected in range [0, 100]; the value is passed through
        ``set_min_max`` and converted to ``int`` before use
        (Default: ``50``)

    Returns
    -------
    Text
        Random capslocked text (same class as ``self``)

    Example:
    --------
    >>> test = Text("This is an extremely long line of text!")
    >>> test.random_capslock()
    'tHis iS An ExtREmELY loNg liNE oF tExT!'
    """
    chance = int(set_min_max(probability))

    # One random draw per character, taken in text order
    letters = [
        char.upper() if random.randint(1, 100) <= chance else char
        for char in self.lower()
    ]

    logger.debug(letters)
    return self.__class__("".join(letters))
[docs]
@versionchanged("5.0.0", reason="Use ``str.swapcase()``")
def reverse_capslock(self) -> Self:
    """
    Swap the case of every letter in the string

    Returns
    -------
    Text
        New instance (same class as ``self``) in which uppercase
        letters became lowercase and vice versa

    Example:
    --------
    >>> test = Text("Foo")
    >>> test.reverse_capslock()
    'fOO'
    """
    flipped = self.swapcase()
    return self.__class__(flipped)
[docs]
def to_list(self) -> list[str]:
    """
    Break the string into a list of its characters

    Returns
    -------
    list[str]
        One single-character string per character, in order

    Example:
    --------
    >>> test = Text("test")
    >>> test.to_list()
    ['t', 'e', 's', 't']
    """
    return [char for char in self]
[docs]
@deprecated("5.0.0", reason="Unused")
def to_listext(self) -> None:
    """
    Deprecated, will be removed soon

    Raises
    ------
    NotImplementedError
        Always; the method no longer has an implementation
    """
    message = "Deprecated, will be removed soon"
    raise NotImplementedError(message)
[docs]
@versionadded("3.3.0")
def count_pattern(self, pattern: str, ignore_capslock: bool = False) -> int:
    """
    Count how many times ``pattern`` occurs in the text

    Every start position is checked, so overlapping
    occurrences are counted (``"aa"`` occurs twice in ``"aaa"``).

    Parameters
    ----------
    pattern : str
        Pattern to count

    ignore_capslock : bool
        When ``True``, both text and pattern are lower-cased
        before comparing
        (Default: ``False`` - Exact match)

    Returns
    -------
    int
        How many times pattern appeared

    Raises
    ------
    ValueError
        When ``pattern`` is longer than the text

    Example:
    --------
    >>> Text("test").count_pattern("t")
    2
    """
    if len(pattern) > len(self):
        raise ValueError(f"len(<pattern>) must not larger than {len(self)}")

    haystack, needle = str(self), pattern
    if ignore_capslock:
        haystack, needle = haystack.lower(), needle.lower()

    # Last index at which a full-length match could still begin
    last_start = len(haystack) - len(needle)
    return sum(
        haystack.startswith(needle, start) for start in range(last_start + 1)
    )
[docs]
@versionadded("3.3.0")
def hapax(self, strict: bool = False) -> list[str]:
    """
    A hapax legomenon (often abbreviated to hapax)
    is a word which occurs only once in either
    the written record of a language, the works of
    an author, or in a single text.

    This function returns a list of hapaxes (if any):
    the whitespace-separated words that occur exactly once
    (Lettercase is ignored)

    Parameters
    ----------
    strict : bool
        Remove a fixed set of ASCII punctuation characters
        from the text before checking for hapax
        (Default: ``False``)

    Returns
    -------
    list[str]
        A list of hapaxes (lower-cased), in order of appearance

    Example:
    --------
    >>> test = Text("A a. a, b c c= C| d d")
    >>> test.hapax()
    ['a', 'a.', 'a,', 'b', 'c', 'c=', 'c|']

    >>> test.hapax(strict=True)
    ['b']
    """
    # Local import keeps this block self-contained
    from collections import Counter

    text = str(self)
    if strict:
        # Drop every special character in a single pass
        special_characters = r"\"'.,:;|()[]{}\/!@#$%^&*-_=+?<>`~"
        text = text.translate(str.maketrans("", "", special_characters))

    words = text.lower().split()

    # Tally once instead of calling list.count() for every word
    occurrences = Counter(words)
    return [word for word in words if occurrences[word] == 1]
[docs]
@versionadded("5.0.0")
def shorten(self, shorten_size: int = 60) -> str:
    """
    Render long text as a parenthesised block of short quoted lines

    The text is cut with ``divide()``, each piece is quoted with
    ``repr()``, and the pieces are placed one per line between
    ``(`` and ``)``.

    Parameters
    ----------
    shorten_size : int, optional
        How many characters per line.
        Minimum is ``1``, by default ``60``

    Returns
    -------
    str
        Shortened text

    Example:
    --------
    >>> test = Text("a" * 200)
    >>> print(test.shorten())
    (
    'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
    'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
    'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
    'aaaaaaaaaaaaaaaaaaaa'
    )
    """
    width = max(1, shorten_size)
    quoted_lines = map(repr, self.divide(string_split_size=width))
    body = "\n".join(quoted_lines)
    return "".join(["(\n", body, "\n)"])