Skip to content

Commit 18ced62

Browse files
authored
Merge pull request #2 from Infinitode/update-duplipy-0.2.6-csv-numerical-augmentation-2544036806862479686
Update DupliPy to 0.2.6: CSV and Numerical Augmentation
2 parents 6ab152f + 59ed8d4 commit 18ced62

6 files changed

Lines changed: 378 additions & 93 deletions

File tree

duplipy/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import duplipy
22
from .formatting import remove_stopwords, remove_numbers, remove_whitespace, normalize_whitespace, separate_symbols, remove_special_characters, standardize_text, tokenize_text, stem_words, lemmatize_words, pos_tag, remove_profanity_from_text, remove_sensitive_info_from_text, remove_hate_speech_from_text, post_format_text
3-
from .replication import replace_word_with_synonym, augment_text_with_synonyms, load_text_file, augment_file_with_synonyms, insert_random_word, delete_random_word, random_word_deletion, swap_random_words, insert_synonym, paraphrase, flip_horizontal, flip_vertical, rotate, random_rotation, resize, crop, random_crop, shuffle_words, random_flip, random_color_jitter, noise_overlay
3+
from .replication import replace_word_with_synonym, augment_text_with_synonyms, load_text_file, augment_file_with_synonyms, insert_random_word, delete_random_word, random_word_deletion, swap_random_words, insert_synonym, paraphrase, flip_horizontal, flip_vertical, rotate, random_rotation, resize, crop, random_crop, shuffle_words, random_flip, random_color_jitter, noise_overlay, add_noise, scale_data, shift_data, augment_time_series, balance_dataset, augment_csv_data
44
from .similarity import edit_distance_score, bleu_score, jaccard_similarity_score, sorensen_dice_coefficient, cosine_similarity_score, mean_squared_error, psnr
5-
from .text_analysis import analyze_sentiment, named_entity_recognition
5+
from .text_analysis import analyze_sentiment, named_entity_recognition

duplipy/formatting.py

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
- `remove_numbers(text)`: Remove numbers from the input text.
77
- `remove_whitespace(text)`: Remove excess whitespace from the input text.
88
- `normalize_whitespace(text)`: Normalize multiple whitespaces into a single whitespace in the input text.
9-
- `seperate_symbols(text)`: Separate symbols and words with a space to ease tokenization.
9+
- `separate_symbols(text)`: Separate symbols and words with a space to ease tokenization.
1010
- `remove_special_characters(text)`: Remove special characters from the input text.
1111
- `standardize_text(text)`: Standardize the formatting of the input text.
1212
- `tokenize_text(text)`: Tokenize the input text into individual words.
@@ -27,7 +27,7 @@
2727
from nltk.tokenize import word_tokenize
2828
from nltk.stem import PorterStemmer, WordNetLemmatizer
2929

30-
def remove_stopwords(text):
30+
def remove_stopwords(text: str) -> str:
3131
"""
3232
Remove stopwords from the input text using NLTK's stopwords.
3333
@@ -51,7 +51,7 @@ def remove_stopwords(text):
5151
print(f"An error occurred during stopwords removal: {str(e)}")
5252
return text
5353

54-
def remove_numbers(text):
54+
def remove_numbers(text: str) -> str:
5555
"""
5656
Remove numbers from the input text.
5757
@@ -70,7 +70,7 @@ def remove_numbers(text):
7070
print(f"An error occurred during number removal: {str(e)}")
7171
return text
7272

73-
def remove_whitespace(text):
73+
def remove_whitespace(text: str) -> str:
7474
"""
7575
Remove excess whitespace from the input text.
7676
@@ -90,7 +90,7 @@ def remove_whitespace(text):
9090
print(f"An error occurred during whitespace removal: {str(e)}")
9191
return text
9292

93-
def normalize_whitespace(text):
93+
def normalize_whitespace(text: str) -> str:
9494
"""
9595
Normalize multiple whitespaces into a single whitespace in the input text.
9696
@@ -110,18 +110,18 @@ def normalize_whitespace(text):
110110
print(f"An error occurred during whitespace normalization: {str(e)}")
111111
return text
112112

113-
def separate_symbols(text):
113+
def separate_symbols(text: str) -> str:
114114
"""
115115
Separate symbols and words with a space to ease tokenization.
116116
117117
Symbols in the input text are separated from words with a space to facilitate
118118
easier tokenization and analysis of the text.
119119
120120
Parameters:
121-
- `text` (str): The input text from which symbols needs to be seperated.
121+
- `text` (str): The input text from which symbols need to be separated.
122122
123123
Returns:
124-
- `str`: The text from which symbols have been seperated.
124+
- `str`: The text from which symbols have been separated.
125125
"""
126126
try:
127127
pattern = r"([\W])"
@@ -131,7 +131,7 @@ def separate_symbols(text):
131131
print(f"An error occurred during symbol separation: {str(e)}")
132132
return text
133133

134-
def remove_special_characters(text):
134+
def remove_special_characters(text: str) -> str:
135135
"""
136136
Remove special characters from the input text.
137137
@@ -153,7 +153,7 @@ def remove_special_characters(text):
153153
print(f"An error occurred during special character removal: {str(e)}")
154154
return text
155155

156-
def standardize_text(text):
156+
def standardize_text(text: str) -> str:
157157
"""
158158
Standardize the formatting of the input text.
159159
@@ -174,7 +174,7 @@ def standardize_text(text):
174174
print(f"An error occurred during text standardization: {str(e)}")
175175
return text
176176

177-
def tokenize_text(text):
177+
def tokenize_text(text: str) -> list[str]:
178178
"""
179179
Tokenize the input text into individual words.
180180
@@ -186,48 +186,48 @@ def tokenize_text(text):
186186
- `text` (str): The input text to be tokenized.
187187
188188
Returns:
189-
- `list`: A list of tokens (words) from the input text.
189+
- `list[str]`: A list of tokens (words) from the input text.
190190
"""
191191
nltk.download('punkt', quiet=True)
192192
tokens = word_tokenize(text)
193193
return tokens
194194

195-
def stem_words(words):
195+
def stem_words(words: list[str]) -> list[str]:
196196
"""
197197
Stem the input words using Porter stemming algorithm.
198198
199199
Stemming reduces words to their base or root form, helping to consolidate
200200
variations of words and simplify text analysis.
201201
202202
Parameters:
203-
- `words` (list): A list of words to be stemmed.
203+
- `words` (list[str]): A list of words to be stemmed.
204204
205205
Returns:
206-
- `list`: A list of stemmed words.
206+
- `list[str]`: A list of stemmed words.
207207
"""
208208
stemmer = PorterStemmer()
209209
stemmed_words = [stemmer.stem(word) for word in words]
210210
return stemmed_words
211211

212-
def lemmatize_words(words):
212+
def lemmatize_words(words: list[str]) -> list[str]:
213213
"""
214214
Lemmatize the input words using WordNet lemmatization.
215215
216216
Lemmatization reduces words to their base or dictionary form, helping to
217217
normalize variations and simplify text analysis.
218218
219219
Parameters:
220-
- `words` (list): A list of words to be lemmatized.
220+
- `words` (list[str]): A list of words to be lemmatized.
221221
222222
Returns:
223-
- `list`: A list of lemmatized words.
223+
- `list[str]`: A list of lemmatized words.
224224
"""
225225
nltk.download('wordnet', quiet=True)
226226
lemmatizer = WordNetLemmatizer()
227227
lemmatized_words = [lemmatizer.lemmatize(word) for word in words]
228228
return lemmatized_words
229229

230-
def pos_tag(text):
230+
def pos_tag(text: str) -> list[tuple[str, str]]:
231231
"""
232232
Perform part-of-speech (POS) tagging on the input text.
233233
@@ -238,7 +238,7 @@ def pos_tag(text):
238238
- `text` (str): The input text to be POS tagged.
239239
240240
Returns:
241-
- `list`: A list of tuples containing (word, tag) pairs.
241+
- `list[tuple[str, str]]`: A list of tuples containing (word, tag) pairs.
242242
"""
243243
try:
244244
nltk.download('punkt', quiet=True)
@@ -250,7 +250,7 @@ def pos_tag(text):
250250
print(f"An error occurred during POS tagging: {str(e)}")
251251
return []
252252

253-
def remove_profanity_from_text(text):
253+
def remove_profanity_from_text(text: str) -> str:
254254
"""
255255
Remove profane words from the input text.
256256
@@ -260,7 +260,7 @@ def remove_profanity_from_text(text):
260260
- `text` (str): The input text to remove profanity from.
261261
262262
Returns:
263-
- `text` (str): The cleaned output text.
263+
- `str`: The cleaned output text.
264264
"""
265265
nltk.download('punkt', quiet=True)
266266
sentences = nltk.sent_tokenize(text)
@@ -269,7 +269,7 @@ def remove_profanity_from_text(text):
269269

270270
return cleaned_text
271271

272-
def remove_sensitive_info_from_text(text):
272+
def remove_sensitive_info_from_text(text: str) -> str:
273273
"""
274274
Remove sensitive information from the input text.
275275
@@ -279,7 +279,7 @@ def remove_sensitive_info_from_text(text):
279279
- `text` (str): The input text to remove sensitive information from.
280280
281281
Returns:
282-
- `text` (str): The cleaned output text.
282+
- `str`: The cleaned output text.
283283
"""
284284
nltk.download('punkt', quiet=True)
285285
sentences = nltk.sent_tokenize(text)
@@ -288,7 +288,7 @@ def remove_sensitive_info_from_text(text):
288288

289289
return cleaned_text
290290

291-
def remove_hate_speech_from_text(text):
291+
def remove_hate_speech_from_text(text: str) -> str:
292292
"""
293293
Remove hate speech or offensive speech from the input text.
294294
@@ -298,7 +298,7 @@ def remove_hate_speech_from_text(text):
298298
- `text` (str): The input text to remove hate speech and offensive speech from.
299299
300300
Returns:
301-
- `text` (str): The cleaned output text.
301+
- `str`: The cleaned output text.
302302
"""
303303
nltk.download('punkt', quiet=True)
304304
sentences = nltk.sent_tokenize(text)
@@ -311,7 +311,7 @@ def remove_hate_speech_from_text(text):
311311

312312
return cleaned_text
313313

314-
def post_format_text(text):
314+
def post_format_text(text: str) -> str:
315315
"""
316316
Post-format the text using regex.
317317
@@ -328,4 +328,4 @@ def post_format_text(text):
328328
text = re.sub(r'\s+', ' ', text)
329329
# Ensure proper punctuation spacing
330330
text = re.sub(r'\s([.,!?;:])', r'\1', text)
331-
return text
331+
return text

0 commit comments

Comments
 (0)