Skip to content

Commit c544964

Browse files
committed
Updated DupliPy to version 0.2.4.
1 parent 063a23e commit c544964

6 files changed

Lines changed: 145 additions & 8 deletions

File tree

duplipy/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import duplipy
2-
from .formatting import remove_stopwords, remove_numbers, remove_whitespace, normalize_whitespace, separate_symbols, remove_special_characters, standardize_text, tokenize_text, stem_words, lemmatize_words, pos_tag, remove_profanity_from_text, remove_sensitive_info_from_text, remove_hate_speech_from_text
3-
from .replication import replace_word_with_synonym, augment_text_with_synonyms, load_text_file, augment_file_with_synonyms, insert_random_word, delete_random_word, insert_synonym, paraphrase, flip_horizontal, flip_vertical, rotate, random_rotation, resize, crop, random_crop, shuffle_words
2+
from .formatting import remove_stopwords, remove_numbers, remove_whitespace, normalize_whitespace, separate_symbols, remove_special_characters, standardize_text, tokenize_text, stem_words, lemmatize_words, pos_tag, remove_profanity_from_text, remove_sensitive_info_from_text, remove_hate_speech_from_text, post_format_text
3+
from .replication import replace_word_with_synonym, augment_text_with_synonyms, load_text_file, augment_file_with_synonyms, insert_random_word, delete_random_word, insert_synonym, paraphrase, flip_horizontal, flip_vertical, rotate, random_rotation, resize, crop, random_crop, shuffle_words, random_flip, random_color_jitter, noise_overlay
44
from .similarity import edit_distance_score, bleu_score, jaccard_similarity_score
55
from .text_analysis import analyze_sentiment

duplipy/formatting.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@
1616
- `remove_profanity_from_text(text)`: Remove profane words from the input text.
1717
- `remove_sensitive_info_from_text(text)`: Remove sensitive information from the input text.
1818
- `remove_hate_speech_from_text(text)`: Remove hate speech or offensive speech from the input text.
19+
- `post_format_text(text)`: Post-format the text using regex.
1920
"""
2021

21-
2222
import string
2323
import re
2424
import nltk
@@ -306,4 +306,23 @@ def remove_hate_speech_from_text(text):
306306
cleaned_sentences.append(sentence)
307307
cleaned_text = ' '.join(cleaned_sentences)
308308

309-
return cleaned_text
309+
return cleaned_text
310+
311+
def post_format_text(text):
    """
    Post-format the text using regex.

    This function post-formats the text by removing extra spaces and ensuring
    proper punctuation spacing:

    - every run of whitespace (spaces, tabs, newlines) becomes a single space,
      and leading/trailing whitespace is dropped;
    - whitespace directly before `.`, `,`, `!`, `?`, `;` or `:` is removed;
    - a comma or semicolon immediately followed by a letter gets a single
      space after it (`"sentence ,John"` -> `"sentence, John"`). Digits are
      left alone so numbers such as `1,000` are not split.

    Parameters:
    - `text` (str): The input text to be post-formatted.

    Returns:
    - `str`: The post-formatted text.
    """
    # Remove extra spaces, including the ones at either end of the text
    text = re.sub(r'\s+', ' ', text).strip()
    # Ensure proper punctuation spacing: no space in front of punctuation
    text = re.sub(r'\s([.,!?;:])', r'\1', text)
    # Pulling a comma/semicolon to the left can glue it to the following word;
    # restore the separating space. `[^\W\d_]` matches letters only.
    text = re.sub(r'([,;])(?=[^\W\d_])', r'\1 ', text)
    return text

duplipy/replication.py

Lines changed: 95 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@
1818
- `crop(image, box)`: Crop the input image to the specified rectangular region.
1919
- `random_crop(image, size)`: Randomly crop a region from the input image.
2020
- `shuffle_words(text)`: Randomly shuffle the order of words in each sentence.
21+
- `random_flip(image, horizontal, vertical)`: Randomly flip the input image horizontally and/or vertically.
22+
- `random_color_jitter(image, brightness, contrast, saturation, hue)`: Randomly adjust the brightness, contrast, saturation, and hue of the input image.
23+
- `noise_overlay(image, noise_factor, noise_type, grain_factor)`: Overlay noise on the input image.
2124
"""
2225

2326
import random
@@ -26,6 +29,7 @@
2629
from nltk.corpus import wordnet
2730
from PIL import Image
2831
import tqdm
32+
from PIL import ImageEnhance
2933

3034
nltk.download("wordnet", quiet=True)
3135
nltk.download("averaged_perceptron_tagger", quiet=True)
@@ -370,4 +374,94 @@ def shuffle_words(text):
370374
shuffled_sentence = ' '.join(shuffled_words)
371375
shuffled_text.append(shuffled_sentence)
372376
pbar.update(1)
373-
return shuffled_text
377+
return shuffled_text
378+
379+
def random_flip(image, horizontal=True, vertical=True):
    """
    Randomly flip the input image horizontally and/or vertically.

    Each enabled axis is flipped independently with probability 0.5, so the
    result may be the unchanged image, a horizontal flip, a vertical flip, or
    both (equivalent to a 180 degree rotation). For an unconditional flip use
    `flip_horizontal` / `flip_vertical` instead.

    Parameters:
    - `image` (PIL.Image.Image): The input image to be flipped.
    - `horizontal` (bool): Whether a horizontal flip may be applied.
    - `vertical` (bool): Whether a vertical flip may be applied.

    Returns:
    - `PIL.Image.Image`: The randomly flipped image. When no flip is applied
      the input image object itself is returned.
    """
    # The coin is only tossed for axes the caller enabled, so disabling both
    # axes returns the input untouched without consuming random numbers.
    if horizontal and random.random() < 0.5:
        image = image.transpose(Image.FLIP_LEFT_RIGHT)
    if vertical and random.random() < 0.5:
        image = image.transpose(Image.FLIP_TOP_BOTTOM)

    return image
401+
402+
def random_color_jitter(image, brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1):
    """
    Randomly adjust the brightness, contrast, saturation, and hue of the input image.

    Brightness, contrast and saturation are each scaled by a factor drawn
    uniformly from `[1 - value, 1 + value]`. The hue channel is shifted by an
    integer offset drawn from `[-255 * hue, 255 * hue]`, wrapping modulo 256.
    The random draws always happen in the order brightness, contrast,
    saturation, hue.

    Parameters:
    - `image` (PIL.Image.Image): The input image to be color-jittered.
    - `brightness` (float): The maximum factor to adjust brightness.
    - `contrast` (float): The maximum factor to adjust contrast.
    - `saturation` (float): The maximum factor to adjust saturation.
    - `hue` (float): The maximum factor to adjust hue.

    Returns:
    - `PIL.Image.Image`: The color-jittered image, in RGB mode.
    """
    image = ImageEnhance.Brightness(image).enhance(1 + random.uniform(-brightness, brightness))
    image = ImageEnhance.Contrast(image).enhance(1 + random.uniform(-contrast, contrast))
    image = ImageEnhance.Color(image).enhance(1 + random.uniform(-saturation, saturation))

    hue_shift = int(255 * random.uniform(-hue, hue))

    # Go through RGB before HSV so the HSV conversion always starts from the
    # mode it is defined for.
    # NOTE(review): direct HSV conversion from other modes may not be
    # available in every Pillow version - confirm.
    image = image.convert("RGB")

    # A zero shift leaves the hue channel as it is; skip the RGB -> HSV -> RGB
    # round trip, which would only add 8-bit rounding error.
    if hue_shift == 0:
        return image

    h, s, v = image.convert("HSV").split()
    h = h.point(lambda value: (value + hue_shift) % 256)

    return Image.merge("HSV", (h, s, v)).convert("RGB")
426+
427+
def noise_overlay(image, noise_factor=0.1, noise_type="gaussian", grain_factor=0.0):
    """
    Overlay noise on the input image.

    A gray noise layer is generated, optionally blended with a grain layer,
    and then blended onto the image with weight `noise_factor`.

    Parameters:
    - `image` (PIL.Image.Image): The input image to overlay noise on. Images that are not in RGB mode are converted to RGB first.
    - `noise_factor` (float): The factor to control the intensity of the noise (0.0 to 1.0).
    - `noise_type` (str): The type of noise to overlay ("gaussian", "salt_and_pepper"). Defaults to "gaussian".
    - `grain_factor` (float): The factor to control the graininess of the noise (0.0 to 1.0). Defaults to 0.0.

    Returns:
    - `PIL.Image.Image`: The image with overlaid noise, in RGB mode.

    Raises:
    - `ValueError`: If `noise_type` is not "gaussian" or "salt_and_pepper".
    """
    # Pick the per-pixel sampler first so an invalid noise type is rejected
    # before any image work is done.
    if noise_type == "gaussian":
        def sample():
            # Gaussian noise with mean 128 and standard deviation proportional to noise_factor
            return 128 + random.gauss(0, noise_factor * 255)
    elif noise_type == "salt_and_pepper":
        def sample():
            # A pixel becomes salt (255) or pepper (0) with probability
            # noise_factor; every other pixel of the layer stays black (0).
            if random.random() < noise_factor:
                return 0 if random.random() < 0.5 else 255
            return 0
    else:
        raise ValueError(f"Invalid noise type: {noise_type}")

    # Image.blend requires both images to share mode and size; the noise layers are RGB.
    if image.mode != "RGB":
        image = image.convert("RGB")

    noise = _gray_noise_layer(image.size, sample)

    # Add grain effect by blending a second random layer into the noise.
    # With grain_factor == 0 the blend would return the noise layer unchanged,
    # so the grain layer is not generated at all.
    if grain_factor:
        grain_noise = _gray_noise_layer(
            image.size,
            lambda: random.uniform(-grain_factor * 255, grain_factor * 255),
        )
        noise = Image.blend(noise, grain_noise, grain_factor)

    return Image.blend(image, noise, noise_factor)


def _clamp_channel(value):
    """Truncate `value` to an int and clamp it to the 0-255 range of an 8-bit channel."""
    return max(0, min(255, int(value)))


def _gray_noise_layer(size, sample):
    """Build an RGB image of `size` whose pixels are gray levels drawn from `sample()`."""
    width, height = size
    levels = (_clamp_channel(sample()) for _ in range(width * height))
    layer = Image.new("RGB", size)
    # One bulk write instead of a Python-level putpixel call per pixel.
    layer.putdata([(level, level, level) for level in levels])
    return layer

readme.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# DupliPy 0.2.3
1+
# DupliPy 0.2.4
22
![Python Version](https://img.shields.io/badge/python-3.12-blue.svg)
33
![Code Size](https://img.shields.io/github/languages/code-size/infinitode/duplipy)
44
![Downloads](https://pepy.tech/badge/duplipy)
@@ -7,6 +7,11 @@
77

88
An open source Python library for text formatting, augmentation, and similarity calculation tasks in NLP. The package now also includes additional methods for image augmentation.
99

10+
## Changes in DupliPy 0.2.4
11+
12+
- Created new functions in `duplipy.replication` for image augmentation: `random_flip`, `random_color_jitter`, and `noise_overlay`.
13+
- Created a new function (`post_format_text`) for post-formatting after DupliPy processing or augmentation tasks that cleans up extra whitespace and normalizes punctuation spacing.
14+
1015
## Changes to DupliPy 0.2.3
1116

1217
DupliPy now utilizes another one of our Python packages, ValX, which provides quick methods for cleaning and formatting text data during preprocessing, before training.

test_duplipy.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,8 @@
1818
print("Edit distance score between Hi and Hello: ", edit_distance_score("hi", "hello"))
1919
print("BLEU score calculation: ", bleu_score("Hello, how are you?", "Hi, how are you doing?"))
2020

21-
print("Analyze sentiment: ", analyze_sentiment(text))
21+
print("Analyze sentiment: ", analyze_sentiment(text))
22+
23+
textFormatting = "This is a test sentence ,John, how are you ? It has a few words in it . It is a test sentence ."
24+
25+
print(post_format_text(textFormatting))

test_image_augmentation.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import unittest
22
import os
33
from PIL import Image
4-
from duplipy.replication import flip_horizontal, flip_vertical, rotate, random_rotation, resize, crop, random_crop
4+
from duplipy.replication import flip_horizontal, flip_vertical, rotate, random_rotation, resize, crop, random_crop, random_flip, random_color_jitter, noise_overlay
55

66
class TestImageAugmentation(unittest.TestCase):
77

@@ -48,6 +48,21 @@ def test_random_crop(self):
4848
self.assertTrue(isinstance(random_cropped_image, Image.Image))
4949
self.assertEqual(random_cropped_image.size, (50, 50))
5050

51+
def test_random_flip(self):
    """`random_flip` returns a PIL image with the same size as its input."""
    flipped = random_flip(self.test_image)
    self.assertIsInstance(flipped, Image.Image)
    self.assertEqual(flipped.size, self.test_image.size)
55+
56+
def test_random_color_jitter(self):
    """`random_color_jitter` returns a PIL image with the same size as its input."""
    jitter_limits = {"brightness": 0.5, "contrast": 0.5, "saturation": 0.5, "hue": 0.5}
    jittered = random_color_jitter(self.test_image, **jitter_limits)
    self.assertIsInstance(jittered, Image.Image)
    self.assertEqual(jittered.size, self.test_image.size)
60+
61+
def test_noise_overlay(self):
    """`noise_overlay` returns a PIL image with the same size as its input."""
    noisy = noise_overlay(self.test_image, noise_factor=0.5)
    self.assertIsInstance(noisy, Image.Image)
    self.assertEqual(noisy.size, self.test_image.size)
65+
5166

5267
if __name__ == "__main__":
5368
unittest.main()

0 commit comments

Comments
 (0)