Skip to content

Commit

Permalink
Initial Commit
Browse files Browse the repository at this point in the history
  • Loading branch information
anonmanak2000 committed Oct 9, 2024
1 parent 508e655 commit 51af1fe
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 0 deletions.
Empty file added __init__.py
Empty file.
9 changes: 9 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from mask import Mask

if __name__ == "__main__":
    # Script entry point: mask the PII found in 'file.txt' and print the result.
    mask_pii = Mask()
    try:
        masked_text = mask_pii.mask_file('file.txt')
    except Exception as exc:
        # Top-level boundary: catch Exception (not a bare except) so that
        # KeyboardInterrupt / SystemExit still propagate, and report what
        # actually went wrong instead of hiding it.
        print(f'Unexpected error!! {type(exc).__name__}: {exc}')
    else:
        # Only reached when masking succeeded.
        print("Masked Text: " + masked_text)
1 change: 1 addition & 0 deletions mask/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .mask import Mask
35 changes: 35 additions & 0 deletions mask/mask.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import spacy
import re

class Mask:
    """Redact personally identifiable information (PII) from text.

    Masking happens in two passes: first the regular expressions in
    ``self.patterns`` are applied, then the named entities reported by a
    spaCy pipeline are replaced.
    """

    def __init__(self) -> None:
        """Set up the regex patterns; the spaCy pipeline is loaded lazily."""
        # Maps a category name to the regex that detects it. The category
        # name, upper-cased, appears in the replacement marker.
        self.patterns = {
            'phone': r'(?:\+\d{1,3}[-\s]?)?\(?\d{3}\)?[-\s]?\d{3}[-\s]?\d{4}',
            # The TLD class is [A-Za-z]; a '|' inside a character class is a
            # literal pipe, not alternation, so it must not be listed there.
            'email': r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',
            'ipv6': r'\b(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\b',
            'ipv4': r'\b((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.?\b){4}\b',
        }
        # Cached spaCy pipeline; populated on the first mask_nlp() call so
        # the model is loaded once per instance rather than once per call.
        self._nlp = None

    def mask_text(self, text: str) -> str:
        """Return *text* with regex matches and named entities redacted.

        Each regex match is replaced by ``[REDACTED <CATEGORY>]`` (patterns
        are applied in the insertion order of ``self.patterns``), then the
        result is passed through :meth:`mask_nlp`.
        """
        for category, pattern in self.patterns.items():
            text = re.sub(pattern, f'[REDACTED {category.upper()}]', text)

        return self.mask_nlp(text)

    def mask_nlp(self, text: str) -> str:
        """Return *text* with every spaCy-detected entity redacted.

        Each entity occurrence is replaced by ``[REDACTED <LABEL>]`` where
        ``<LABEL>`` is the entity's ``label_``.
        """
        # getattr keeps this working for instances whose __init__ was
        # bypassed or overridden without setting _nlp.
        nlp = getattr(self, '_nlp', None)
        if nlp is None:
            nlp = spacy.load("en_core_web_sm")
            self._nlp = nlp
        doc = nlp(text)

        for ent in doc.ents:
            # NOTE(review): this prints the raw entity value, i.e. the very
            # data being masked, to stdout. Kept for compatibility; consider
            # removing it or routing it to a debug logger.
            print(f'Label: {ent.label_}: Value: {ent.text}')
            if not ent.text:
                # An empty needle would insert the marker between every
                # character of the text.
                continue
            # Literal replacement: the entity text is data, not a regex, so
            # characters such as '(', '+' or '.' must not be interpreted.
            text = text.replace(ent.text, f'[REDACTED {ent.label_}]')
        return text

    def mask_file(self, file_name: str) -> str:
        """Read *file_name* and return its contents with PII redacted.

        Any error raised by ``open`` (for example ``FileNotFoundError``)
        propagates to the caller.
        """
        print('File Name: ' + file_name)
        with open(file_name, 'r') as file:
            file_text = file.read()
        return self.mask_text(file_text)

0 comments on commit 51af1fe

Please sign in to comment.