Bring code closer to PEP 8 style
Jordan Anderson committed Sep 3, 2019
1 parent 0f6b177 commit ea6e4c8
Showing 2 changed files with 583 additions and 532 deletions.
155 changes: 83 additions & 72 deletions __init__.py
@@ -2,39 +2,46 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function, with_statement)
import time
import os
import mimetypes
import codecs
import sys
from functools import partial
from calibre_plugins.language_clean_plugin.cleaner import *
from calibre.ebooks.tweak import *
from optparse import OptionGroup, Option
from calibre.customize import FileTypePlugin
logdir = "c:/Scratch/calibre"
__license__ = 'GPL v3'
__copyright__ = '2012, Jordan Anderson'
__docformat__ = 'restructuredtext en'

#from __future__ import with_statement
import sys, os, time
from calibre.customize import FileTypePlugin
from optparse import OptionGroup, Option
from calibre.ebooks.tweak import *
from calibre_plugins.language_clean_plugin.cleaner import *
from functools import partial
import codecs, mimetypes


class CleanerPlugin(FileTypePlugin):

name = 'Language Cleaner' # Name of the plugin
description = 'Replace naughty or offensive language with something more acceptable (to me at least), recovered version'
supported_platforms = ['windows', 'osx', 'linux'] # Platforms this plugin will run on
author = 'Jordan Anderson' # The author of this plugin
version = (1, 5, 2017) # The version number of this plugin
file_types = set(['epub']) # The file types that this plugin will be applied to
on_preprocess = True # Run this plugin after conversion is complete
name = 'Language Cleaner' # Name of the plugin
description = ('Replace naughty or offensive language with something more '
'acceptable (to me at least), recovered version')
# Platforms this plugin will run on
supported_platforms = ['windows', 'osx', 'linux']
author = 'Jordan Anderson' # The author of this plugin
version = (2019, 9, 2) # The version number of this plugin
# The file types that this plugin will be applied to
file_types = set(['epub'])
on_preprocess = True # Run this plugin after conversion is complete
minimum_calibre_version = (0, 7, 53)

def run(self, path_to_ebook):
#print ("*"*60,"\n","you are in Language Cleaner")
#print ("*"*60,"\n")
ebook_file=path_to_ebook
ebook_file = path_to_ebook
fmt = ebook_file.rpartition('.')[-1].lower()
exploder, rebuilder = get_tools(fmt)
with TemporaryDirectory('_tweak_'+
os.path.basename(ebook_file).rpartition('.')[0]) as tdir:
tmppath = '_tweak_' + os.path.basename(ebook_file).rpartition('.')[0]
with TemporaryDirectory(tmppath) as tdir:
#prints ("Relevant info:",tdir,fmt,ebook_file)
try:
opf = exploder(ebook_file, tdir)
@@ -45,68 +52,74 @@ def run(self, path_to_ebook):
except Error as e:
prints(as_unicode(e), file=sys.stderr)
raise SystemExit(1)
#Debug
print ("Created tdir:",tdir,"and found opf",opf)
# Debug
print ("Created tdir:", tdir, "and found opf", opf)
#print (os.popen("ll "+tdir).read())
#print ("OPF CONTENTS:")
#print (open(opf,'r').read())
#manipulate all of the files
opf = open(opf,'r').read().split('\n')
# manipulate all of the files
opf = open(opf, 'r').read().split('\n')
# first, assemble the entire text to evaluate context
text=""
text = ""
for f in walk(tdir):
opf_line = [ii for ii in opf if os.path.basename(f).lower() in ii.lower()]
ftype = mimetypes.guess_type(f)[0]
if not ftype and "html" in f.split('.')[-1]:
print('Non-text type %s for file %s but forcing text mode'%(ftype, f))
ftype = 'text'
if not ftype:
print('Non-text type %s for file %s'%(ftype, f))
elif opf_line and 'text' in ftype:
encodings = ['utf-8', 'windows-1252', 'windows-1250']
for e in encodings:
try:
text += codecs.open(f,'r',encoding=e).read()
except UnicodeDecodeError:
print('File %s: got unicode error with %s , trying different encoding' % (f,e))
else:
print('File %s: opening the file with encoding: %s ' % (f,e))
break
opf_line = [ii for ii in opf if
os.path.basename(f).lower() in ii.lower()]
ftype = mimetypes.guess_type(f)[0]
if not ftype and "html" in f.split('.')[-1]:
print('Non-text type %s for file %s but forcing text mode'
% (ftype, f))
ftype = 'text'
if not ftype:
print('Non-text type %s for file %s' % (ftype, f))
elif opf_line and 'text' in ftype:
encodings = ['utf-8', 'windows-1252', 'windows-1250']
for e in encodings:
try:
text += codecs.open(f, 'r', encoding=e).read()
except UnicodeDecodeError:
print('File %s: got unicode error with %s , trying different encoding' % (f, e))
else:
print('File %s: opening the file with encoding: %s ' % (f, e))
break
replacement_list = language_check(text)
start_text=text
end_text=""
#Now do replacements on each file
start_text = text
end_text = ""
# Now do replacements on each file
for f in walk(tdir):
opf_line = [ii for ii in opf if os.path.basename(f).lower() in ii.lower()]
#Not sure what the correct way to determine which files should
# be edited. Seems like most are marked 'application/' in type
print ("File",f,"\nOPF line:\n",opf_line)
ftype = mimetypes.guess_type(f)[0]
if not ftype and "html" in f.split('.')[-1]:
print('Non-text type %s for file %s but forcing text mode'%(ftype, f))
ftype = 'text'
if not ftype:
print('Non-text type %s for file %s'%(ftype, f))
elif opf_line and 'text' in ftype:
print ("Cleaning",f)
text = open(f,'r').read()
output = ""
for line in text.split("\n"):
#Go through all elements of replacement_list
for search,sub,pcase in replacement_list:
if pcase: # Preserve case
line = search.sub(partial(pcase,sub),line)
else: # Don't preserve case
line = search.sub(sub,line)
output += line + "\n"
open(f,'w').write(output)
end_text += output
if start_text.replace('\n',"") == end_text.replace('\n',''):
opf_line = [ii for ii in opf if
os.path.basename(f).lower() in ii.lower()]
# Not sure what the correct way to determine which files should
# be edited. Seems like most are marked 'application/' in type
print ("File", f, "\nOPF line:\n", opf_line)
ftype = mimetypes.guess_type(f)[0]
if not ftype and "html" in f.split('.')[-1]:
print('Non-text type %s for file %s but forcing text mode'
% (ftype, f))
ftype = 'text'
if not ftype:
print('Non-text type %s for file %s' % (ftype, f))
elif opf_line and 'text' in ftype:
print ("Cleaning", f)
text = open(f, 'r').read()
output = ""
for line in text.split("\n"):
# Go through all elements of replacement_list
for search, sub, pcase in replacement_list:
if pcase: # Preserve case
line = search.sub(partial(pcase, sub), line)
else: # Don't preserve case
line = search.sub(sub, line)
output += line + "\n"
open(f, 'w').write(output)
end_text += output
if start_text.replace('\n', "") == end_text.replace('\n', ''):
print ("Language cleaner made no changes")
else:
if os.path.exists(logdir):
open(logdir+os.sep+'%s_init.txt'%(os.path.basename(ebook_file)+str(time.time())),'w').write(start_text)
open(logdir+os.sep+'%s_mod.txt'%(os.path.basename(ebook_file)+str(time.time())),'w').write(end_text)
open(logdir+os.sep+'%s_init.txt' %
(os.path.basename(ebook_file)+str(time.time())), 'w').write(start_text)
open(logdir+os.sep+'%s_mod.txt' %
(os.path.basename(ebook_file)+str(time.time())), 'w').write(end_text)
prints('Rebuilding', ebook_file, 'please wait ...')
try:
rebuilder(tdir, ebook_file)
@@ -119,5 +132,3 @@ def run(self, path_to_ebook):
#print (path_to_ebook,ext,str(mi))
#print ("you are returning from Language Cleaner")
return ebook_file
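
The heart of the run() method above is the substitution loop: each (search, sub, pcase) triple returned by language_check() is applied to every line, and when a case-preserving callback is supplied, partial(pcase, sub) is handed to the compiled pattern's sub() so the callback receives the replacement string plus the match object. The rules themselves (presumably built in the plugin's cleaner module) are not rendered above, so the snippet below is only a minimal standalone sketch of that idiom; the preserve_case helper and the sample patterns are illustrative assumptions, not code from this repository.

import re
from functools import partial

def preserve_case(sub, match):
    # Hypothetical helper: shape the replacement to mimic the case of the matched text.
    matched = match.group(0)
    if matched.isupper():
        return sub.upper()
    if matched[:1].isupper():
        return sub.capitalize()
    return sub

# Each entry mirrors the plugin's (compiled pattern, replacement, case callback or None).
replacement_list = [
    (re.compile(r'darn', re.IGNORECASE), 'gosh', preserve_case),
    (re.compile(r'\bheck\b'), 'hedge', None),
]

line = "Darn it, DARN it all to heck."
for search, sub, pcase in replacement_list:
    if pcase:  # preserve the case of each match
        line = search.sub(partial(pcase, sub), line)
    else:      # plain, case-sensitive substitution
        line = search.sub(sub, line)
print(line)  # -> Gosh it, GOSH it all to hedge.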

