diff --git a/.gitignore b/.gitignore index b25c15b..9b5e2e8 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ *~ +__pycache__ diff --git a/Makefile b/Makefile index 537592e..9d79987 100644 --- a/Makefile +++ b/Makefile @@ -232,14 +232,14 @@ publish: $(All_Versions) gradint.py grep ^program_name < src/top.py|head -1|sed -e 's/.*radint v/v/' -e 's/ .*/./' > ~/homepage/public/gradint/latest-version.txt make clean ~/homepage/update - ssh st0rage "cd eGuidedog/ssb22/gradint; screen -d -m /bin/bash -c 'sleep 60;. build-sync.sh'" gradint-build.7z: mkdir /tmp/gradint-build00 cp -r * /tmp/gradint-build00 rm -r /tmp/gradint-build00/LICENSE /tmp/gradint-build00/README.md /tmp/gradint-build00/charlearn mv /tmp/gradint-build00 gradint - cd gradint ; make clean ; rm -rf extras ; cd .. + make -C gradint clean + rm -rf gradint/extras 7za a gradint-build.7z gradint/ rm -rf gradint @@ -266,6 +266,7 @@ CD: $(Mac_Files) gradint.zip echo;echo;echo "Made CD directory. Can add gradint/samples, gradint/vocab.txt, gradint/espeak for Windows, gradint/espeak-.. for OSX, sox Win/Mac binaries, oggenc or whatever for Windows, etc." cleanup: - rm -f `find . -type f -name '*~' -o -name '*.pyc' -o -name DEADJOE` + find . -type f '(' -name '*~' -o -name '*.pyc' -o -name DEADJOE ')' -exec rm -vf '{}' ';' + rm -rvf __pycache__ # must be separate from find, as some find implementations exec before trying to descend and then error clean: cleanup - rm -f gradint.py $(All_Versions) src/defaults.py gradint-installer.command gradint.dmg + rm -rf gradint.py $(All_Versions) src/defaults.py gradint-installer.command gradint.dmg diff --git a/advanced.txt b/advanced.txt index 0dc41ae..ca37321 100644 --- a/advanced.txt +++ b/advanced.txt @@ -47,7 +47,8 @@ otherLanguages = ["cant","ko","jp"] # able to tell the difference between cant_en.wav and an # ordinary English prompt and might use it wrongly. 
-possible_otherLanguages = ["cant","ko","jp","en","zh"] +possible_otherLanguages = ["cant","ko","jp","en","zh", + "zhy","zh-yue"] # You can also fill in otherFirstLanguages below # (using the same ["item","item"] format) to @@ -95,7 +96,7 @@ prefer_espeak = "en" # "zh" for Zhongwen (Mandarin). # - You can improve eSpeak's English by installing # Festival's dictionary and using lexconvert to convert -# it, see http://ssb22.user.srcf.net/gradint/lexconvert.html +# it, see http://ssb22.user.srcf.net/lexconvert/ # (this has already been done in the bundled version). # - eSpeak is not very natural-sounding, but it is very # clear and accurate in English and some other languages @@ -201,7 +202,7 @@ systemVoice = "en" # - Festival Lite on Windows (if all else fails) : # put flite.exe in the gradint folder # -# - Linux: install Festival, or flite if you want a US accent +# - GNU/Linux: install Festival, or flite for US accent # # - S60: the phone's built-in speech can be used # @@ -211,6 +212,22 @@ systemVoice = "en" # older "Speech!" utility. These can be used only for # playing in real-time, not for generating files. +# Coqui voices are experimentally supported on GNU/Linux. +# Setup: pip install coqui-tts[server,zh,ja,ko] +# Then download the voices you want, e.g.: +# from TTS.api import TTS;langs = {} +# for m in TTS().list_models(): langs.setdefault(m.split('/')[1].split('-')[0],[]).append(m) +# +# TTS(langs["zh"][0]) +# TTS('tts_models/en/jenny/jenny') +# (If any model crashes during download, be sure to delete the +# result from ~/.local/share/tts before running Gradint. For +# example vocoder_models--ja--kokoro--hifigan_v1 may crash. +# I did say support for these voices is experimental.) +# Gradint detects voices that have been downloaded +# (but prefer_espeak overrides this). The Chinese +# voice does NOT support pinyin. 
+ # You can also set extra_speech to a list of # (language prefix, command), for example: # extra_speech=[ ("la","say-latvian"),("de","say-german") ] @@ -350,7 +367,7 @@ lily_file = "C:\\Program Files\\NeoSpeech\\Lily16\\data-common\\userdict\\userdi # somewhere under C:\Program Files\VW\VT\Lily\M16-SAPI5\lib\ # but I don't know exactly) -# If you want to use SAPI under WINE in Linux +# If you want to use SAPI under WINE in GNU/Linux # then you can set ptts_program: ptts_program = None # (hint: run winecfg and set Windows version to Millenium (ME) @@ -759,7 +776,7 @@ gui_output_directory = "output" # in which case the first directory that EXISTS will be used # (or the last one on the list if all else fail). # Useful if the directory to your MP3 player only appears when -# it's plugged in for example. With Linux automounters you can +# it's plugged in for example. With GNU/Linux automounters # set "/media/*" as one of the directories, and it will expand to # whatever removable device is mounted IF there is only one. 
diff --git a/hanzi-prompts/begin_zh-yue.txt b/hanzi-prompts/begin_zh-yue.txt new file mode 100644 index 0000000..62cad49 --- /dev/null +++ b/hanzi-prompts/begin_zh-yue.txt @@ -0,0 +1 @@ +開頭 diff --git a/hanzi-prompts/end_zh-yue.txt b/hanzi-prompts/end_zh-yue.txt new file mode 100644 index 0000000..679afff --- /dev/null +++ b/hanzi-prompts/end_zh-yue.txt @@ -0,0 +1 @@ +今日個堂上完啦 diff --git a/hanzi-prompts/longpause_zh-yue.txt b/hanzi-prompts/longpause_zh-yue.txt new file mode 100644 index 0000000..18d9f6c --- /dev/null +++ b/hanzi-prompts/longpause_zh-yue.txt @@ -0,0 +1 @@ +而家我哋要等一陣,然後翻溫。喺第一課我哋仲未學習好多嘅詞語,所以停頓會比較長,但係喺未來嘅課程,我哋唔會有咁長嘅停頓 diff --git a/hanzi-prompts/meaningis_zh-yue.txt b/hanzi-prompts/meaningis_zh-yue.txt new file mode 100644 index 0000000..a4c75cb --- /dev/null +++ b/hanzi-prompts/meaningis_zh-yue.txt @@ -0,0 +1 @@ +意思係 diff --git a/hanzi-prompts/nowPleaseSay_zh-yue.txt b/hanzi-prompts/nowPleaseSay_zh-yue.txt new file mode 100644 index 0000000..92923db --- /dev/null +++ b/hanzi-prompts/nowPleaseSay_zh-yue.txt @@ -0,0 +1 @@ +而家請講 diff --git a/hanzi-prompts/pleaseSay_zh-yue.txt b/hanzi-prompts/pleaseSay_zh-yue.txt new file mode 100644 index 0000000..cce3b70 --- /dev/null +++ b/hanzi-prompts/pleaseSay_zh-yue.txt @@ -0,0 +1 @@ +請講 diff --git a/hanzi-prompts/repeatAfterMe_zh-yue.txt b/hanzi-prompts/repeatAfterMe_zh-yue.txt new file mode 100644 index 0000000..09aaa03 --- /dev/null +++ b/hanzi-prompts/repeatAfterMe_zh-yue.txt @@ -0,0 +1 @@ +請跟住講 diff --git a/hanzi-prompts/sayAgain_zh-yue.txt b/hanzi-prompts/sayAgain_zh-yue.txt new file mode 100644 index 0000000..13ca92f --- /dev/null +++ b/hanzi-prompts/sayAgain_zh-yue.txt @@ -0,0 +1 @@ +再講一次 diff --git a/hanzi-prompts/tryToSay_zh-yue.txt b/hanzi-prompts/tryToSay_zh-yue.txt new file mode 100644 index 0000000..d43c674 --- /dev/null +++ b/hanzi-prompts/tryToSay_zh-yue.txt @@ -0,0 +1 @@ +試吓講 diff --git a/hanzi-prompts/whatSay_zh-yue.txt b/hanzi-prompts/whatSay_zh-yue.txt new file mode 100644 index 0000000..aed1a57 
--- /dev/null +++ b/hanzi-prompts/whatSay_zh-yue.txt @@ -0,0 +1 @@ +點講 diff --git a/hanzi-prompts/whatmean_zh-yue.txt b/hanzi-prompts/whatmean_zh-yue.txt new file mode 100644 index 0000000..0aaf415 --- /dev/null +++ b/hanzi-prompts/whatmean_zh-yue.txt @@ -0,0 +1 @@ +乜嘢意思? diff --git a/hanzi-prompts/whatmean_zh-yue_2.txt b/hanzi-prompts/whatmean_zh-yue_2.txt new file mode 100644 index 0000000..87e6d63 --- /dev/null +++ b/hanzi-prompts/whatmean_zh-yue_2.txt @@ -0,0 +1 @@ +係乜嘢意思? diff --git a/hanzi-prompts/whatmean_zh-yue_3.txt b/hanzi-prompts/whatmean_zh-yue_3.txt new file mode 100644 index 0000000..da79d2e --- /dev/null +++ b/hanzi-prompts/whatmean_zh-yue_3.txt @@ -0,0 +1 @@ +乜嘢意思呢? diff --git a/mac/start-gradint.app/Contents/MacOS/start-gradint b/mac/start-gradint.app/Contents/MacOS/start-gradint index 1b697b7..c0f531b 100755 --- a/mac/start-gradint.app/Contents/MacOS/start-gradint +++ b/mac/start-gradint.app/Contents/MacOS/start-gradint @@ -1,5 +1,6 @@ #!/bin/bash -export PATH="$PATH:/usr/local/bin" # in case lame etc is there +export PATH="/usr/local/bin:$PATH" # for python3 override + in case lame etc is there +cd "${BASH_SOURCE%/*}/../.." 
# needed on macOS 14, possibly 13 if sw_vers 2>/dev/null|grep ^ProductVersion.*1[2-9]; then # macOS 12+ if test $(python3 -c 'import tkinter,sys;print(sys.version_info[:3]>=(3,10,1))' 2>/dev/null) = "True"; then exec python3 gradint.py; fi osascript -e "tell application (path to frontmost application as text) to display dialog \"macOS 12 bundled a broken version of the GUI libraries: please install Python 3 from python.org before running Gradint\" buttons {\"OK\"} with icon stop" diff --git a/samples/utils/autosplit.py b/samples/utils/autosplit.py old mode 100644 new mode 100755 diff --git a/samples/utils/cache-synth.py b/samples/utils/cache-synth.py old mode 100644 new mode 100755 diff --git a/samples/utils/cleanup-cache.py b/samples/utils/cleanup-cache.py old mode 100644 new mode 100755 diff --git a/samples/utils/diagram.py b/samples/utils/diagram.py old mode 100644 new mode 100755 diff --git a/samples/utils/list-synth.py b/samples/utils/list-synth.py old mode 100644 new mode 100755 diff --git a/samples/utils/list2cache.py b/samples/utils/list2cache.py old mode 100644 new mode 100755 diff --git a/samples/utils/manual-splitter.py b/samples/utils/manual-splitter.py old mode 100644 new mode 100755 diff --git a/samples/utils/player.py b/samples/utils/player.py old mode 100644 new mode 100755 index e09ed01..409d814 --- a/samples/utils/player.py +++ b/samples/utils/player.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # (should work in both Python 2 and Python 3) -# Simple sound-playing server v1.56 +# Simple sound-playing server v1.58 # Silas S. Brown - public domain - no warranty # connect to port 8124 (assumes behind firewall) @@ -13,8 +13,9 @@ import socket, select, os, sys, os.path, time, re for a in sys.argv[1:]: - if a.startswith("--rpi-bluetooth-setup"): # tested on Raspberry Pi 400 with Raspbian 11; also tested on Raspberry Pi Zero W with Raspbian 10 Lite (with the device already paired: needed to say "scan on", "discovery on", remove + pair in bluetoothctl). 
Send Eth=(bluetooth Ethernet addr) to start. Note that the setup command reboots the system. - os.system('if [ -e /etc/xdg/lxsession/LXDE-pi/autostart ]; then mkdir -p /home/pi/.config/lxsession/LXDE-pi && cp /etc/xdg/lxsession/LXDE-pi/autostart /home/pi/.config/lxsession/LXDE-pi/ && echo sudo ethtool --set-eee eth0 eee off >> /home/pi/.config/lxsession/LXDE-pi/autostart && echo python '+os.path.join(os.getcwd(),sys.argv[0])+' >> /home/pi/.config/lxsession/LXDE-pi/autostart; else (echo "[Unit]";echo "Descrption=Gradint player utility";echo "[Service]";echo "Type=oneshot";echo "ExecStart='+os.path.join(os.getcwd(),sys.argv[0])+'";echo "[Install]";echo "WantedBy=multi-user.target") > player.service && sudo mv player.service /etc/systemd/system/ && sudo systemctl daemon-reload && sudo systemctl enable player && chmod +x '+sys.argv[0]+' && awk '+"'"+'// {print} /^import / {print "os.system('+"'"+'"'+"'"+'"'+"'"+'pulseaudio --start'+"'"+'"'+"'"+'"'+"'"+')"}'+"'"+' < '+sys.argv[0]+' > .playerTMP && mv .playerTMP '+sys.argv[0]+'; fi && sudo "apt-get -y install sox mpg123 pulseaudio pulseaudio-module-bluetooth && usermod -G bluetooth -a pi && (echo load-module module-switch-on-connect;echo load-module module-bluetooth-policy;echo load-module module-bluetooth-discover) >> /etc/pulse/default.pa && (echo [General];echo FastConnectable = true) >> /etc/bluetooth/main.conf && reboot"') # (eee off: improves reliability of gigabit ethernet on RPi400) + if a.startswith("--rpi-bluetooth-setup"): # tested on Raspberry Pi 400 with OS versions 11 and 12; also tested on Raspberry Pi Zero W with Raspbian 10 Lite (with the device already paired: needed to say "scan on", "discovery on", remove + pair in bluetoothctl). Send Eth=(bluetooth Ethernet addr) to start. Note that the setup command reboots the system. 
+ # NOTE: If running on Pi with OS 12 and you've also done "raspi-config" to set things back to PulseAudio (as needed for example for language-synchronised Bluetooth playing in http://ssb22.user.srcf.net/s60/video.html notes), you might need to replace 'ExecStart=' with 'ExecStart=bash -c "while ! ssh localhost true; do sleep 1; done; ssh localhost ' below (and add a " at end of line), and do an ssh-keygen and add to authorized_keys, so player is run in a separate session from systemd (even though the user is the same; it's not clear why this is needed) + os.system('(echo "[Unit]";echo "Description=Gradint player utility";echo "[Service]";echo "Type=oneshot";echo "ExecStart='+os.path.join(os.getcwd(),sys.argv[0])+'";echo "WorkingDirectory='+os.getcwd()+'";echo User="$(whoami)";echo "[Install]";echo "WantedBy=multi-user.target") > player.service && sudo mv player.service /etc/systemd/system/ && sudo systemctl daemon-reload && sudo systemctl enable player && chmod +x '+sys.argv[0]+' && sudo bash -c "apt-get -y install sox mpg123 pulseaudio pulseaudio-module-bluetooth && usermod -G bluetooth -a $USER && (echo load-module module-switch-on-connect;echo load-module module-bluetooth-policy;echo load-module module-bluetooth-discover) >> /etc/pulse/default.pa && (echo [General];echo FastConnectable = true) >> /etc/bluetooth/main.conf && reboot"') # (eee off: improves reliability of gigabit ethernet on RPi400) elif a=="--aplay": use_aplay = True # aplay and madplay, for older embedded devices, NOT tested together with --rpi-bluetooth-* above elif a.startswith("--delegate="): delegate_to_check=a.split('=')[1] # will ping that IP and delegate all sound to it when it's up. E.g. if it has better amplification but it's not always switched on. elif a.startswith("--chime="): chime_mp3=a.split('=')[1] # if clock bell desired, e.g. 
echo '$i-14vfff$c48o0l1b- @'|mwr2ly > chime.ly && lilypond chime.ly && timidity -Ow chime.midi && audacity chime.wav (amplify + trim) + mp3-encode (keep default 44100 sample rate so ~38 frames per sec). Not designed to work with --delegate. Pi1's 3.5mm o/p doesn't sound very good with this bell. @@ -69,9 +70,9 @@ continue elif d=='QUIT': s.close() ; break - elif d=="Eth=": # Eth=ethernet address, to connect via Bluetooth, tested on Raspberry Pi 400 with Raspbian 11 + elif d=="Eth=": # Eth=ethernet address to connect via Bluetooth (see --rpi-bluetooth-setup above) eth = S(c.recv(17)) - assert re.match("^[A-Fa-f0-9:]*$",eth) + assert re.match("^[A-Fa-f0-9:]+$",eth) os.system("M=/dev/null;E="+eth+";if ! pacmd list-sinks | grep "+eth.replace(":","_")+" >$M; then while true; do bluetoothctl --timeout 1 disconnect | grep Missing >$M||sleep 5;T=5;while ! bluetoothctl --timeout $T connect $E | tee $M | egrep \"Connection successful|Device $E Connected: yes\"; do sleep 5; T=10;M=/dev/stderr;bluetoothctl --timeout 1 devices;echo Retrying $E; done ; Got=0; for Try in 1 2 3 4 5 6 7 8 9 a b c d e f g h i j k l m n o p q r s t u v w x y z; do if pacmd list-sinks | grep "+eth.replace(":","_")+" >/dev/null; then Got=1; break; fi; sleep 1; done; if [ $Got = 1 ] ; then break; fi; done; fi; pacmd set-default-sink bluez_sink."+eth.replace(":","_")+".a2dp_sink") # ; play /usr/share/scratch/Media/Sounds/Animal/Dog1.wav # (not really necessary if using 'close the socket' to signal we're ready) c.close() ; continue elif d=="Eth0": diff --git a/samples/utils/recover-unavail.py b/samples/utils/recover-unavail.py old mode 100644 new mode 100755 diff --git a/samples/utils/synth-batchconvert-helper.py b/samples/utils/synth-batchconvert-helper.py old mode 100644 new mode 100755 diff --git a/samples/utils/trace.py b/samples/utils/trace.py old mode 100644 new mode 100755 diff --git a/samples/utils/transliterate.py b/samples/utils/transliterate.py old mode 100644 new mode 100755 diff --git 
a/server/cantonese.py b/server/cantonese.py old mode 100644 new mode 100755 index a5698c1..ecc4576 --- a/server/cantonese.py +++ b/server/cantonese.py @@ -5,7 +5,7 @@ # cantonese.py - Python functions for processing Cantonese transliterations # (uses eSpeak and Gradint for help with some of them) -# v1.42 (c) 2013-15,2017-23 Silas S. Brown. License: GPL +# v1.48 (c) 2013-15,2017-24 Silas S. Brown. License: GPL cache = {} # to avoid repeated eSpeak runs, # zi -> jyutping or (pinyin,) -> translit @@ -64,7 +64,7 @@ def hanzi_only(unitext): return u"".join(filter(lambda x:0x4e00<=ord(x)<0xa700 o def py2nums(pinyin): if not type(pinyin)==type(u""): pinyin = pinyin.decode('utf-8') - assert pinyin.strip(), "blank pinyin" # saves figuring out a findall TypeError + if not pinyin.strip(): return "" global pinyin_dryrun if pinyin_dryrun: pinyin_dryrun = list(pinyin_dryrun) @@ -91,7 +91,7 @@ def adjust_jyutping_for_pinyin(hanzi,jyutping,pinyin): i = 0 ; tones = re.finditer('[1-7]',jyutping) ; j2 = [] for h,p in zip(list(hanzi),pinyin): try: j = getNext(tones).end() - except StopIteration: return jyutping # one of the zin has no Cantonese reading, which we'll pick up later on "failed to fix" + except StopIteration: return jyutping # one of the hanzi has no Cantonese reading in our data: we'll warn "failed to fix" below j2.append(jyutping[i:j]) ; i = j if h in py2j and p.lower() in py2j[h]: j2[-1]=j2[-1][:re.search("[A-Za-z]*[1-7]$",j2[-1]).start()]+py2j[h][p.lower()] return "".join(j2)+jyutping[i:] @@ -100,8 +100,9 @@ def adjust_jyutping_for_pinyin(hanzi,jyutping,pinyin): u"\u4E3A\u70BA":{"wei2":"wai4","wei4":"wai6"}, u"\u4E50\u6A02":{"le4":"lok6","yue4":"ngok6"}, u"\u4EB2\u89AA":{"qin1":"can1","qing4":"can3"}, +u"\u4EC0":{"shen2":"sam6","shi2":"sap6"}, # unless zaap6 u"\u4F20\u50B3":{"chuan2":"cyun4","zhuan4":"zyun6"}, -u"\u4FBF":{"bian4":"pin4","pian2":"bin6"}, +u"\u4FBF":{"bian4":"bin6","pian2":"pin4"}, u"\u5047":{"jia3":"gaa2","jia4":"gaa3"}, 
u"\u5174\u8208":{"xing1":"hing1","xing4":"hing3"}, # u"\u5207":{"qie4":"cai3","qie1":"cit3"}, # WRONG (rm'd v1.17). It's cit3 in re4qie4. It just wasn't in yiqie4 (which zhy_list has as an exception anyway) @@ -153,10 +154,10 @@ def adjust_jyutping_for_pinyin(hanzi,jyutping,pinyin): def jyutping_to_lau(j): j = S(j).lower().replace("j","y").replace("z","j") for k,v in jlRep: j=j.replace(k,v) - return j.lower().replace("aa","a").replace("ohek","euk") + return j.lower().replace("ohek","euk") def jyutping_to_lau_java(jyutpingNo=2,lauNo=1): # for annogen.py 3.29+ --annotation-postprocess to ship Jyutping and generate Lau at runtime - return 'if(annotNo=='+str(jyutpingNo)+'||annotNo=='+str(lauNo)+'){m=Pattern.compile("(.*?)").matcher(r);sb=new StringBuffer();while(m.find()){String r2=(annotNo=='+str(jyutpingNo)+'?m.group(1).replaceAll("([1-7])(.)","$1­$2"):(m.group(1)+" ").toLowerCase().replace("j","y").replace("z","j")'+''.join('.replace("'+k+'","'+v+'")' for k,v in jlRep)+'.toLowerCase().replace("aa","a").replace("ohek","euk").replaceAll("([1-7])","$1-").replace("- "," ").replaceAll(" $","")),tmp=m.group(1).substring(0,1);if(annotNo=='+str(lauNo)+'&&tmp.equals(tmp.toUpperCase()))r2=r2.substring(0,1).toUpperCase()+r2.substring(1);m.appendReplacement(sb,""+r2+"");}m.appendTail(sb); r=sb.toString();}' # TODO: can probably go faster with mapping for some of this + return 'if(annotNo=='+str(jyutpingNo)+'||annotNo=='+str(lauNo)+'){m=Pattern.compile("(.*?)").matcher(r);sb=new StringBuffer();while(m.find()){String r2=(annotNo=='+str(jyutpingNo)+'?m.group(1).replaceAll("([1-7])(.)","$1­$2"):(m.group(1)+" ").toLowerCase().replace("j","y").replace("z","j")'+''.join('.replace("'+k+'","'+v+'")' for k,v in jlRep)+'.toLowerCase().replace("ohek","euk").replaceAll("([1-7])","$1-").replace("- "," ").replaceAll(" 
$","")),tmp=m.group(1).substring(0,1);if(annotNo=='+str(lauNo)+'&&tmp.equals(tmp.toUpperCase()))r2=r2.substring(0,1).toUpperCase()+r2.substring(1);m.appendReplacement(sb,""+r2+"");}m.appendTail(sb); r=sb.toString();}' # TODO: can probably go faster with mapping for some of this def incomplete_lau_to_jyutping(l): # incomplete: assumes Lau didn't do the "aa" -> "a" rule l = S(l).lower().replace("euk","ohek") @@ -236,7 +237,10 @@ def mysub(z,l): z = re.sub(re.escape(x)+r"(.)",r"\1"+y,z) return z if type(u"")==type(""): U=str # Python 3 - else: U=unicode # Python 2 + else: # Python 2 + def U(x): + try: return x.decode('utf-8') # might be an emoji pass-through + except: return x # already Unicode return unicodedata.normalize('NFC',mysub(U(jyutping_to_yale_TeX(j).replace(r"\i{}","i").replace(r"\I{}","I")),[(r"\`",u"\u0300"),(r"\'",u"\u0301"),(r"\=",u"\u0304")])).encode('utf-8') def superscript_digits_TeX(j): @@ -291,6 +295,9 @@ def songSubst(l): pinyin = pinyin.decode('utf-8') if pinyin and not (pinyin,) in cache: pinyin_dryrun.add(pinyin) + for w in pinyin.split(): + for h in w.split('-'): + pinyin_dryrun.add(h) dryrun_mode = False for l in lines: if '#' in l: l,pinyin = l.split('#') @@ -300,7 +307,7 @@ def songSubst(l): elif pinyin: jyutping = adjust_jyutping_for_pinyin(l,jyutping,pinyin) groupLens = [0] - for syl,space in re.findall('([A-Za-z]*[1-5])( *)',py2nums(pinyin)): + for syl,space in re.findall('([A-Za-z]*[1-5])( *)',' '.join('-'.join(py2nums(h) for h in w.split('-')) for w in pinyin.split())): # doing it this way so we're not relying on espeak transliterate_multiple to preserve spacing and hyphenation groupLens[-1] += 1 if space: groupLens.append(0) if not groupLens[-1]: groupLens=groupLens[:-1] diff --git a/server/email-lesson.sh b/server/email-lesson.sh index 8406ee7..17e0d95 100755 --- a/server/email-lesson.sh +++ b/server/email-lesson.sh @@ -3,9 +3,9 @@ # email-lesson.sh: a script that can help you to # automatically distribute daily Gradint lessons # to 
students using a web server with reminder -# emails. Version 1.15 +# emails. Version 1.16 -# (C) 2007-2010,2020-2022 Silas S. Brown, License: GPL +# (C) 2007-2010,2020-2022,2024 Silas S. Brown, License: GPL # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -39,7 +39,7 @@ elif which mutt >/dev/null 2>/dev/null; then DefaultMailProg="mutt -x" else DefaultMailProg="ssh example.org mail" fi -if test "a$1" == "a--run"; then +if [ "$1" == "--run" ]; then set -o pipefail # make sure errors in pipes are reported if ! [ -d email_lesson_users ]; then echo "Error: script does not seem to have been set up yet" @@ -61,14 +61,14 @@ if test "a$1" == "a--run"; then while true; do ssh -C $PUBLIC_HTML_EXTRA_SSH_OPTIONS -n -o ControlMaster=yes $ControlPath $(echo "$PUBLIC_HTML"|sed -e 's/:.*//') sleep 86400; sleep 10; done & MasterPid=$! else unset MasterPid fi - (while ! bash -c "$CAT_LOGS_COMMAND"; do echo "cat-logs failed, re-trying in 61 seconds" 1>&2;sleep 61; done) | grep '/user\.' > "$TMPDIR/._email_lesson_logs" + (while ! bash -c "$CAT_LOGS_COMMAND"; do echo "cat-logs failed, re-trying in 61 seconds" >&2;sleep 61; done) | grep '/user\.' > "$TMPDIR/._email_lesson_logs" # (note: sleeping odd numbers of seconds so we can tell where it is if it gets stuck in one of these loops) Users="$(echo user.*)" cd .. unset NeedRunMirror for U in $Users; do . email_lesson_users/config - if ! test "a$GLOBAL_GRADINT_OPTIONS" == a; then GLOBAL_GRADINT_OPTIONS="$GLOBAL_GRADINT_OPTIONS ;"; fi + if [ "$GLOBAL_GRADINT_OPTIONS" ]; then GLOBAL_GRADINT_OPTIONS="$GLOBAL_GRADINT_OPTIONS ;"; fi # set some (but not all!) variables to defaults in case not set in profile SUBJECT_LINE="$DEFAULT_SUBJECT_LINE" FORGOT_YESTERDAY="$DEFAULT_FORGOT_YESTERDAY" @@ -85,7 +85,7 @@ if test "a$1" == "a--run"; then mv "email_lesson_users/$U/profile.removeCR" "email_lesson_users/$U/profile" fi . 
"email_lesson_users/$U/profile" - if test "a$Use_M3U" == ayes; then FILE_TYPE_2=m3u + if [ "$Use_M3U" == yes ]; then FILE_TYPE_2=m3u else FILE_TYPE_2=$FILE_TYPE; fi if echo "$MailProg" | grep ssh >/dev/null; then # ssh discards a level of quoting, so we need to be more careful @@ -94,7 +94,7 @@ if test "a$1" == "a--run"; then Extra_Mailprog_Params2="\"$Extra_Mailprog_Params2\"" fi if [ -e "email_lesson_users/$U/lastdate" ]; then - if test "$(cat "email_lesson_users/$U/lastdate")" == "$(date +%Y%m%d)"; then + if [ "$(cat "email_lesson_users/$U/lastdate")" == "$(date +%Y%m%d)" ]; then # still on same day - do nothing with this user this time continue fi @@ -114,10 +114,10 @@ if test "a$1" == "a--run"; then fi else Did_Download=1; fi rm -f "email_lesson_users/$U/rollback" - if test $Did_Download == 0; then + if [ $Did_Download == 0 ]; then # send a reminder DaysOld="$(python -c "import os,time;print(int((time.time()-os.stat('email_lesson_users/$U/lastdate').st_mtime)/3600/24))")" - if test $DaysOld -lt 5 || test $(date +%u) == 1; then # (remind only on Mondays if not checked for 5 days, to avoid filling up inboxes when people are away and can't get to email) + if [ $DaysOld -lt 5 ] || [ $(date +%u) == 1 ]; then # (remind only on Mondays if not checked for 5 days, to avoid filling up inboxes when people are away and can't get to email) while ! 
$MailProg -s "$SUBJECT_LINE" "$STUDENT_EMAIL" "$Extra_Mailprog_Params1" "$Extra_Mailprog_Params2" </dev/null; then OUTDIR=$TMPDIR else OUTDIR=$PUBLIC_HTML; fi USER_GRADINT_OPTIONS="$GLOBAL_GRADINT_OPTIONS $GRADINT_OPTIONS samplesDirectory='email_lesson_users/$U/samples'; progressFile='email_lesson_users/$U/progress.txt'; pickledProgressFile='email_lesson_users/$U/progress.bin'; vocabFile='email_lesson_users/$U/vocab.txt';saveLesson='';loadLesson=0;progressFileBackup='email_lesson_users/$U/progress.bak';outputFile=" @@ -147,14 +147,14 @@ do echo "mail sending failed; retrying in 62 seconds"; sleep 62; done; fi tail -$NumLines "email_lesson_users/$U/podcasts-to-send" > "email_lesson_users/$U/podcasts-to-send2" mv "email_lesson_users/$U/podcasts-to-send" "email_lesson_users/$U/podcasts-to-send.old" mv "email_lesson_users/$U/podcasts-to-send2" "email_lesson_users/$U/podcasts-to-send" - if test $NumLines == 0; then + if [ $NumLines == 0 ]; then echo "$U" | $MailProg -s Warning:email-lesson-run-out-of-podcasts $ADMIN_EMAIL fi else rm -f "email_lesson_users/$U/podcasts-to-send.old" # won't be a rollback after this fi - if test "$ENCODE_ON_REMOTE_HOST" == 1; then + if [ "$ENCODE_ON_REMOTE_HOST" == 1 ]; then ToSleep=123 - while ! if test "a$Send_Podcast_Instead" == a; then + while ! if [ ! 
"$Send_Podcast_Instead" ]; then python gradint.py "$USER_GRADINT_OPTIONS '-.sh'" "$TMPDIR/__stderr" | ssh -C $PUBLIC_HTML_EXTRA_SSH_OPTIONS $ControlPath $(echo "$PUBLIC_HTML"|sed -e 's/:.*//') "mkdir -p $REMOTE_WORKING_DIR; cd $REMOTE_WORKING_DIR; cat > __gradint.sh;chmod +x __gradint.sh;PATH=$SOX_PATH ./__gradint.sh|$ENCODING_COMMAND $(echo $PUBLIC_HTML|sed -e 's/[^:]*://')/$U-$CurDate.$FILE_TYPE;rm -f __gradint.sh"; else cd "email_lesson_users/$U" ; cat "$Send_Podcast_Instead" | ssh -C $PUBLIC_HTML_EXTRA_SSH_OPTIONS $ControlPath $(echo "$PUBLIC_HTML"|sed -e 's/:.*//') "cat > $(echo $PUBLIC_HTML|sed -e 's/[^:]*://')/$U-$CurDate.$FILE_TYPE"; cd ../..; @@ -166,18 +166,18 @@ do echo "mail sending failed; retrying in 62 seconds"; sleep 62; done; fi sleep $ToSleep ; ToSleep=$[$ToSleep*1.5] # (increasing-time retries) done rm "$TMPDIR/__stderr" - if test "a$Use_M3U" == ayes; then + if [ "$Use_M3U" == yes ]; then while ! ssh -C $PUBLIC_HTML_EXTRA_SSH_OPTIONS $ControlPath $(echo "$PUBLIC_HTML"|sed -e 's/:.*//') "echo $OUTSIDE_LOCATION/$U-$CurDate.$FILE_TYPE > $(echo $PUBLIC_HTML|sed -e 's/[^:]*://')/$U-$CurDate.m3u"; do sleep 63; done fi else # not ENCODE_ON_REMOTE_HOST - if ! test "a$Send_Podcast_Instead" == a; then + if [ "$Send_Podcast_Instead" ]; then (cd "email_lesson_users/$U" ; cat "$Send_Podcast_Instead") > "$OUTDIR/$U-$CurDate.$FILE_TYPE" elif ! 
python gradint.py "$USER_GRADINT_OPTIONS '$OUTDIR/$U-$CurDate.$FILE_TYPE'" "$OUTDIR/$U-$CurDate.m3u" fi if echo "$PUBLIC_HTML" | grep : >/dev/null; then @@ -200,14 +200,14 @@ EOF do echo "mail sending failed; retrying in 65 seconds"; sleep 65; done echo "$CurDate" > "email_lesson_users/$U/lastdate" unset AdminNote - if test "a$Send_Podcast_Instead" == a; then - if test "$(zgrep -H -m 1 lessonsLeft "email_lesson_users/$U/progress.txt"|sed -e 's/.*=//')" == 0; then AdminNote="Note: $U has run out of new words"; fi + if ! [ "$Send_Podcast_Instead" ]; then + if [ "$(zgrep -H -m 1 lessonsLeft "email_lesson_users/$U/progress.txt"|sed -e 's/.*=//')" == 0 ]; then AdminNote="Note: $U has run out of new words"; fi elif ! [ -e "email_lesson_users/$U/podcasts-to-send" ]; then AdminNote="Note: $U has run out of podcasts"; fi - if ! test "a$AdminNote" == a; then + if [ "$AdminNote" ]; then while ! echo "$AdminNote"|$MailProg -s gradint-user-ran-out "$ADMIN_EMAIL"; do echo "Mail sending failed; retrying in 67 seconds"; sleep 67; done fi done # end of per-user loop - if test "a$NeedRunMirror" == "a1" && ! test "a$PUBLIC_HTML_MIRROR_COMMAND" == a; then + if [ "$NeedRunMirror" == "1" ] && [ "$PUBLIC_HTML_MIRROR_COMMAND" ]; then while ! $PUBLIC_HTML_MIRROR_COMMAND; do echo "PUBLIC_HTML_MIRROR_COMMAND failed; retrying in 79 seconds" echo As subject | $MailProg -s "PUBLIC_HTML_MIRROR_COMMAND failed, will retry" "$ADMIN_EMAIL" || true # ignore errors @@ -215,9 +215,9 @@ do echo "mail sending failed; retrying in 65 seconds"; sleep 65; done done fi rm -f "$TMPDIR/._email_lesson_logs" - if ! 
test a$MasterPid == a; then + if [ $MasterPid ] ; then kill $MasterPid - kill $(ps axwww|grep "$TMPDIR/__gradint_ctrl"|sed -e 's/^ *//' -e 's/ .*//') 2>/dev/null + kill $(pgrep -f "$TMPDIR/__gradint_ctrl") 2>/dev/null rm -f "$TMPDIR/__gradint_ctrl" # in case ssh doesn't fi rm -f "$Gradint_Dir/.email-lesson-running" @@ -227,7 +227,7 @@ fi echo "After setting up users, run this script daily with --run on the command line." echo "As --run was not specified, it will now go into setup mode." # Setup: -if test "a$EDITOR" == a; then +if ! [ "$EDITOR" ]; then echo "Error: No EDITOR environment variable set"; exit 1 fi if ! [ -e email_lesson_users/config ]; then @@ -286,7 +286,7 @@ while true; do echo "Type a user alias (or just press Enter) to add a new user, or Ctrl-C to quit" read Alias ID=$(mktemp -d user.$(python -c 'import random; print(random.random())')XXXXXX) # (newer versions of mktemp allow more than 6 X's so the python step isn't necessary, but just in case we want to make sure that it's hard to guess the ID) - if ! test "a$Alias" == a; then ln -s "$ID" "$Alias"; fi + if [ "$Alias" ]; then ln -s "$ID" "$Alias"; fi cd "$ID" || exit 1 cat > profile <2: gradint.map,gradint.filter,gradint.chr=gradint._map,gradint._filter,gradint.unichr # undo Python 3 workaround in preparation for it to be done again, because reload doesn't do this (at least not on all Python versions) + gradint = reload(gradint) else: import gradint gradint.waitOnMessage = lambda *args:False langFullName = {} for l in gradint.ESpeakSynth().describe_supported_languages().split(): abbr,name = gradint.S(l).split("=") - langFullName[abbr]=name + langFullName[abbr]=name.replace("_","-") # Try to work out probable default language: lang = os.environ.get("HTTP_ACCEPT_LANGUAGE","") if lang: @@ -74,8 +80,10 @@ reinit_gradint() def main(): if "id" in query: # e.g. 
from redirectHomeKeepCookie - os.environ["HTTP_COOKIE"]="id="+query.getfirst("id") - print ('Set-Cookie: id=' + query.getfirst("id")+'; expires=Wed, 1 Dec 2036 23:59:59 GMT') + queryID = query.getfirst("id") + if not re.match("[A-Za-z0-9_.-]",queryID): return htmlOut("Bad query.  Bad, bad query.") # to avoid cluttering the disk if we're being given random queries by an attacker. IDs we generate are numeric only, but allow alphanumeric in case server admin wants to generate them. Don't allow =, parens, etc (likely random SQL query) + os.environ["HTTP_COOKIE"]="id="+queryID + print ('Set-Cookie: id=' + queryID+'; expires=Wed, 1 Dec 2036 23:59:59 GMT') # TODO: S2G if has_userID(): setup_userID() # always, even for justSynth, as it may include a voice selection (TODO consequently being called twice in many circumstances, could make this more efficient) filetype="" if "filetype" in query: filetype=query.getfirst("filetype") @@ -95,19 +103,19 @@ def main(): gradint.justSynthesize="0" if "l2w" in query and query.getfirst("l2w"): gradint.startBrowser=lambda *args:0 - if query.getfirst("l2")=="zh" and gradint.sanityCheck(query.getfirst("l2w"),"zh"): gradint.justSynthesize += "#en Pinyin needs tones. Please go back and add tone numbers." # speaking it because alert box might not work and we might be being called from HTML5 Audio stuff (TODO maybe duplicate sanityCheck in js, if so don't call HTML5 audio, then we can have an on-screen message here) + if query.getfirst("l2")=="zh" and gradint.generalCheck(query.getfirst("l2w"),"zh"): gradint.justSynthesize += "#en Pinyin needs tones. Please go back and add tone numbers." 
# speaking it because alert box might not work and we might be being called from HTML5 Audio stuff (TODO maybe duplicate generalCheck in js, if so don't call HTML5 audio, then we can have an on-screen message here) else: gradint.justSynthesize += "#"+query.getfirst("l2").replace("#","").replace('"','')+" "+query.getfirst("l2w").replace("#","").replace('"','') if "l1w" in query and query.getfirst("l1w"): gradint.justSynthesize += "#"+query.getfirst("l1").replace("#","").replace('"','')+" "+query.getfirst("l1w").replace("#","").replace('"','') - if gradint.justSynthesize=="0": return htmlOut('You must type a word in the box before pressing the Speak button.'+backLink) # TODO maybe add a Javascript test to the form also, IF can figure out if window.alert works + if gradint.justSynthesize=="0": return htmlOut(withLocalise('You must type a word in the box before pressing the Speak button.')+backLink) # TODO maybe add a Javascript test to the form also, IF can figure out if window.alert works serveAudio(stream = len(gradint.justSynthesize)>100, filetype=filetype) elif "add" in query: # add to vocab (l1,l2 the langs, l1w,l2w the words) if "l2w" in query and query.getfirst("l2w") and "l1w" in query and query.getfirst("l1w"): gradint.startBrowser=lambda *args:0 - if query.getfirst("l2")=="zh": scmsg=gradint.sanityCheck(query.getfirst("l2w"),"zh") - else: scmsg=None - if scmsg: htmlOut(gradint.B(scmsg)+gradint.B(backLink)) + if query.getfirst("l2")=="zh": gcmsg=gradint.generalCheck(query.getfirst("l2w"),"zh") + else: gcmsg=None + if gcmsg: htmlOut(gradint.B(gcmsg)+gradint.B(backLink)) else: addWord(query.getfirst("l1w"),query.getfirst("l2w"),query.getfirst("l1"),query.getfirst("l2")) - else: htmlOut('You must type words in both boxes before pressing the Add button.'+backLink) # TODO maybe add a Javascript test to the form also, IF can figure out a way to tell whether window.alert() works or not + else: htmlOut(withLocalise('You must type words in both boxes before pressing 
the Add button.')+backLink) # TODO maybe add a Javascript test to the form also, IF can figure out a way to tell whether window.alert() works or not elif "bulkadd" in query: # bulk adding, from authoring options dirID = setup_userID() def isOK(x): @@ -124,7 +132,7 @@ def main(): redirectHomeKeepCookie(dirID,"&dictionary=1") # '1' is special value for JS-only back link; don't try to link to referer as it might be a generated page elif "clang" in query: # change languages (l1,l2) dirID = setup_userID() - if (gradint.firstLanguage,gradint.secondLanguage) == (query.getfirst("l1"),query.getfirst("l2")) and not query.getfirst("clang")=="ignore-unchanged": return htmlOut('You must change the settings before pressing the Change Languages button.'+backLink) # (external scripts can set clang=ignore-unchanged) + if (gradint.firstLanguage,gradint.secondLanguage) == (query.getfirst("l1"),query.getfirst("l2")) and not query.getfirst("clang")=="ignore-unchanged": return htmlOut(withLocalise('You must change the settings before pressing the Change Languages button.')+backLink) # (external scripts can set clang=ignore-unchanged) gradint.updateSettingsFile(gradint.settingsFile,{"firstLanguage": query.getfirst("l1"),"secondLanguage":query.getfirst("l2")}) redirectHomeKeepCookie(dirID) elif "swaplang" in query: # swap languages @@ -142,12 +150,24 @@ def main(): try: v=open(gradint.vocabFile).read() except: v="" # (shouldn't get here unless they hack URLs) htmlOut('

|
',"Text edit your vocab list") - elif "lesson" in query: # make lesson + elif "lesson" in query: # make lesson ("Start lesson" button) setup_userID() gradint.maxNewWords = int(query.getfirst("new")) # (shouldn't need sensible-range check here if got a dropdown; if they really want to hack the URL then ok...) gradint.maxLenOfLesson = int(float(query.getfirst("mins"))*60) # TODO save those settings for next time also? serveAudio(stream = True, inURL = False, filetype=filetype) + elif "bigger" in query or "smaller" in query: + u = setup_userID() ; global zoom + if "bigger" in query: zoom = int(zoom*1.1) + else: zoom = int(zoom/1.1 + 0.5) + open(u+"-zoom.txt","w").write("%d\n" % zoom) + listVocab(True) + elif any("variant"+str(c) in query for c in range(max(len(gradint.GUI_translations[v]) for v in gradint.GUI_translations.keys() if v.startswith("@variants-")))): + for c in range(max(len(gradint.GUI_translations[v]) for v in gradint.GUI_translations.keys() if v.startswith("@variants-"))): #TODO duplicate code + if "variant"+str(c) in query: break + u = setup_userID() + gradint.updateSettingsFile(u+"-settings.txt",{"scriptVariants":{gradint.GUI_languages.get(gradint.firstLanguage,gradint.firstLanguage):c}}) + setup_userID() ; listVocab(True) elif "voNormal" in query: # voice option = normal setup_userID() gradint.voiceOption="" @@ -199,8 +219,10 @@ def allLinesHaveEquals(lines): for l in lines: if not '=' in l: return False return True +gradintUrl = os.environ.get("SCRIPT_URI","") # will be http:// or https:// as appropriate +if not gradintUrl and all(x in os.environ for x in ["REQUEST_SCHEME","SERVER_NAME","SCRIPT_NAME"]): gradintUrl = os.environ["REQUEST_SCHEME"]+"://"+os.environ["SERVER_NAME"]+os.environ["SCRIPT_NAME"] +if not gradintUrl: gradintUrl = "gradint.cgi" # guessing def authorWordList(lines,l1,l2): - gradintUrl = os.environ["SCRIPT_URI"] # will be http:// or https:// as appropriate r=[] ; count = 0 # could have target="gradint" in the following, but it may 
be in a background tab (target="_blank" not recommended as could accumulate many) r.append('
' % gradintUrl) @@ -232,12 +254,14 @@ def justsynthLink(text,lang=""): # assumes written function h5a return ''+gradint.S(text)+'' # TODO if h5a's canPlayType etc works, cld o/p a lesson as a JS web page that does its own 'take out of event stream' and 'progress write-back'. wld need to code that HERE by inspecting the finished Lesson object, don't call play(). +zoom = 100 # in case browser device lacks a zoom UI, we'll provide one def htmlOut(body_u8,title_extra="",links=1): print ("Content-type: text/html; charset=utf-8\n") if title_extra: title_extra=": "+title_extra print ('Gradint Web edition'+title_extra+'') print ('') - print ('') + print ('') + if not zoom==100: print('' % zoom) print ('') if type(body_u8)==type(u""): body_u8=body_u8.encode('utf-8') if hasattr(sys.stdout,'buffer'): # Python 3 @@ -247,9 +271,8 @@ def htmlOut(body_u8,title_extra="",links=1): else: print(body_u8) print ('
') if links: - print ('This is Gradint Web edition. If you need recorded words or additional functions, please download the full version of Gradint.') + print ('This is Gradint Web edition. If you need recorded words or additional functions, please download the full version of Gradint.') # TODO @ low-priority: Android 3 - if "iPhone" in os.environ.get("HTTP_USER_AGENT","") and gradint.secondLanguage=="zh": print ('

You can also try the Open University Chinese Characters First Steps iPhone application.') print ('

'+program_name[:program_name.index("(")]+"using "+gradint.program_name[:gradint.program_name.index("(")]) print ("") backLink = ' Back' # TODO may want to add a random= to the non-js HREF @@ -258,36 +281,30 @@ def serveAudio(stream=0, filetype="mp3", inURL=1): # caller imports gradint (and sets justSynthesize or whatever) first if os.environ.get("HTTP_IF_MODIFIED_SINCE",""): print ("Status: 304 Not Modified\n\n") ; return + httpRange = re.match("bytes=([0-9]*)-([0-9]*)$",os.environ.get('HTTP_RANGE','')) # we MUST support Range: for some iOS players (Apple did not follow the HTTP standard of having a sensible fallback if servers respond with 200, and Apache will not do Range for us if we're CGI). Single Range should be sufficient. + if httpRange: httpRange = httpRange.groups() + if httpRange==('',''): httpRange = None # must spec one + if httpRange: + if not httpRange[0]: httpRange=[-int(httpRange[1]),None] + elif not httpRange[1]: httpRange=[int(httpRange[0]),None] + else: httpRange=[int(httpRange[0]),int(httpRange[1])+1] + print ("Status: 206 Partial Content") + stream = 0 if filetype=="mp3": print ("Content-type: audio/mpeg") else: print ("Content-type: audio/"+filetype) # ok for ogg, wav? if inURL: print ("Last-Modified: Sun, 06 Jul 2008 13:20:05 GMT") print ("Expires: Wed, 1 Dec 2036 23:59:59 GMT") # TODO: S2G + print ("Content-disposition: attachment; filename=gradint."+filetype) # helps with some browsers that can't really do streaming gradint.out_type = filetype + gradint.waitBeforeStart = 0 def mainOrSynth(): oldProgress = None ; rollback = False if not gradint.justSynthesize and 'h5a' in query: - # TODO: if os.environ.get('HTTP_RANGE','')=='bytes=0-1' then that'll be '\xff' for mp3 but would need to stop the web server from adding a Content-Length etc (flush stdout and wait indefinitely for server to terminate the cgi process??) 
- try: oldProgress = open(gradint.progressFile).read() + try: oldProgress = open(gradint.progressFile,'rb').read() except: pass rollback = True - if 'lesson' in query: random.seed(query.getfirst('lesson')) # so clients that re-GET same lesson from partway through can work - if os.environ.get('HTTP_X_PLAYBACK_SESSION_ID',''): # seen on iOS: assumes the stream is a live broadcast and reconnecting to it continues where it left off. TODO: cache the mp3 output? (but don't delay the initial response) Recalculating for now with sox trim: - if os.path.exists(gradint.progressFile+'-ts'): - trimTo = time.time() - os.stat(gradint.progressFile+'-ts').st_mtime - if trimTo < gradint.maxLenOfLesson: - cin,cout = os.popen2("sox "+(gradint.soundCollector.soxParams()+' - ')*2+" trim "+str(int(trimTo))) - gradint.soundCollector.o,copyTo = cin,gradint.soundCollector.o - def copyStream(a,b): - while True: - try: x = a.read(1024) - except EOFError: break - b.write(x) - b.close() - import thread ; thread.start_new(copyStream,(cout,copyTo)) - else: open(gradint.progressFile+'-ts','w') # previous one was abandoned, restart - else: open(gradint.progressFile+'-ts','w') # create 1st one - # end of if HTTP_X_PLAYBACK_SESSION_ID + if "lesson" in query: random.seed(query.getfirst("lesson")) # so clients that re-GET same lesson from partway through can work try: gradint.main() except SystemExit: if not gradint.justSynthesize: @@ -295,25 +312,44 @@ def serveAudio(stream=0, filetype="mp3", inURL=1): reinit_gradint() ; setup_userID() gradint.write_to_stdout,gradint.outputFile = o1,o2 gradint.setSoundCollector(gradint.SoundCollector()) - gradint.justSynthesize = "en Problem generating the lesson. Check we have prompts for those languages." ; gradint.main() ; oldProgress = None + gradint.justSynthesize = "en Problem generating the lesson. Check we have prompts for those languages." 
; gradint.main() + if oldProgress: open(gradint.progressFile,'wb').write(oldProgress) + rollback = oldProgress = None if rollback: # roll back pending lFinish os.rename(gradint.progressFile,gradint.progressFile+'-new') - if oldProgress: open(gradint.progressFile,'w').write(oldProgress) + if oldProgress: open(gradint.progressFile,'wb').write(oldProgress) + # end of def mainOrSynth if stream: - print ("Content-disposition: attachment; filename=gradint.mp3\n") # helps with some browsers that can't really do streaming + print ("") sys.stdout.flush() gradint.write_to_stdout = 1 gradint.outputFile="-."+filetype ; gradint.setSoundCollector(gradint.SoundCollector()) mainOrSynth() else: - tempdir = getoutput("mktemp -d") gradint.write_to_stdout = 0 - gradint.outputFile=tempdir+"/serveThis."+filetype ; gradint.setSoundCollector(gradint.SoundCollector()) - gradint.waitBeforeStart = 0 - mainOrSynth() - print ("Content-Length: "+repr(os.stat(tempdir+"/serveThis."+filetype).st_size)+"\n") + tempdir = tempfile.mkdtemp() + fn,fn2 = tempdir+"/I."+filetype, tempdir+"/O."+filetype + if httpRange and "lesson" in query: # try to cache it + try: os.mkdir(myTmp) + except: pass # exist ok + for f in os.listdir(myTmp): + if os.stat(myTmp+os.sep+f).st_mtime < time.time()-4000: + os.remove(myTmp+os.sep+f) + fn = gradint.outputPrefix+str(int(query.getfirst("lesson")))+"."+filetype # (don't be tricked into clobbering paths with non-int lesson IDs) + if not os.path.exists(fn): + gradint.outputFile=fn + gradint.setSoundCollector(gradint.SoundCollector()) + mainOrSynth() + if httpRange: + total = os.stat(fn).st_size + open(fn2,"wb").write(open(fn,"rb").read()[httpRange[0]:httpRange[1]]) + if httpRange[0]<0: httpRange[0] += total + if not httpRange[1]: httpRange[1] = total + print("Content-Range: bytes %d-%d/%d" % (httpRange[0],httpRange[1]-1,total)) + else: fn2 = fn + print ("Content-Length: "+repr(os.stat(fn2).st_size)+"\n") sys.stdout.flush() - os.system("cat "+tempdir+"/serveThis."+filetype) + 
os.system("cat "+fn2) # components already validated so no quoting required os.system("rm -r "+tempdir) def addWord(l1w,l2w,l1,l2,out=True): @@ -322,7 +358,7 @@ def addWord(l1w,l2w,l1,l2,out=True): if not ((gradint.firstLanguage,gradint.secondLanguage) == (l2,l1) and "HTTP_REFERER" in os.environ and not cginame in os.environ["HTTP_REFERER"]): gradint.updateSettingsFile(gradint.settingsFile,{"firstLanguage": l1,"secondLanguage":l2}) gradint.firstLanguage,gradint.secondLanguage = l1,l2 if (l1w+"_"+l1,l2w+"_"+l2) in map(lambda x:x[1:],gradint.parseSynthVocab(gradint.vocabFile,forGUI=1)): - if out: htmlOut('This word is already in your list.'+backLink) + if out: htmlOut(withLocalise('This word is already in your list.')+backLink) return gradint.appendVocabFileInRightLanguages().write(gradint.B(l2w)+gradint.B("=")+gradint.B(l1w)+gradint.B("\n")) if not out: return @@ -332,7 +368,7 @@ def addWord(l1w,l2w,l1,l2,out=True): def redirectHomeKeepCookie(dirID,extra=""): dirID = gradint.S(dirID) # just in case - print ("Location: "+cginame+"?random="+str(random.random())+"&id="+dirID[dirID.rindex("/")+1:]+extra+"\n") + print ("Location: "+cginame+"?random="+str(random.random())[2:]+"&id="+dirID[dirID.rindex("/")+1:]+extra+"\n") def langSelect(name,curLang): curLang = gradint.espeak_language_aliases.get(curLang,curLang) @@ -355,10 +391,18 @@ for k,v in {"Swap":{"zh":u"交换","zh2":u"交換"}, "click for audio":{"zh":u"击某词就听声音","zh2":u"擊某詞就聽聲音"}, "Repeats":{"zh":u"重复计数","zh2":u"重複計數"}, "To edit this list on another computer, type":{"zh":u"要是想在其他的电脑或手机编辑这个词汇表,请在别的设备打","zh2":u"要是想在其他的電腦或手機編輯這個詞彙表,請在別的設備打"}, + "Please wait while the lesson starts to play":{"zh":u"稍等本课正开始播放","zh2":u"稍等本課正開始播放"}, + "Bigger":{"zh":u"大"},"Smaller":{"zh":u"小"}, + 'You must type a word in the box before pressing the Speak button.':{"zh":u"按‘发音’前,应该框里打字。","zh2":u"按‘發音’前,應該框裡打字。"}, + 'You must type words in both boxes before pressing the Add button.':{"zh":u"按‘添加’前,应该在两框里打字。","zh2":u"按‘添加’前,應該在兩框裡打字。"}, + 'You 
must change the settings before pressing the Change Languages button.':{"zh":u"按‘选择其他语言’前,应该转换语言设定。","zh2":u"按‘選擇其他語言’前,應該轉換語言設定。"}, + 'This word is already in your list.':{"zh":u"本词已经在您的词汇表。","zh2":u"本詞已經在您的詞彙表。"}, "Your word list is empty.":{"zh":u"词汇表没有词汇,加一些吧","zh2":u"詞彙表沒有詞彙,加一些吧"} }.items(): if not k in gradint.GUI_translations: gradint.GUI_translations[k]=v +def withLocalise(x): return x+" "+localise(x,1) + def h5a(): body = """

'+localise("Your first language",1)+': '+langSelect('l1',firstLanguage)+' '+localise("second",1)+': '+langSelect('l2',secondLanguage)+' ' # onfocus..onblur updating onsubmit is needed for iOS "Go" button + body += (localise("Word in %s",1) % localise(secondLanguage))+':
'+(localise("Meaning in %s",1) % localise(firstLanguage))+':

'+localise("Your first language",1)+': '+langSelect('l1',firstLanguage)+' '+localise("second",1)+': '+langSelect('l2',secondLanguage)+' ' # onfocus..onblur updating onsubmit is needed for iOS "Go" button def htmlize(l,lang): if type(l)==type([]) or type(l)==type(()): return htmlize(l[-1],lang) l = gradint.B(l) @@ -403,7 +457,8 @@ def listVocab(hasList): # main screen def deleteLink(l1,l2): r = [] for l in [l2,l1]: - if type(l)==type([]) or type(l)==type(()) or not gradint.B("!synth:") in l: return "" # Web-GUI delete in poetry etc not yet supported + if type(l)==type([]) or type(l)==type(()) or not gradint.B("!synth:") in gradint.B(l): return "" # Web-GUI delete in poetry etc not yet supported + l = gradint.B(l) r.append(gradint.S(quote(l[l.index(gradint.B("!synth:"))+7:l.rfind(gradint.B("_"))]))) r.append(localise("Delete",2)) return ('

') % tuple(r) @@ -415,13 +470,13 @@ def listVocab(hasList): # main screen if data: hasList = "

Click on each word for audio
"+"".join(["%s" % (num,gradint.secondLanguage,htmlize(dest,gradint.secondLanguage),gradint.firstLanguage,htmlize(src,gradint.firstLanguage),deleteLink(src,dest)) for num,src,dest in data])+"
"+localise("Your word list",1)+" ("+localise("click for audio",1)+")
"+localise("Repeats",1)+""+localise(gradint.secondLanguage,1)+""+localise(gradint.firstLanguage,1)+"
%d%s%s
" else: hasList="" else: hasList="" - if hasList: body += '

'+numSelect('new',range(2,10),gradint.maxNewWords)+' '+localise("new words in")+' '+numSelect('mins',[15,20,25,30],int(gradint.maxLenOfLesson/60))+' '+localise('mins')+"""
""" + if hasList: body += '

'+numSelect('new',range(2,10),gradint.maxNewWords)+' '+localise("new words in")+' '+numSelect('mins',[15,20,25,30],int(gradint.maxLenOfLesson/60))+' '+localise('mins')+"""
""" # when lesson ended, refresh with lFinish which saves progress (interrupts before then cancel it), but don't save progress if somehow got the ended event in 1st minute as that could be a browser issue if "dictionary" in query: if query.getfirst("dictionary")=="1": body += '' # apparently it is -1, not -2; the redirect doesn't count as one (TODO are there any JS browsers that do count it as 2?) else: body += '

'+localise("Back to dictionary",1)+'' # TODO check for cross-site scripting if hasList: - if "SCRIPT_URI" in os.environ: hasList += "

"+localise("To edit this list on another computer, type",1)+" "+os.environ["SCRIPT_URI"]+"?id="+getCookieId()+"" - else: hasList="

"+localise("Your word list is empty.",1) + if "://" in gradintUrl: hasList += "

"+localise("To edit this list on another computer, type",1)+" "+gradintUrl.replace(".",".").replace("/","/")+"?id="+re.sub("([0-9]{4})(?!$)",r"\1",getCookieId())+"" # span needed for iOS at least + else: hasList="

"+localise("Your word list is empty.",1) body += hasList htmlOut(body+'

') @@ -454,14 +509,17 @@ def setup_userID(): open(dirName+'/'+userID+'-settings.txt','w') # TODO this could still be a race condition (but should be OK under normal circumstances) need_write = 1 print ('Set-Cookie: id=' + userID+'; expires=Wed, 1 Dec 2036 23:59:59 GMT') # TODO: S2G - userID = dirName+'/'+userID + userID0, userID = userID, dirName+os.sep+userID # already validated gradint.progressFileBackup=gradint.pickledProgressFile=None gradint.vocabFile = userID+"-vocab.txt" gradint.progressFile = userID+"-progress.txt" gradint.settingsFile = userID+"-settings.txt" + gradint.outputPrefix = myTmp+os.sep+userID0+"-" if need_write: gradint.updateSettingsFile(gradint.settingsFile,{'firstLanguage':gradint.firstLanguage,'secondLanguage':gradint.secondLanguage}) else: gradint.readSettings(gradint.settingsFile) gradint.auto_advancedPrompt=1 # prompt in L2 if we don't have L1 prompts on the server, what else can we do... + if os.path.exists(userID+"-zoom.txt"): + global zoom ; zoom = int(open(userID+"-zoom.txt").read().strip()) return userID try: main() diff --git a/server/lesson-table.py b/server/lesson-table.py old mode 100644 new mode 100755 diff --git a/server/safety-check-progressfile.py b/server/safety-check-progressfile.py old mode 100644 new mode 100755 diff --git a/server/vocab2html.py b/server/vocab2html.py old mode 100644 new mode 100755 diff --git a/src/frontend.py b/src/frontend.py index 3d4699b..b223ab4 100644 --- a/src/frontend.py +++ b/src/frontend.py @@ -150,7 +150,7 @@ def clearScreen(): warnings_printed = [] return if winsound or mingw32: os.system("cls") - else: os.system("clear 1>&2") # (1>&2 in case using stdout for something else) + else: os.system("clear >&2") # (>&2 in case using stdout for something else) return True cancelledFiles = [] @@ -1295,10 +1295,10 @@ def openDirectory(dir,inGuiThread=0): if inGuiThread: tkMessageBox.showinfo(app.master.title(),msg) else: waitOnMessage(msg) -def sanityCheck(text,language,pauseOnError=0): # text is 
utf-8; returns error message if any +def generalCheck(text,language,pauseOnError=0): # text is utf-8; returns error message if any if not text: return # always OK empty strings if pauseOnError: - ret = sanityCheck(text,language) + ret = generalCheck(text,language) if ret: waitOnMessage(ret) return ret if language=="zh": @@ -1329,7 +1329,7 @@ def s60_addVocab(): result = appuifw.multi_query(label1,label2) # unfortunately multi_query can't take default items (and sometimes no T9!), but Form is too awkward (can't see T9 mode + requires 2-button save via Options) and non-multi query would be even more modal if not result: return # cancelled l2,l1 = result # guaranteed to both be populated - while sanityCheck(l2.encode('utf-8'),secondLanguage,1): + while generalCheck(l2.encode('utf-8'),secondLanguage,1): l2=appuifw.query(label1,"text",u"") if not l2: return # cancelled # TODO detect duplicates like Tk GUI does? @@ -1371,7 +1371,7 @@ def s60_viewVocab(): oldL1,oldL2 = l1,l2 if action==2: first=1 - while first or (l2 and sanityCheck(l2.encode('utf-8'),secondLanguage,1)): + while first or (l2 and generalCheck(l2.encode('utf-8'),secondLanguage,1)): first=0 ; l2=appuifw.query(ensure_unicode(secondLanguage),"text",l2) if not l2: continue elif action==3: @@ -1386,7 +1386,7 @@ def s60_viewVocab(): def android_addVocab(): while True: l2 = None - while not l2 or sanityCheck(l2.encode('utf-8'),secondLanguage,1): + while not l2 or generalCheck(l2.encode('utf-8'),secondLanguage,1): l2 = android.dialogGetInput("Add word","Word in %s" % localise(secondLanguage)).result if not l2: return # cancelled l1 = android.dialogGetInput("Add word","Meaning in %s" % localise(firstLanguage)).result @@ -1482,9 +1482,9 @@ def downloadLAME(): fi if grep downloads.sourceforge lame.tar.gz 2>/dev/null; then Link="$(cat lame.tar.gz|grep downloads.sourceforge|head -1)" - echo "Got HTML: $Link" 1>&2 + echo "Got HTML: $Link" >&2 Link="$(echo "$Link"|sed -e 's/.*http/http/' -e 
's,.*/projects,http://sourceforge.net/projects,' -e 's/".*//')" - echo "Following link to $Link" 1>&2 + echo "Following link to $Link" >&2 if ! $Curl "$Link" > lame.tar.gz; then rm -f lame.tar.gz; exit 1 fi @@ -1585,7 +1585,7 @@ def gui_event_loop(): if not text1 and not text2: app.todo.alert=u"Before pressing the "+localise("Speak")+u" button, you need to type the text you want to hear into the box." else: if text1.startswith(B('#')): msg="" # see below - else: msg=sanityCheck(text1,secondLanguage) + else: msg=generalCheck(text1,secondLanguage) if msg: app.todo.alert=ensure_unicode(msg) else: app.set_watch_cursor = 1 ; app.toRestore = [] @@ -1706,7 +1706,7 @@ def scanDirs(): app.todo.alert=msg+" "+localise("Repeat count is 0, so we cannot reduce it for extra revision.") elif menu_response=="add": text1 = asUnicode(app.Text1.get()).encode('utf-8') ; text2 = asUnicode(app.Text2.get()).encode('utf-8') - msg=sanityCheck(text1,secondLanguage) + msg=generalCheck(text1,secondLanguage) if msg: app.todo.alert=ensure_unicode(msg) else: o=appendVocabFileInRightLanguages() diff --git a/src/lessonplan.py b/src/lessonplan.py index 1d64301..f2cc2b2 100644 --- a/src/lessonplan.py +++ b/src/lessonplan.py @@ -104,15 +104,19 @@ def _load_from_text(self,fromString=0): self._py3_fix() def _py3_fix(self): if not type("")==type(u""): return - # we're Python 3, and we might have just loaded data from Python 2 + # we're Python 3, and we might have just loaded data from Python 2. Might have to encode as Latin-1 then decode as UTF-8. But don't do this if file was in fact saved by Python 3. 
+ if any(ord(c) > 255 for l in [self.data,self.unavail] for i in l for j in i[1:] for k in ([j] if type(j)==str else j) for c in k): return # must have been written by the Python 3 version for l in [self.data,self.unavail]: for i in range(len(l)): for j in [1,2]: if type(l[i][j])==str: l[i]=l[i][:j]+(S2(LB(l[i][j])),)+l[i][j+1:] elif type(l[i][j])==list: l[i]=l[i][:j]+(map(lambda x:S2(LB(x)),l[i][j]),)+l[i][j+1:] + def _py3_fix_on_save(self): + if type("")==type(u"") and not(any(ord(c) > 255 for l in [self.data,self.unavail] for i in l for j in i[1:] for k in ([j] if type(j)==str else j) for c in k)): self.unavail.append((1,u"\u2014","[Py3]")) # ensure there's at least one, to prevent a py3_fix redo def save(self,partial=0): if need_say_where_put_progress: show_info("Saving "+cond(partial,"partial ","")+"progress to "+progressFile+"... ") else: show_info("Saving "+cond(partial,"partial ","")+"progress... ") + self._py3_fix_on_save() global progressFileBackup # Remove 0-repeated items (helps editing by hand) data = [] # don't use self.data - may want to make another lesson after saving @@ -159,6 +163,7 @@ def save(self,partial=0): if not app and not appuifw and not android: show_info("done\n") def save_binary(self,data): # save a pickled version if possible (no error if not) if not (pickledProgressFile and pickle): return + self._py3_fix_on_save() try: if compress_progress_file: if paranoid_file_management: fn=os.tempnam() diff --git a/src/makeevent.py b/src/makeevent.py index ea13e5d..3857ada 100644 --- a/src/makeevent.py +++ b/src/makeevent.py @@ -362,8 +362,8 @@ def toDict(l): # make the list of filenames into a dict of short-key -> [(long-k except IOError: pass # ignore write errors as it's only a cache except OSError: pass if partials_raw_mode: - (wtype,wrate,wchannels,wframes,wbits) = sndhdr.what(partialsDirectory+os.sep+"header"+dotwav) - partials_raw_0bytes = int(betweenPhrasePause*wrate)*wchannels*(wbits/8) + (wtype,wrate,wchannels,wframes,wbits) = 
swhat(partialsDirectory+os.sep+"header"+dotwav) + partials_raw_0bytes = int(betweenPhrasePause*wrate)*wchannels*int(wbits/8) else: synth_partials_voices,partials_raw_mode = {},None if checkIn("cant",synth_partials_voices): synth_partials_voices["zhy"]=synth_partials_voices["zh-yue"]=synth_partials_voices["cant"] @@ -476,7 +476,8 @@ def optimise_partial_playing(ce): return s else: return ce # can't figure out an optimisation in these circumstances def simplified_header(fname): - h=sndhdr.what(fname) + # called by optimise_partial_playing(_list) + h=swhat(fname) # ignore num frames i.e. h[3], just compare formats if h: return h[:3]+h[4:] def optimise_partial_playing_list(ceList): diff --git a/src/play.py b/src/play.py index 5e730f4..a5d85b3 100644 --- a/src/play.py +++ b/src/play.py @@ -148,8 +148,7 @@ def sox_check(): if macsound: if not gotSox and not os.system("mv sox-14.4.2 sox && rm sox.README"): gotSox,soxMp3 = sox_check() # see if that one works instead (NB must use os.system here: our system() has not yet been defined) if not gotSox and got_program("sox"): - if macsound: xtra=". (If you're on 10.8 Mountain Lion, try downloading a more recent sox binary from sox.sourceforge.net and putting it inside Gradint.app, but that will break compatibility with older PowerPC Macs.)" # TODO: ship TWO binaries? but we don't want the default gradint to get too big. See sox.README for more notes. - elif cygwin: xtra="" + if macsound or cygwin: xtra="" else: xtra=". Ubuntu users please install libsox-fmt-all." 
show_warning("SoX found but can't handle WAV, so you won't be able to write lessons to files for later"+xtra) else: gotSox = got_program("sox") @@ -439,18 +438,26 @@ def lengthOfSound(file): if B(file).lower().endswith(B(dotmp3)): return rough_guess_mp3_length(file) else: return pcmlen(file) +if type("")==type(u""): # Python 3 + import wave + def swhat(file): + if file.lower().endswith(os.extsep+"wav"): + o = wave.open(file,'rb') + return "wav",o.getframerate(),o.getnchannels(),o.getnframes(),8*o.getsampwidth() + else: # fallback non-WAV + import sndhdr # before Python 3.13 + return sndhdr.what(file) +else: # Python 2 + import sndhdr + swhat = sndhdr.what def pcmlen(file): - header = sndhdr.what(file) - if not header: - # some Python 3 installations seem less able to run sndhdr - if gotSox: return len(readB(os.popen("sox \""+file+"\" -t raw "+sox_8bit+" "+sox_signed+" -c 1 -r 8000 - ",popenRB)))/8000.0 - else: raise IOError("sndhdr can't analyse file '%s'" % (file,)) + header = swhat(file) (wtype,wrate,wchannels,wframes,wbits) = header if android: if wrate==6144: # might be a .3gp from android_recordFile d = open(file).read() if 'mdat' in d: return (len(d)-d.index('mdat'))/1500.0 # this assumes the bitrate is roughly the same as in my tests, TODO figure it out properly - divisor = wrate*wchannels*wbits/8 # do NOT optimise with (wbits>>3), because wbits could be 4 + divisor = wrate*wchannels*int(wbits/8) # do NOT optimise with (wbits>>3), because wbits could be 4 if not divisor: raise IOError("Cannot parse sample format of '%s': %s" % (file,repr(header))) return (filelen(file) - 44.0) / divisor # 44 is a typical header length, and .0 to convert to floating-point @@ -599,7 +606,7 @@ def beepCmd(soxParams,fname): class ShSoundCollector(object): def __init__(self): self.file2command = {} - self.commands = ["C() { echo -n $1% completed $'\r' 1>&2;}"] + self.commands = ["C() { echo -n $1% completed $'\r' >&2;}"] self.seconds = self.lastProgress = 0 if write_to_stdout: 
self.o=sys.stdout else: self.o = open(outputFile,"wb") @@ -656,7 +663,7 @@ def addFile(self,file,length): def finished(self): if outputFile_appendSilence: self.addSilence(outputFile_appendSilence,False) outfile_writeBytes(self.o,"\n") # so "tail" has a start of a line - self.commands.append("C 100;echo 1>&2;exit") + self.commands.append("C 100;echo >&2;exit") for c in self.commands: outfile_writeBytes(self.o,c+"\n") outfile_writeBytes(self.o,"tail -%d \"$S\" | bash\n" % (len(self.commands)+1)) if not write_to_stdout: diff --git a/src/synth.py b/src/synth.py index 3177631..20fa9a1 100644 --- a/src/synth.py +++ b/src/synth.py @@ -659,7 +659,7 @@ def transliterate_multiple(self,lang,textList,forPartials=1,keepIndexList=0): if int0: if int0 > thisgroup_max_priority: thisgroup_max_priority = int0 - if lWords[-1]=="[_^_]": thisgroup_enWord_priority = int0 # so far it looks like this is going to be an English word + if lWords[-1]==B("[_^_]"): thisgroup_enWord_priority = int0 # so far it looks like this is going to be an English word else: # a split between the groups if thisgroup_enWord_priority == thisgroup_max_priority: # the choice with the highest priority was the one containing the [_^_] to put the word into English en_words[r[-1]]=1 @@ -672,6 +672,7 @@ def transliterate_multiple(self,lang,textList,forPartials=1,keepIndexList=0): foundLetter=0 if l.startswith(B("Translate ")): toAppend=l[l.index(B("'"))+1:-1].replace(LB("\xc3\xbc"),B("v")) + if toAppend==LB("\xc2\xa0"): continue # stray no-break space (don't let this interfere with being able to do partials) if not (checkIn(toAppend,en_words) and r and toAppend==r[-1]): # TODO what about partial English words? e.g. 
try "kao3 testing" - translate 'testing' results in a translate of 'test' also (which assumes it's already in en mode), resulting in a spurious word "test" added to the text box; not sure how to pick this up without parsing the original text and comparing with the Replace rules that occurred r.append(toAppend) @@ -931,19 +932,43 @@ def guess_length(self,lang,text): return quickGuess(len(text),12) # TODO need a if oss_sound_device: def play(self,lang,text): if not self.theProcess: self.startProcess() - self.theProcess.write("(Parameter.set 'Audio_Command \"play --device=%s \$FILE vol %.1f\")\n(tts_text \"%s\" nil)\n" % (oss_sound_device,5*soundVolume,text)) # (tts_text text nil) can be better than (SayText text) because it splits into multiple utterances if necessary + self.theProcess.write("(Parameter.set 'Audio_Command \"play --device=%s \\$FILE vol %.1f\")\n(tts_text \"%s\" nil)\n" % (oss_sound_device,5*soundVolume,text)) # (tts_text text nil) can be better than (SayText text) because it splits into multiple utterances if necessary self.theProcess.flush() # else send it via a file, because we haven't got code to give it to play to the other devices directly def makefile(self,lang,text): if not self.theProcess: self.startProcess() fname = os.tempnam()+dotwav - self.theProcess.write("(Parameter.set 'Audio_Command \"sox \$FILE %s vol 5\")\n(SayText \"%s\")\n" % (fname,text)) + self.theProcess.write("(Parameter.set 'Audio_Command \"sox \\$FILE %s vol 5\")\n(SayText \"%s\")\n" % (fname,text)) self.theProcess.flush() return fname def finish_makefile(self): if self.theProcess: self.theProcess.close() self.theProcess = None +class CoquiSynth(Synth): + def __init__(self): + Synth.__init__(self) + self.synths = {} + def works_on_this_platform(self): + if not unix: return 0 # I'm unable to test elsewhere + self.base = os.environ.get("HOME","")+"/.local/share/tts" + return isDirectory(self.base) # Voices require large downloads the first time they are used, so we'll use only 
already-downloaded voices + def supports_language(self,lang): return any(a.startswith("tts_models--"+lang+"-") for a in os.listdir(self.base)) # TODO: might not want to use all downloaded models, or might not want to use for all input types (e.g. zh does not support pinyin) + def guess_length(self,lang,text): return quickGuess(len(text),6 if lang in ["zh"] else 12) # need better estimate + def makefile(self,lang,text): + text = ensure_unicode(text) + if lang=="zh": text += u"\u3002" # otherwise that model can glitch and repeat the last word of the phrase + if not lang in self.synths: + import torch;from TTS.api import TTS # shouldn't fault if models are downloaded to ~/.local/share/tts (unless uninstalled and not cleaned up...) + # We can assume Python 3 by this point, but must still use syntax compatible with Python 2 + for a in sorted(os.listdir(self.base)): + if a.startswith("tts_models--"+lang+"-"): + self.synths[lang]=TTS(a.replace("--","/")).to(cond(torch.cuda.is_available(),"cuda","cpu")) + break + fname = os.tempnam()+dotwav + self.synths[lang].tts_to_file(text,file_path=fname) + return fname + class GeneralSynth(Synth): def __init__(self): Synth.__init__(self) def supports_language(self,lang): @@ -990,7 +1015,7 @@ def makefile(self,lang,text): all_synth_classes.append(OSXSynth_Say) all_synth_classes.append(OSXSynth_OSAScript) # (prefer _Say if >=10.3 because it's faster) elif s.lower()=="sapi": all_synth_classes.append(PttsSynth) -all_synth_classes += [FestivalSynth,FliteSynth,OldRiscosSynth,S60Synth,AndroidSynth] +all_synth_classes += [CoquiSynth,FestivalSynth,FliteSynth,OldRiscosSynth,S60Synth,AndroidSynth] prefer_espeak = prefer_espeak.split() viable_synths = [] @@ -1162,7 +1187,7 @@ def abspath_from_start(p): # for just_synthesize to check for paths relative to os.chdir(d) return r -def just_synthesize(callSanityCheck=0,lastLang_override=None): +def just_synthesize(callGeneralCheck=0,lastLang_override=None): # Handle the justSynthesize setting (see 
advanced.txt) global startAnnouncement,endAnnouncement,logFile,synth_partials_cache synth_partials_cache = {} # to stop 'memory leak' when running from the GUI @@ -1197,7 +1222,7 @@ def checkCanSynth(fname): r = repr(l[0]) if r[:1]=="b": r=r[1:] show_warning("Assuming that %s is a word to synthesize in language '%s'" % (r,lastLanguage)) - if callSanityCheck and sanityCheck(l[0],lastLanguage,1): return + if callGeneralCheck and generalCheck(l[0],lastLanguage,1): return event = checkCanSynth("!synth:"+S(l[0])+"_"+S(lastLanguage)) if not event: continue # couldn't synth called_synth = 1 @@ -1217,10 +1242,10 @@ def checkCanSynth(fname): lastLanguage=lang ; continue # otherwise, user might have omitted lang by mistake show_warning("Assuming %s was meant to be synthesized in language '%s'" % (cond(B('#') in B(justSynthesize) or len(repr(line))<10,"that '"+repr(line)+"'","this line"),lastLanguage)) - if callSanityCheck and sanityCheck(line,lastLanguage,1): return + if callGeneralCheck and generalCheck(line,lastLanguage,1): return event = checkCanSynth("!synth:"+S(line)+"_"+S(lastLanguage)) else: - if callSanityCheck and sanityCheck(text,lang,1): return + if callGeneralCheck and generalCheck(text,lang,1): return event = checkCanSynth(fname) lastLanguage = lang if not event: continue diff --git a/src/system.py b/src/system.py index ec983c2..eb90715 100644 --- a/src/system.py +++ b/src/system.py @@ -63,7 +63,7 @@ class ShellExecuteInfo(ctypes.Structure): _fields_ = [("cbSize",wintypes.DWORD), try: ctypes.cdll.commdlg except: WMstandard = True -if macsound and __name__=="__main__": os.system("clear 1>&2") # so warnings etc start with a clear terminal (1>&2 just in case using stdout for something else) +if macsound and __name__=="__main__": os.system("clear >&2") # so warnings etc start with a clear terminal (>&2 just in case using stdout for something else) if riscos_sound: sys.stderr.write("Loading Gradint...\n") # in case it takes a while try: import androidhelper as 
android @@ -224,7 +224,7 @@ def wspstrip(s): # directory should be OK by now if sys.platform.find("ymbian")>-1: sys.path.insert(0,os.getcwd()+os.sep+"lib") -import time,sched,sndhdr,random,math,pprint,codecs +import time,sched,random,math,pprint,codecs def exc_info(inGradint=True): import sys # in case it's been gc'd diff --git a/src/top.py b/src/top.py index 3f88721..2f44760 100644 --- a/src/top.py +++ b/src/top.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # (Python 2 or Python 3, but more fully tested on 2) -program_name = "gradint v3.075 (c) 2002-23 Silas S. Brown. GPL v3+." +program_name = "gradint v3.091 (c) 2002-24 Silas S. Brown. GPL v3+." # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -32,7 +32,9 @@ def sort(l,c): l.sort(key=cmp_to_key(c)) def chr(x): return unichr(x).encode('latin1') from subprocess import getoutput popenRB,popenWB = "r","w" - def unicode(b,enc): return b.decode(enc) + def unicode(b,enc): + if type(b)==str: return b + return b.decode(enc) else: # Python 2 def sort(l,c): l.sort(c) popenRB,popenWB = "rb","wb" @@ -43,11 +45,11 @@ def sort(l,c): l.sort(c) try: True except: exec("True = 1 ; False = 0") def readB(f,m=None): - if hasattr(f,"buffer"): f=f.buffer # Python 3 non-"b" file + if hasattr(f,"buffer"): f0,f=f,f.buffer # Python 3 non-"b" file if m: return f.read(m) else: return f.read() # no "None" in Python 2 def writeB(f,b): - if hasattr(f,"buffer"): f=f.buffer # Python 3 non-"b" file + if hasattr(f,"buffer"): f0,f=f,f.buffer # Python 3 non-"b" file f.write(b) def B(x): if type(x)==bytes: return x diff --git a/thindown.py b/thindown.py old mode 100644 new mode 100755 index 1c0c92a..05737ef --- a/thindown.py +++ b/thindown.py @@ -1,3 +1,9 @@ +#!/usr/bin/env python +# (works on either Python 2 or Python 3) + +# program to "thin down" the gradint .py for low memory environments +# by taking out some of the code that's unused on that platform + # 
This file is part of the source code of Gradint # (c) Silas S. Brown. # This program is free software; you can redistribute it and/or modify @@ -8,11 +14,6 @@ # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -#!/usr/bin/env python -# (works on either Python 2 or Python 3) - -# program to "thin down" the gradint .py for low memory environments -# by taking out some of the code that's unused on that platform import sys, re @@ -230,7 +231,67 @@ to_omit = desktop_only + S60_only + android_only + android_or_S60 + not_winCE + riscos_only + mac_only elif "core" in sys.argv: # experimental "core code only" for 'minimal embedded porting' starting point (no UI, no synth, limited file I/O; you'll probably have to load up the event data yourself) version = "core" - to_omit = tk_only + not_S60_or_android + not_android + riscos_only + mac_only + desktop_only + winCE_only + S60_only + android_only + android_or_S60 + ["def main():","def rest_of_main():",'if __name__=="__main__":',"def transliterates_differently(text,lang):","def primitive_synthloop():","def appendVocabFileInRightLanguages():",'def delOrReplace(L2toDel,L1toDel,newL2,newL1,action="delete"):',"def sanityCheck(text,language,pauseOnError=0):","def localise(s):","def singular(number,s):","def readText(l):","def asUnicode(x):","def updateSettingsFile(fname,newVals):","def clearScreen():","def startBrowser(url):",'def getYN(msg,defaultIfEof="n"):',"def waitOnMessage(msg):","def interrupt_instructions():","def parseSynthVocab(fname,forGUI=0):","def scanSamples_inner(directory,retVal,doLimit):","def getLsDic(directory):","def check_has_variants(directory,ls):","def exec_in_a_func(x):","def scanSamples(directory=None):","def synth_from_partials(text,lang,voice=None,isStart=1):","def partials_langname(lang):","if partialsDirectory and isDirectory(partialsDirectory):",'for zipToCheck in 
["yali-voice","yali-lower","cameron-voice"]:','def stripPuncEtc(text):','def can_be_synthesized(fname,dirBase=None,lang=None):','def synthcache_lookup(fname,dirBase=None,printErrors=0,justQueryCache=0,lang=None):','def textof(fname):','if synthCache and transTbl in synthCache_contents:','if synthCache:','class Partials_Synth(Synth):','def abspath_from_start(p):','class SynthEvent(Event):','def pinyin_uColon_to_V(pinyin):','def synth_event(language,text,is_prompt=0):','def get_synth_if_possible(language,warn=1,to_transliterate=False):','if wavPlayer_override or (unix and not macsound and not (oss_sound_device=="/dev/sound/dsp" or oss_sound_device=="/dev/dsp")):','def fix_compatibility(utext):','def read_chinese_number(num):','def preprocess_chinese_numbers(utext,isCant=0):','def intor0(v):','def fix_pinyin(pinyin,en_words):','def fix_commas(text):','def shell_escape(text):','class SimpleZhTransliterator(object):','def sort_out_pinyin_3rd_tones(pinyin):','def ensure_unicode(text):','def unzip_and_delete(f,specificFiles="",ignore_fail=0):','class Synth(object):','def quickGuess(letters,lettersPerSec):',"def changeToDirOf(file,winsound_also=0):",'if app or appuifw or android:','def subst_some_synth_for_synthcache(events):','def decide_subst_synth(cache_fname):','if winsound or winCEsound or mingw32 or riscos_sound or not hasattr(os,"tempnam") or android:','if len(sys.argv)>1:','def readSettings(f):','def exc_info(inGradint=True):','if not fileExists(configFiles[0]):','def u8strip(d):',] + to_omit = tk_only + not_S60_or_android + not_android + riscos_only + mac_only + desktop_only + winCE_only + S60_only + android_only + android_or_S60 + [ +"def main():", +"def rest_of_main():", +'if __name__=="__main__":', +"def transliterates_differently(text,lang):", +"def primitive_synthloop():", +"def appendVocabFileInRightLanguages():", +'def delOrReplace(L2toDel,L1toDel,newL2,newL1,action="delete"):', +"def generalCheck(text,language,pauseOnError=0):", +"def localise(s):", +"def 
singular(number,s):", +"def readText(l):", +"def asUnicode(x):", +"def updateSettingsFile(fname,newVals):", +"def clearScreen():", +"def startBrowser(url):",'def getYN(msg,defaultIfEof="n"):',"def waitOnMessage(msg):", +"def interrupt_instructions():", +"def parseSynthVocab(fname,forGUI=0):", +"def scanSamples_inner(directory,retVal,doLimit):", +"def getLsDic(directory):", +"def check_has_variants(directory,ls):", +"def exec_in_a_func(x):", +"def scanSamples(directory=None):", +"def synth_from_partials(text,lang,voice=None,isStart=1):", +"def partials_langname(lang):", +"if partialsDirectory and isDirectory(partialsDirectory):", +'for zipToCheck in ["yali-voice","yali-lower","cameron-voice"]:', +'def stripPuncEtc(text):', +'def can_be_synthesized(fname,dirBase=None,lang=None):', +'def synthcache_lookup(fname,dirBase=None,printErrors=0,justQueryCache=0,lang=None):', +'def textof(fname):', +'if synthCache and transTbl in synthCache_contents:', +'if synthCache:', +'class Partials_Synth(Synth):', +'def abspath_from_start(p):', +'class SynthEvent(Event):', +'def pinyin_uColon_to_V(pinyin):', +'def synth_event(language,text,is_prompt=0):', +'def get_synth_if_possible(language,warn=1,to_transliterate=False):', +'if wavPlayer_override or (unix and not macsound and not (oss_sound_device=="/dev/sound/dsp" or oss_sound_device=="/dev/dsp")):', +'def fix_compatibility(utext):', +'def read_chinese_number(num):', +'def preprocess_chinese_numbers(utext,isCant=0):', +'def intor0(v):', +'def fix_pinyin(pinyin,en_words):', +'def fix_commas(text):', +'def shell_escape(text):', +'class SimpleZhTransliterator(object):', +'def sort_out_pinyin_3rd_tones(pinyin):', +'def ensure_unicode(text):', +'def unzip_and_delete(f,specificFiles="",ignore_fail=0):', +'class Synth(object):', +'def quickGuess(letters,lettersPerSec):',"def changeToDirOf(file,winsound_also=0):",'if app or appuifw or android:', +'def subst_some_synth_for_synthcache(events):', +'def decide_subst_synth(cache_fname):', +'if 
winsound or winCEsound or mingw32 or riscos_sound or not hasattr(os,"tempnam") or android:', +'if len(sys.argv)>1:', +'def readSettings(f):', +'def exc_info(inGradint=True):', +'if not fileExists(configFiles[0]):', +'def u8strip(d):'] else: assert 0, "Unrecognised version on command line" revertToIndent = lastIndentLevel = indentLevel = -1