Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update to upstream #2

Open
wants to merge 30 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
4431ec4
preserve Cantonese hyphenation
ssb22 Mar 18, 2023
b391a56
clarify comment
ssb22 Jul 1, 2023
bc7528d
Sidney Lau aa->a rule was wrong (typo in the example I used as a guide)
ssb22 Jul 9, 2023
d451c6c
Sidney Lau also fix in Java
ssb22 Jul 10, 2023
18ff713
fix Cantonese transliteration duplicating pre-included jyutping
ssb22 Sep 21, 2023
e0fdf29
properly fix last commit: it was a Python 3 compatibility issue
ssb22 Sep 22, 2023
babc589
this was also supposed to be in last commit
ssb22 Sep 22, 2023
3260b95
fix Cantonese Mandarin-driven override table (bian/pian was swapped)
ssb22 Sep 24, 2023
f8e9998
Cantonese readings from shen/shi
ssb22 Dec 3, 2023
c42c892
web: try harder to stop users accidentally playing two lessons at once
ssb22 Dec 5, 2023
0bd9f2b
server avoid spurious settings files; update identifier wording
ssb22 Dec 6, 2023
6b0896d
Mac: fix launcher for macOS 14
ssb22 Dec 10, 2023
706d688
fix occasional Python 3 crash
ssb22 Dec 12, 2023
61f718c
fix Mac afplay partials-synth on Python 3
ssb22 Dec 19, 2023
08fd2de
Raspberry Pi OS 12 Bluetooth settings
ssb22 Jan 2, 2024
3ff3eea
player does not need to start pulseaudio on startup
ssb22 Jan 14, 2024
6491bd3
player RPi12 comment
ssb22 Jan 16, 2024
be2aebf
Cantonese handle emoji pass-through on Python 2
ssb22 Jan 21, 2024
1f96861
server script modernisations
ssb22 Mar 23, 2024
f9e8f3c
fix write to closed file on transliteration cache in Python 3 + ready…
ssb22 Jun 9, 2024
c4d0dba
fix executable flag
ssb22 Jun 22, 2024
af62f7e
cgi: zoom control; script variants; lesson timer; error localisations…
ssb22 Jul 5, 2024
7fe0510
idiomatic Cantonese hanzi prompts that eSpeak zh-yue can transliterate
ssb22 Jul 10, 2024
c489f51
cgi: big-print layout tweaks + nginx/fcgiwrap compatibility + fix Pyt…
ssb22 Jul 15, 2024
70d28fc
fix Python 3 compatibility issue in Unicode progress files
ssb22 Jul 20, 2024
033aa83
experimental support for Coqui speech synthesis on GNU/Linux
ssb22 Dec 5, 2024
122ea17
fix Coqui support + Python 2 compatibility
ssb22 Dec 6, 2024
5f792aa
fix PATH for some recent Macs; server support HTTP Range (improves iO…
ssb22 Dec 26, 2024
bf7912a
minor Mac warning simplification
ssb22 Dec 28, 2024
70c0e93
omit pycache from gradint-build.7z
ssb22 Dec 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
*~
__pycache__
9 changes: 5 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -232,14 +232,14 @@ publish: $(All_Versions) gradint.py
grep ^program_name < src/top.py|head -1|sed -e 's/.*radint v/v/' -e 's/ .*/./' > ~/homepage/public/gradint/latest-version.txt
make clean
~/homepage/update
ssh st0rage "cd eGuidedog/ssb22/gradint; screen -d -m /bin/bash -c 'sleep 60;. build-sync.sh'"

gradint-build.7z:
mkdir /tmp/gradint-build00
cp -r * /tmp/gradint-build00
rm -r /tmp/gradint-build00/LICENSE /tmp/gradint-build00/README.md /tmp/gradint-build00/charlearn
mv /tmp/gradint-build00 gradint
cd gradint ; make clean ; rm -rf extras ; cd ..
make -C gradint clean
rm -rf gradint/extras
7za a gradint-build.7z gradint/
rm -rf gradint

Expand All @@ -266,6 +266,7 @@ CD: $(Mac_Files) gradint.zip
echo;echo;echo "Made CD directory. Can add gradint/samples, gradint/vocab.txt, gradint/espeak for Windows, gradint/espeak-.. for OSX, sox Win/Mac binaries, oggenc or whatever for Windows, etc."

cleanup:
rm -f `find . -type f -name '*~' -o -name '*.pyc' -o -name DEADJOE`
find . -type f '(' -name '*~' -o -name '*.pyc' -o -name DEADJOE ')' -exec rm -vf '{}' ';'
rm -rvf __pycache__ # must be separate from find, as some find implementations exec before trying to descend and then error
clean: cleanup
rm -f gradint.py $(All_Versions) src/defaults.py gradint-installer.command gradint.dmg
rm -rf gradint.py $(All_Versions) src/defaults.py gradint-installer.command gradint.dmg
27 changes: 22 additions & 5 deletions advanced.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ otherLanguages = ["cant","ko","jp"]
# able to tell the difference between cant_en.wav and an
# ordinary English prompt and might use it wrongly.

possible_otherLanguages = ["cant","ko","jp","en","zh"]
possible_otherLanguages = ["cant","ko","jp","en","zh",
"zhy","zh-yue"]

# You can also fill in otherFirstLanguages below
# (using the same ["item","item"] format) to
Expand Down Expand Up @@ -95,7 +96,7 @@ prefer_espeak = "en"
# "zh" for Zhongwen (Mandarin).
# - You can improve eSpeak's English by installing
# Festival's dictionary and using lexconvert to convert
# it, see http://ssb22.user.srcf.net/gradint/lexconvert.html
# it, see http://ssb22.user.srcf.net/lexconvert/
# (this has already been done in the bundled version).
# - eSpeak is not very natural-sounding, but it is very
# clear and accurate in English and some other languages
Expand Down Expand Up @@ -201,7 +202,7 @@ systemVoice = "en"
# - Festival Lite on Windows (if all else fails) :
# put flite.exe in the gradint folder
#
# - Linux: install Festival, or flite if you want a US accent
# - GNU/Linux: install Festival, or flite for US accent
#
# - S60: the phone's built-in speech can be used
#
Expand All @@ -211,6 +212,22 @@ systemVoice = "en"
# older "Speech!" utility. These can be used only for
# playing in real-time, not for generating files.

# Coqui voices are experimentally supported on GNU/Linux.
# Setup: pip install coqui-tts[server,zh,ja,ko]
# Then download the voices you want, e.g.:
# from TTS.api import TTS;langs = {}
# for m in TTS().list_models(): langs.setdefault(m.split('/')[1].split('-')[0],[]).append(m)
#
# TTS(langs["zh"][0])
# TTS('tts_models/en/jenny/jenny')
# (If any model crashes during download, be sure to delete the
# result from ~/.local/share/tts before running Gradint. For
# example vocoder_models--ja--kokoro--hifigan_v1 may crash.
# I did say support for these voices is experimental.)
# Gradint detects voices that have been downloaded
# (but prefer_espeak overrides this). The Chinese
# voice does NOT support pinyin.

# You can also set extra_speech to a list of
# (language prefix, command), for example:
# extra_speech=[ ("la","say-latvian"),("de","say-german") ]
Expand Down Expand Up @@ -350,7 +367,7 @@ lily_file = "C:\\Program Files\\NeoSpeech\\Lily16\\data-common\\userdict\\userdi
# somewhere under C:\Program Files\VW\VT\Lily\M16-SAPI5\lib\
# but I don't know exactly)

# If you want to use SAPI under WINE in Linux
# If you want to use SAPI under WINE in GNU/Linux
# then you can set ptts_program:
ptts_program = None
# (hint: run winecfg and set Windows version to Millennium (ME)
Expand Down Expand Up @@ -759,7 +776,7 @@ gui_output_directory = "output"
# in which case the first directory that EXISTS will be used
# (or the last one on the list if all else fail).
# Useful if the directory to your MP3 player only appears when
# it's plugged in for example. With Linux automounters you can
# it's plugged in for example. With GNU/Linux automounters
# set "/media/*" as one of the directories, and it will expand to
# whatever removable device is mounted IF there is only one.

Expand Down
1 change: 1 addition & 0 deletions hanzi-prompts/begin_zh-yue.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
開頭
1 change: 1 addition & 0 deletions hanzi-prompts/end_zh-yue.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
今日個堂上完啦
1 change: 1 addition & 0 deletions hanzi-prompts/longpause_zh-yue.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
而家我哋要等一陣,然後翻溫。喺第一課我哋仲未學習好多嘅詞語,所以停頓會比較長,但係喺未來嘅課程,我哋唔會有咁長嘅停頓
1 change: 1 addition & 0 deletions hanzi-prompts/meaningis_zh-yue.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
意思係
1 change: 1 addition & 0 deletions hanzi-prompts/nowPleaseSay_zh-yue.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
而家請講
1 change: 1 addition & 0 deletions hanzi-prompts/pleaseSay_zh-yue.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
請講
1 change: 1 addition & 0 deletions hanzi-prompts/repeatAfterMe_zh-yue.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
請跟住講
1 change: 1 addition & 0 deletions hanzi-prompts/sayAgain_zh-yue.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
再講一次
1 change: 1 addition & 0 deletions hanzi-prompts/tryToSay_zh-yue.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
試吓講
1 change: 1 addition & 0 deletions hanzi-prompts/whatSay_zh-yue.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
點講
1 change: 1 addition & 0 deletions hanzi-prompts/whatmean_zh-yue.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
乜嘢意思?
1 change: 1 addition & 0 deletions hanzi-prompts/whatmean_zh-yue_2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
係乜嘢意思?
1 change: 1 addition & 0 deletions hanzi-prompts/whatmean_zh-yue_3.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
乜嘢意思呢?
3 changes: 2 additions & 1 deletion mac/start-gradint.app/Contents/MacOS/start-gradint
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/bin/bash
export PATH="$PATH:/usr/local/bin" # in case lame etc is there
export PATH="/usr/local/bin:$PATH" # for python3 override + in case lame etc is there
cd "${BASH_SOURCE%/*}/../.." # needed on macOS 14, possibly 13
if sw_vers 2>/dev/null|grep ^ProductVersion.*1[2-9]; then # macOS 12+
if test $(python3 -c 'import tkinter,sys;print(sys.version_info[:3]>=(3,10,1))' 2>/dev/null) = "True"; then exec python3 gradint.py; fi
osascript -e "tell application (path to frontmost application as text) to display dialog \"macOS 12 bundled a broken version of the GUI libraries: please install Python 3 from python.org before running Gradint\" buttons {\"OK\"} with icon stop"
Expand Down
Empty file modified samples/utils/autosplit.py
100644 → 100755
Empty file.
Empty file modified samples/utils/cache-synth.py
100644 → 100755
Empty file.
Empty file modified samples/utils/cleanup-cache.py
100644 → 100755
Empty file.
Empty file modified samples/utils/diagram.py
100644 → 100755
Empty file.
Empty file modified samples/utils/list-synth.py
100644 → 100755
Empty file.
Empty file modified samples/utils/list2cache.py
100644 → 100755
Empty file.
Empty file modified samples/utils/manual-splitter.py
100644 → 100755
Empty file.
11 changes: 6 additions & 5 deletions samples/utils/player.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# (should work in both Python 2 and Python 3)

# Simple sound-playing server v1.56
# Simple sound-playing server v1.58
# Silas S. Brown - public domain - no warranty

# connect to port 8124 (assumes behind firewall)
Expand All @@ -13,8 +13,9 @@

import socket, select, os, sys, os.path, time, re
for a in sys.argv[1:]:
if a.startswith("--rpi-bluetooth-setup"): # tested on Raspberry Pi 400 with Raspbian 11; also tested on Raspberry Pi Zero W with Raspbian 10 Lite (with the device already paired: needed to say "scan on", "discovery on", remove + pair in bluetoothctl). Send Eth=(bluetooth Ethernet addr) to start. Note that the setup command reboots the system.
os.system('if [ -e /etc/xdg/lxsession/LXDE-pi/autostart ]; then mkdir -p /home/pi/.config/lxsession/LXDE-pi && cp /etc/xdg/lxsession/LXDE-pi/autostart /home/pi/.config/lxsession/LXDE-pi/ && echo sudo ethtool --set-eee eth0 eee off >> /home/pi/.config/lxsession/LXDE-pi/autostart && echo python '+os.path.join(os.getcwd(),sys.argv[0])+' >> /home/pi/.config/lxsession/LXDE-pi/autostart; else (echo "[Unit]";echo "Descrption=Gradint player utility";echo "[Service]";echo "Type=oneshot";echo "ExecStart='+os.path.join(os.getcwd(),sys.argv[0])+'";echo "[Install]";echo "WantedBy=multi-user.target") > player.service && sudo mv player.service /etc/systemd/system/ && sudo systemctl daemon-reload && sudo systemctl enable player && chmod +x '+sys.argv[0]+' && awk '+"'"+'// {print} /^import / {print "os.system('+"'"+'"'+"'"+'"'+"'"+'pulseaudio --start'+"'"+'"'+"'"+'"'+"'"+')"}'+"'"+' < '+sys.argv[0]+' > .playerTMP && mv .playerTMP '+sys.argv[0]+'; fi && sudo "apt-get -y install sox mpg123 pulseaudio pulseaudio-module-bluetooth && usermod -G bluetooth -a pi && (echo load-module module-switch-on-connect;echo load-module module-bluetooth-policy;echo load-module module-bluetooth-discover) >> /etc/pulse/default.pa && (echo [General];echo FastConnectable = true) >> /etc/bluetooth/main.conf && reboot"') # (eee off: improves reliability of gigabit ethernet on RPi400)
if a.startswith("--rpi-bluetooth-setup"): # tested on Raspberry Pi 400 with OS versions 11 and 12; also tested on Raspberry Pi Zero W with Raspbian 10 Lite (with the device already paired: needed to say "scan on", "discovery on", remove + pair in bluetoothctl). Send Eth=(bluetooth Ethernet addr) to start. Note that the setup command reboots the system.
# NOTE: If running on Pi with OS 12 and you've also done "raspi-config" to set things back to PulseAudio (as needed for example for language-synchronised Bluetooth playing in http://ssb22.user.srcf.net/s60/video.html notes), you might need to replace 'ExecStart=' with 'ExecStart=bash -c "while ! ssh localhost true; do sleep 1; done; ssh localhost ' below (and add a " at end of line), and do an ssh-keygen and add to authorized_keys, so player is run in a separate session from systemd (even though the user is the same; it's not clear why this is needed)
# Write a systemd unit for this script to ./player.service, move it into /etc/systemd/system
# and enable it; then (as root) install the audio packages, add the invoking user to the
# bluetooth group, append PulseAudio's Bluetooth modules to /etc/pulse/default.pa, enable
# FastConnectable in /etc/bluetooth/main.conf and reboot.
# WorkingDirectory uses os.getcwd(): the os.path module has no getcwd attribute, so the
# previous os.path.getcwd() call raised AttributeError before any of the setup could run.
os.system('(echo "[Unit]";echo "Description=Gradint player utility";echo "[Service]";echo "Type=oneshot";echo "ExecStart='+os.path.join(os.getcwd(),sys.argv[0])+'";echo "WorkingDirectory='+os.getcwd()+'";echo User="$(whoami)";echo "[Install]";echo "WantedBy=multi-user.target") > player.service && sudo mv player.service /etc/systemd/system/ && sudo systemctl daemon-reload && sudo systemctl enable player && chmod +x '+sys.argv[0]+' && sudo bash -c "apt-get -y install sox mpg123 pulseaudio pulseaudio-module-bluetooth && usermod -G bluetooth -a $USER && (echo load-module module-switch-on-connect;echo load-module module-bluetooth-policy;echo load-module module-bluetooth-discover) >> /etc/pulse/default.pa && (echo [General];echo FastConnectable = true) >> /etc/bluetooth/main.conf && reboot"')
elif a=="--aplay": use_aplay = True # aplay and madplay, for older embedded devices, NOT tested together with --rpi-bluetooth-* above
elif a.startswith("--delegate="): delegate_to_check=a.split('=')[1] # will ping that IP and delegate all sound to it when it's up. E.g. if it has better amplification but it's not always switched on.
elif a.startswith("--chime="): chime_mp3=a.split('=')[1] # if clock bell desired, e.g. echo '$i-14vfff$c48o0l1b- @'|mwr2ly > chime.ly && lilypond chime.ly && timidity -Ow chime.midi && audacity chime.wav (amplify + trim) + mp3-encode (keep default 44100 sample rate so ~38 frames per sec). Not designed to work with --delegate. Pi1's 3.5mm o/p doesn't sound very good with this bell.
Expand Down Expand Up @@ -69,9 +70,9 @@
continue
elif d=='QUIT':
s.close() ; break
elif d=="Eth=": # Eth=ethernet address, to connect via Bluetooth, tested on Raspberry Pi 400 with Raspbian 11
elif d=="Eth=": # Eth=ethernet address to connect via Bluetooth (see --rpi-bluetooth-setup above)
eth = S(c.recv(17))
assert re.match("^[A-Fa-f0-9:]*$",eth)
assert re.match("^[A-Fa-f0-9:]+$",eth)
os.system("M=/dev/null;E="+eth+";if ! pacmd list-sinks | grep "+eth.replace(":","_")+" >$M; then while true; do bluetoothctl --timeout 1 disconnect | grep Missing >$M||sleep 5;T=5;while ! bluetoothctl --timeout $T connect $E | tee $M | egrep \"Connection successful|Device $E Connected: yes\"; do sleep 5; T=10;M=/dev/stderr;bluetoothctl --timeout 1 devices;echo Retrying $E; done ; Got=0; for Try in 1 2 3 4 5 6 7 8 9 a b c d e f g h i j k l m n o p q r s t u v w x y z; do if pacmd list-sinks | grep "+eth.replace(":","_")+" >/dev/null; then Got=1; break; fi; sleep 1; done; if [ $Got = 1 ] ; then break; fi; done; fi; pacmd set-default-sink bluez_sink."+eth.replace(":","_")+".a2dp_sink") # ; play /usr/share/scratch/Media/Sounds/Animal/Dog1.wav # (not really necessary if using 'close the socket' to signal we're ready)
c.close() ; continue
elif d=="Eth0":
Expand Down
Empty file modified samples/utils/recover-unavail.py
100644 → 100755
Empty file.
Empty file modified samples/utils/synth-batchconvert-helper.py
100644 → 100755
Empty file.
Empty file modified samples/utils/trace.py
100644 → 100755
Empty file.
Empty file modified samples/utils/transliterate.py
100644 → 100755
Empty file.
23 changes: 15 additions & 8 deletions server/cantonese.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# cantonese.py - Python functions for processing Cantonese transliterations
# (uses eSpeak and Gradint for help with some of them)

# v1.42 (c) 2013-15,2017-23 Silas S. Brown. License: GPL
# v1.48 (c) 2013-15,2017-24 Silas S. Brown. License: GPL

cache = {} # to avoid repeated eSpeak runs,
# zi -> jyutping or (pinyin,) -> translit
Expand Down Expand Up @@ -64,7 +64,7 @@ def hanzi_only(unitext): return u"".join(filter(lambda x:0x4e00<=ord(x)<0xa700 o
def py2nums(pinyin):
if not type(pinyin)==type(u""):
pinyin = pinyin.decode('utf-8')
assert pinyin.strip(), "blank pinyin" # saves figuring out a findall TypeError
if not pinyin.strip(): return ""
global pinyin_dryrun
if pinyin_dryrun:
pinyin_dryrun = list(pinyin_dryrun)
Expand All @@ -91,7 +91,7 @@ def adjust_jyutping_for_pinyin(hanzi,jyutping,pinyin):
i = 0 ; tones = re.finditer('[1-7]',jyutping) ; j2 = []
for h,p in zip(list(hanzi),pinyin):
try: j = getNext(tones).end()
except StopIteration: return jyutping # one of the zin has no Cantonese reading, which we'll pick up later on "failed to fix"
except StopIteration: return jyutping # one of the hanzi has no Cantonese reading in our data: we'll warn "failed to fix" below
j2.append(jyutping[i:j]) ; i = j
if h in py2j and p.lower() in py2j[h]: j2[-1]=j2[-1][:re.search("[A-Za-z]*[1-7]$",j2[-1]).start()]+py2j[h][p.lower()]
return "".join(j2)+jyutping[i:]
Expand All @@ -100,8 +100,9 @@ def adjust_jyutping_for_pinyin(hanzi,jyutping,pinyin):
u"\u4E3A\u70BA":{"wei2":"wai4","wei4":"wai6"},
u"\u4E50\u6A02":{"le4":"lok6","yue4":"ngok6"},
u"\u4EB2\u89AA":{"qin1":"can1","qing4":"can3"},
u"\u4EC0":{"shen2":"sam6","shi2":"sap6"}, # unless zaap6
u"\u4F20\u50B3":{"chuan2":"cyun4","zhuan4":"zyun6"},
u"\u4FBF":{"bian4":"pin4","pian2":"bin6"},
u"\u4FBF":{"bian4":"bin6","pian2":"pin4"},
u"\u5047":{"jia3":"gaa2","jia4":"gaa3"},
u"\u5174\u8208":{"xing1":"hing1","xing4":"hing3"},
# u"\u5207":{"qie4":"cai3","qie1":"cit3"}, # WRONG (rm'd v1.17). It's cit3 in re4qie4. It just wasn't in yiqie4 (which zhy_list has as an exception anyway)
Expand Down Expand Up @@ -153,10 +154,10 @@ def adjust_jyutping_for_pinyin(hanzi,jyutping,pinyin):
def jyutping_to_lau(j):
j = S(j).lower().replace("j","y").replace("z","j")
for k,v in jlRep: j=j.replace(k,v)
return j.lower().replace("aa","a").replace("ohek","euk")
return j.lower().replace("ohek","euk")
def jyutping_to_lau_java(jyutpingNo=2,lauNo=1):
# for annogen.py 3.29+ --annotation-postprocess to ship Jyutping and generate Lau at runtime
return 'if(annotNo=='+str(jyutpingNo)+'||annotNo=='+str(lauNo)+'){m=Pattern.compile("<rt>(.*?)</rt>").matcher(r);sb=new StringBuffer();while(m.find()){String r2=(annotNo=='+str(jyutpingNo)+'?m.group(1).replaceAll("([1-7])(.)","$1&shy;$2"):(m.group(1)+" ").toLowerCase().replace("j","y").replace("z","j")'+''.join('.replace("'+k+'","'+v+'")' for k,v in jlRep)+'.toLowerCase().replace("aa","a").replace("ohek","euk").replaceAll("([1-7])","<sup>$1</sup>-").replace("- "," ").replaceAll(" $","")),tmp=m.group(1).substring(0,1);if(annotNo=='+str(lauNo)+'&&tmp.equals(tmp.toUpperCase()))r2=r2.substring(0,1).toUpperCase()+r2.substring(1);m.appendReplacement(sb,"<rt>"+r2+"</rt>");}m.appendTail(sb); r=sb.toString();}' # TODO: can probably go faster with mapping for some of this
return 'if(annotNo=='+str(jyutpingNo)+'||annotNo=='+str(lauNo)+'){m=Pattern.compile("<rt>(.*?)</rt>").matcher(r);sb=new StringBuffer();while(m.find()){String r2=(annotNo=='+str(jyutpingNo)+'?m.group(1).replaceAll("([1-7])(.)","$1&shy;$2"):(m.group(1)+" ").toLowerCase().replace("j","y").replace("z","j")'+''.join('.replace("'+k+'","'+v+'")' for k,v in jlRep)+'.toLowerCase().replace("ohek","euk").replaceAll("([1-7])","<sup>$1</sup>-").replace("- "," ").replaceAll(" $","")),tmp=m.group(1).substring(0,1);if(annotNo=='+str(lauNo)+'&&tmp.equals(tmp.toUpperCase()))r2=r2.substring(0,1).toUpperCase()+r2.substring(1);m.appendReplacement(sb,"<rt>"+r2+"</rt>");}m.appendTail(sb); r=sb.toString();}' # TODO: can probably go faster with mapping for some of this
def incomplete_lau_to_jyutping(l):
# incomplete: assumes Lau didn't do the "aa" -> "a" rule
l = S(l).lower().replace("euk","ohek")
Expand Down Expand Up @@ -236,7 +237,10 @@ def mysub(z,l):
z = re.sub(re.escape(x)+r"(.)",r"\1"+y,z)
return z
if type(u"")==type(""): U=str # Python 3
else: U=unicode # Python 2
else: # Python 2
def U(x):
try: return x.decode('utf-8') # might be an emoji pass-through
except: return x # already Unicode
return unicodedata.normalize('NFC',mysub(U(jyutping_to_yale_TeX(j).replace(r"\i{}","i").replace(r"\I{}","I")),[(r"\`",u"\u0300"),(r"\'",u"\u0301"),(r"\=",u"\u0304")])).encode('utf-8')

def superscript_digits_TeX(j):
Expand Down Expand Up @@ -291,6 +295,9 @@ def songSubst(l):
pinyin = pinyin.decode('utf-8')
if pinyin and not (pinyin,) in cache:
pinyin_dryrun.add(pinyin)
for w in pinyin.split():
for h in w.split('-'):
pinyin_dryrun.add(h)
dryrun_mode = False
for l in lines:
if '#' in l: l,pinyin = l.split('#')
Expand All @@ -300,7 +307,7 @@ def songSubst(l):
elif pinyin:
jyutping = adjust_jyutping_for_pinyin(l,jyutping,pinyin)
groupLens = [0]
for syl,space in re.findall('([A-Za-z]*[1-5])( *)',py2nums(pinyin)):
for syl,space in re.findall('([A-Za-z]*[1-5])( *)',' '.join('-'.join(py2nums(h) for h in w.split('-')) for w in pinyin.split())): # doing it this way so we're not relying on espeak transliterate_multiple to preserve spacing and hyphenation
groupLens[-1] += 1
if space: groupLens.append(0)
if not groupLens[-1]: groupLens=groupLens[:-1]
Expand Down
Loading