project-renard-survey · ssb22 · Mar 18, 2023 · Jul 1, 2023 · Jul 9, 2023 · Jul 10, 2023
diff --git a/.gitignore b/.gitignore
@@ -1 +1,2 @@
 *~
+__pycache__
diff --git a/Makefile b/Makefile
@@ -232,14 +232,14 @@ publish: $(All_Versions) gradint.py
 	grep ^program_name < src/top.py|head -1|sed -e 's/.*radint v/v/' -e 's/ .*/./' > ~/homepage/public/gradint/latest-version.txt
 	make clean
 	~/homepage/update
-	ssh st0rage "cd eGuidedog/ssb22/gradint; screen -d -m /bin/bash -c 'sleep 60;. build-sync.sh'"
 
 gradint-build.7z:
 	mkdir /tmp/gradint-build00
 	cp -r * /tmp/gradint-build00
 	rm -r /tmp/gradint-build00/LICENSE /tmp/gradint-build00/README.md /tmp/gradint-build00/charlearn
 	mv /tmp/gradint-build00 gradint
-	cd gradint ; make clean ; rm -rf extras ; cd ..
+	$(MAKE) -C gradint clean # recursive make via the MAKE variable so command-line flags and the jobserver carry over to the sub-make
+	rm -rf gradint/extras
 	7za a gradint-build.7z gradint/
 	rm -rf gradint
 
@@ -266,6 +266,7 @@ CD: $(Mac_Files) gradint.zip
 	echo;echo;echo "Made CD directory.  Can add gradint/samples, gradint/vocab.txt, gradint/espeak for Windows, gradint/espeak-.. for OSX, sox Win/Mac binaries, oggenc or whatever for Windows, etc."
 
 cleanup:
-	rm -f `find . -type f -name '*~' -o -name '*.pyc' -o -name DEADJOE`
+	find . -type f '(' -name '*~' -o -name '*.pyc' -o -name DEADJOE ')' -exec rm -vf '{}' ';'
+	rm -rvf __pycache__ # must be separate from find, as some find implementations exec before trying to descend and then error
 clean: cleanup
-	rm -f gradint.py $(All_Versions) src/defaults.py gradint-installer.command gradint.dmg
+	rm -rf gradint.py $(All_Versions) src/defaults.py gradint-installer.command gradint.dmg
diff --git a/advanced.txt b/advanced.txt
@@ -47,7 +47,8 @@ otherLanguages = ["cant","ko","jp"]
 # able to tell the difference between cant_en.wav and an
 # ordinary English prompt and might use it wrongly.
 
-possible_otherLanguages = ["cant","ko","jp","en","zh"]
+possible_otherLanguages = ["cant","ko","jp","en","zh",
+                           "zhy","zh-yue"]
 
 # You can also fill in otherFirstLanguages below
 # (using the same ["item","item"] format) to
@@ -95,7 +96,7 @@ prefer_espeak = "en"
 # "zh" for Zhongwen (Mandarin).
 #    - You can improve eSpeak's English by installing
 #    Festival's dictionary and using lexconvert to convert
-#    it, see http://ssb22.user.srcf.net/gradint/lexconvert.html
+#    it, see http://ssb22.user.srcf.net/lexconvert/
 #    (this has already been done in the bundled version).
 #    - eSpeak is not very natural-sounding, but it is very
 #    clear and accurate in English and some other languages
@@ -201,7 +202,7 @@ systemVoice = "en"
 #    - Festival Lite on Windows (if all else fails) :
 #      put flite.exe in the gradint folder
 #
-#    - Linux: install Festival, or flite if you want a US accent
+#    - GNU/Linux: install Festival, or flite for US accent
 #
 #    - S60: the phone's built-in speech can be used
 #
@@ -211,6 +212,22 @@ systemVoice = "en"
 #      older "Speech!" utility.  These can be used only for
 #      playing in real-time, not for generating files.
 
+# Coqui voices are experimentally supported on GNU/Linux.
+# Setup: pip install coqui-tts[server,zh,ja,ko]
+# Then download the voices you want, e.g.:
+# from TTS.api import TTS;langs = {}
+# for m in TTS().list_models(): langs.setdefault(m.split('/')[1].split('-')[0],[]).append(m)
+# 
+# TTS(langs["zh"][0])
+# TTS('tts_models/en/jenny/jenny')
+# (If any model crashes during download, be sure to delete the
+# result from ~/.local/share/tts before running Gradint.  For
+# example vocoder_models--ja--kokoro--hifigan_v1 may crash.
+# I did say support for these voices is experimental.)
+# Gradint detects voices that have been downloaded
+# (but prefer_espeak overrides this).  The Chinese
+# voice does NOT support pinyin.
+
 # You can also set extra_speech to a list of
 # (language prefix, command), for example:
 # extra_speech=[ ("la","say-latvian"),("de","say-german") ]
@@ -350,7 +367,7 @@ lily_file = "C:\\Program Files\\NeoSpeech\\Lily16\\data-common\\userdict\\userdi
 # somewhere under C:\Program Files\VW\VT\Lily\M16-SAPI5\lib\
 # but I don't know exactly)
 
-# If you want to use SAPI under WINE in Linux
+# If you want to use SAPI under WINE in GNU/Linux
 # then you can set ptts_program:
 ptts_program = None
 # (hint: run winecfg and set Windows version to Millenium (ME)
@@ -759,7 +776,7 @@ gui_output_directory = "output"
 # in which case the first directory that EXISTS will be used
 # (or the last one on the list if all else fail).
 # Useful if the directory to your MP3 player only appears when
-# it's plugged in for example.  With Linux automounters you can
+# it's plugged in for example.  With GNU/Linux automounters,
 # set "/media/*" as one of the directories, and it will expand to
 # whatever removable device is mounted IF there is only one.
 

diff --git a/hanzi-prompts/begin_zh-yue.txt b/hanzi-prompts/begin_zh-yue.txt
@@ -0,0 +1 @@
+開頭
diff --git a/hanzi-prompts/end_zh-yue.txt b/hanzi-prompts/end_zh-yue.txt
@@ -0,0 +1 @@
+今日個堂上完啦
diff --git a/hanzi-prompts/longpause_zh-yue.txt b/hanzi-prompts/longpause_zh-yue.txt
@@ -0,0 +1 @@
+而家我哋要等一陣，然後翻溫。喺第一課我哋仲未學習好多嘅詞語，所以停頓會比較長，但係喺未來嘅課程，我哋唔會有咁長嘅停頓
diff --git a/hanzi-prompts/meaningis_zh-yue.txt b/hanzi-prompts/meaningis_zh-yue.txt
@@ -0,0 +1 @@
+意思係
diff --git a/hanzi-prompts/nowPleaseSay_zh-yue.txt b/hanzi-prompts/nowPleaseSay_zh-yue.txt
@@ -0,0 +1 @@
+而家請講
diff --git a/hanzi-prompts/pleaseSay_zh-yue.txt b/hanzi-prompts/pleaseSay_zh-yue.txt
@@ -0,0 +1 @@
+請講
diff --git a/hanzi-prompts/repeatAfterMe_zh-yue.txt b/hanzi-prompts/repeatAfterMe_zh-yue.txt
@@ -0,0 +1 @@
+請跟住講
diff --git a/hanzi-prompts/sayAgain_zh-yue.txt b/hanzi-prompts/sayAgain_zh-yue.txt
@@ -0,0 +1 @@
+再講一次
diff --git a/hanzi-prompts/tryToSay_zh-yue.txt b/hanzi-prompts/tryToSay_zh-yue.txt
@@ -0,0 +1 @@
+試吓講
diff --git a/hanzi-prompts/whatSay_zh-yue.txt b/hanzi-prompts/whatSay_zh-yue.txt
@@ -0,0 +1 @@
+點講
diff --git a/hanzi-prompts/whatmean_zh-yue.txt b/hanzi-prompts/whatmean_zh-yue.txt
@@ -0,0 +1 @@
+乜嘢意思？
diff --git a/hanzi-prompts/whatmean_zh-yue_2.txt b/hanzi-prompts/whatmean_zh-yue_2.txt
@@ -0,0 +1 @@
+係乜嘢意思？
diff --git a/hanzi-prompts/whatmean_zh-yue_3.txt b/hanzi-prompts/whatmean_zh-yue_3.txt
@@ -0,0 +1 @@
+乜嘢意思呢？
diff --git a/mac/start-gradint.app/Contents/MacOS/start-gradint b/mac/start-gradint.app/Contents/MacOS/start-gradint
@@ -1,5 +1,6 @@
 #!/bin/bash
-export PATH="$PATH:/usr/local/bin" # in case lame etc is there
+export PATH="/usr/local/bin:$PATH" # for python3 override + in case lame etc is there
+cd "${BASH_SOURCE%/*}/../.." # needed on macOS 14, possibly 13
 if sw_vers 2>/dev/null|grep ^ProductVersion.*1[2-9]; then # macOS 12+
   if test $(python3 -c 'import tkinter,sys;print(sys.version_info[:3]>=(3,10,1))' 2>/dev/null) = "True"; then exec python3 gradint.py; fi
   osascript -e "tell application (path to frontmost application as text) to display dialog \"macOS 12 bundled a broken version of the GUI libraries: please install Python 3 from python.org before running Gradint\" buttons {\"OK\"} with icon stop"

diff --git a/samples/utils/autosplit.py b/samples/utils/autosplit.py
diff --git a/samples/utils/cache-synth.py b/samples/utils/cache-synth.py
diff --git a/samples/utils/cleanup-cache.py b/samples/utils/cleanup-cache.py
diff --git a/samples/utils/diagram.py b/samples/utils/diagram.py
diff --git a/samples/utils/list-synth.py b/samples/utils/list-synth.py
diff --git a/samples/utils/list2cache.py b/samples/utils/list2cache.py
diff --git a/samples/utils/manual-splitter.py b/samples/utils/manual-splitter.py
diff --git a/samples/utils/player.py b/samples/utils/player.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # (should work in both Python 2 and Python 3)
 
-# Simple sound-playing server v1.56
+# Simple sound-playing server v1.58
 # Silas S. Brown - public domain - no warranty
 
 # connect to port 8124 (assumes behind firewall)
@@ -13,8 +13,9 @@
 
 import socket, select, os, sys, os.path, time, re
 for a in sys.argv[1:]:
-  if a.startswith("--rpi-bluetooth-setup"): # tested on Raspberry Pi 400 with Raspbian 11; also tested on Raspberry Pi Zero W with Raspbian 10 Lite (with the device already paired: needed to say "scan on", "discovery on", remove + pair in bluetoothctl).  Send Eth=(bluetooth Ethernet addr) to start.  Note that the setup command reboots the system.
-    os.system('if [ -e /etc/xdg/lxsession/LXDE-pi/autostart ]; then mkdir -p /home/pi/.config/lxsession/LXDE-pi && cp /etc/xdg/lxsession/LXDE-pi/autostart /home/pi/.config/lxsession/LXDE-pi/ && echo sudo ethtool --set-eee eth0 eee off >> /home/pi/.config/lxsession/LXDE-pi/autostart && echo python '+os.path.join(os.getcwd(),sys.argv[0])+' >> /home/pi/.config/lxsession/LXDE-pi/autostart; else (echo "[Unit]";echo "Descrption=Gradint player utility";echo "[Service]";echo "Type=oneshot";echo "ExecStart='+os.path.join(os.getcwd(),sys.argv[0])+'";echo "[Install]";echo "WantedBy=multi-user.target") > player.service && sudo mv player.service /etc/systemd/system/ && sudo systemctl daemon-reload && sudo systemctl enable player && chmod +x '+sys.argv[0]+' && awk '+"'"+'// {print} /^import / {print "os.system('+"'"+'"'+"'"+'"'+"'"+'pulseaudio --start'+"'"+'"'+"'"+'"'+"'"+')"}'+"'"+' < '+sys.argv[0]+' > .playerTMP && mv .playerTMP '+sys.argv[0]+'; fi && sudo "apt-get -y install sox mpg123 pulseaudio pulseaudio-module-bluetooth && usermod -G bluetooth -a pi && (echo load-module module-switch-on-connect;echo load-module module-bluetooth-policy;echo load-module module-bluetooth-discover) >> /etc/pulse/default.pa && (echo [General];echo FastConnectable = true) >> /etc/bluetooth/main.conf && reboot"') # (eee off: improves reliability of gigabit ethernet on RPi400)
+  if a.startswith("--rpi-bluetooth-setup"): # tested on Raspberry Pi 400 with OS versions 11 and 12; also tested on Raspberry Pi Zero W with Raspbian 10 Lite (with the device already paired: needed to say "scan on", "discovery on", remove + pair in bluetoothctl).  Send Eth=(bluetooth Ethernet addr) to start.  Note that the setup command reboots the system.
+    # NOTE: If running on Pi with OS 12 and you've also done "raspi-config" to set things back to PulseAudio (as needed for example for language-synchronised Bluetooth playing in http://ssb22.user.srcf.net/s60/video.html notes), you might need to replace 'ExecStart=' with 'ExecStart=bash -c "while ! ssh localhost true; do sleep 1; done; ssh localhost ' below (and add a " at end of line), and do an ssh-keygen and add to authorized_keys, so player is run in a separate session from systemd (even though the user is the same; it's not clear why this is needed)
+    os.system('(echo "[Unit]";echo "Description=Gradint player utility";echo "[Service]";echo "Type=oneshot";echo "ExecStart='+os.path.join(os.getcwd(),sys.argv[0])+'";echo "WorkingDirectory='+os.getcwd()+'";echo User="$(whoami)";echo "[Install]";echo "WantedBy=multi-user.target") > player.service && sudo mv player.service /etc/systemd/system/ && sudo systemctl daemon-reload && sudo systemctl enable player && chmod +x '+sys.argv[0]+' && sudo bash -c "apt-get -y install sox mpg123 pulseaudio pulseaudio-module-bluetooth && usermod -G bluetooth -a $USER && (echo load-module module-switch-on-connect;echo load-module module-bluetooth-policy;echo load-module module-bluetooth-discover) >> /etc/pulse/default.pa && (echo [General];echo FastConnectable = true) >> /etc/bluetooth/main.conf && reboot"') # WorkingDirectory uses os.getcwd() because os.path has no getcwd.  (The old "eee off" autostart step, which improved reliability of gigabit ethernet on RPi400, is not part of this command.)
   elif a=="--aplay": use_aplay = True # aplay and madplay, for older embedded devices, NOT tested together with --rpi-bluetooth-* above
   elif a.startswith("--delegate="): delegate_to_check=a.split('=')[1] # will ping that IP and delegate all sound to it when it's up.  E.g. if it has better amplification but it's not always switched on.
   elif a.startswith("--chime="): chime_mp3=a.split('=')[1] # if clock bell desired, e.g. echo '$i-14vfff$c48o0l1b- @'|mwr2ly > chime.ly && lilypond chime.ly && timidity -Ow chime.midi && audacity chime.wav (amplify + trim) + mp3-encode (keep default 44100 sample rate so ~38 frames per sec).  Not designed to work with --delegate.  Pi1's 3.5mm o/p doesn't sound very good with this bell.
@@ -69,9 +70,9 @@
         continue
     elif d=='QUIT':
         s.close() ; break
-    elif d=="Eth=": # Eth=ethernet address, to connect via Bluetooth, tested on Raspberry Pi 400 with Raspbian 11
+    elif d=="Eth=": # Eth=ethernet address to connect via Bluetooth (see --rpi-bluetooth-setup above)
         eth = S(c.recv(17))
-        assert re.match("^[A-Fa-f0-9:]*$",eth)
+        assert re.match("^[A-Fa-f0-9:]+$",eth)
         os.system("M=/dev/null;E="+eth+";if ! pacmd list-sinks | grep "+eth.replace(":","_")+" >$M; then while true; do bluetoothctl --timeout 1 disconnect | grep Missing >$M||sleep 5;T=5;while ! bluetoothctl --timeout $T connect $E | tee $M | egrep \"Connection successful|Device $E Connected: yes\"; do sleep 5; T=10;M=/dev/stderr;bluetoothctl --timeout 1 devices;echo Retrying $E; done ; Got=0; for Try in 1 2 3 4 5 6 7 8 9 a b c d e f g h i j k l m n o p q r s t u v w x y z; do if pacmd list-sinks | grep "+eth.replace(":","_")+" >/dev/null; then Got=1; break; fi; sleep 1; done; if [ $Got = 1 ] ; then break; fi; done; fi; pacmd set-default-sink bluez_sink."+eth.replace(":","_")+".a2dp_sink") # ; play /usr/share/scratch/Media/Sounds/Animal/Dog1.wav # (not really necessary if using 'close the socket' to signal we're ready)
         c.close() ; continue
     elif d=="Eth0":

diff --git a/samples/utils/recover-unavail.py b/samples/utils/recover-unavail.py
diff --git a/samples/utils/synth-batchconvert-helper.py b/samples/utils/synth-batchconvert-helper.py
diff --git a/samples/utils/trace.py b/samples/utils/trace.py
diff --git a/samples/utils/transliterate.py b/samples/utils/transliterate.py
diff --git a/server/cantonese.py b/server/cantonese.py
@@ -5,7 +5,7 @@
 # cantonese.py - Python functions for processing Cantonese transliterations
 # (uses eSpeak and Gradint for help with some of them)
 
-# v1.42 (c) 2013-15,2017-23 Silas S. Brown.  License: GPL
+# v1.48 (c) 2013-15,2017-24 Silas S. Brown.  License: GPL
 
 cache = {} # to avoid repeated eSpeak runs,
 # zi -> jyutping or (pinyin,) -> translit
@@ -64,7 +64,7 @@ def hanzi_only(unitext): return u"".join(filter(lambda x:0x4e00<=ord(x)<0xa700 o
 def py2nums(pinyin):
   if not type(pinyin)==type(u""):
     pinyin = pinyin.decode('utf-8')
-  assert pinyin.strip(), "blank pinyin" # saves figuring out a findall TypeError
+  if not pinyin.strip(): return ""
   global pinyin_dryrun
   if pinyin_dryrun:
     pinyin_dryrun = list(pinyin_dryrun)
@@ -91,7 +91,7 @@ def adjust_jyutping_for_pinyin(hanzi,jyutping,pinyin):
   i = 0 ; tones = re.finditer('[1-7]',jyutping) ; j2 = []
   for h,p in zip(list(hanzi),pinyin):
     try: j = getNext(tones).end()
-    except StopIteration: return jyutping # one of the zin has no Cantonese reading, which we'll pick up later on "failed to fix"
+    except StopIteration: return jyutping # one of the hanzi has no Cantonese reading in our data: we'll warn "failed to fix" below
     j2.append(jyutping[i:j]) ; i = j
     if h in py2j and p.lower() in py2j[h]: j2[-1]=j2[-1][:re.search("[A-Za-z]*[1-7]$",j2[-1]).start()]+py2j[h][p.lower()]
   return "".join(j2)+jyutping[i:]
@@ -100,8 +100,9 @@ def adjust_jyutping_for_pinyin(hanzi,jyutping,pinyin):
 u"\u4E3A\u70BA":{"wei2":"wai4","wei4":"wai6"},
 u"\u4E50\u6A02":{"le4":"lok6","yue4":"ngok6"},
 u"\u4EB2\u89AA":{"qin1":"can1","qing4":"can3"},
+u"\u4EC0":{"shen2":"sam6","shi2":"sap6"}, # unless zaap6
 u"\u4F20\u50B3":{"chuan2":"cyun4","zhuan4":"zyun6"},
-u"\u4FBF":{"bian4":"pin4","pian2":"bin6"},
+u"\u4FBF":{"bian4":"bin6","pian2":"pin4"},
 u"\u5047":{"jia3":"gaa2","jia4":"gaa3"},
 u"\u5174\u8208":{"xing1":"hing1","xing4":"hing3"},
 # u"\u5207":{"qie4":"cai3","qie1":"cit3"}, # WRONG (rm'd v1.17).  It's cit3 in re4qie4.  It just wasn't in yiqie4 (which zhy_list has as an exception anyway)
@@ -153,10 +154,10 @@ def adjust_jyutping_for_pinyin(hanzi,jyutping,pinyin):
 def jyutping_to_lau(j):
   j = S(j).lower().replace("j","y").replace("z","j")
   for k,v in jlRep: j=j.replace(k,v)
-  return j.lower().replace("aa","a").replace("ohek","euk")
+  return j.lower().replace("ohek","euk")
 def jyutping_to_lau_java(jyutpingNo=2,lauNo=1):
   # for annogen.py 3.29+ --annotation-postprocess to ship Jyutping and generate Lau at runtime
-  return 'if(annotNo=='+str(jyutpingNo)+'||annotNo=='+str(lauNo)+'){m=Pattern.compile("<rt>(.*?)</rt>").matcher(r);sb=new StringBuffer();while(m.find()){String r2=(annotNo=='+str(jyutpingNo)+'?m.group(1).replaceAll("([1-7])(.)","$1&shy;$2"):(m.group(1)+" ").toLowerCase().replace("j","y").replace("z","j")'+''.join('.replace("'+k+'","'+v+'")' for k,v in jlRep)+'.toLowerCase().replace("aa","a").replace("ohek","euk").replaceAll("([1-7])","<sup>$1</sup>-").replace("- "," ").replaceAll(" $","")),tmp=m.group(1).substring(0,1);if(annotNo=='+str(lauNo)+'&&tmp.equals(tmp.toUpperCase()))r2=r2.substring(0,1).toUpperCase()+r2.substring(1);m.appendReplacement(sb,"<rt>"+r2+"</rt>");}m.appendTail(sb); r=sb.toString();}' # TODO: can probably go faster with mapping for some of this
+  return 'if(annotNo=='+str(jyutpingNo)+'||annotNo=='+str(lauNo)+'){m=Pattern.compile("<rt>(.*?)</rt>").matcher(r);sb=new StringBuffer();while(m.find()){String r2=(annotNo=='+str(jyutpingNo)+'?m.group(1).replaceAll("([1-7])(.)","$1&shy;$2"):(m.group(1)+" ").toLowerCase().replace("j","y").replace("z","j")'+''.join('.replace("'+k+'","'+v+'")' for k,v in jlRep)+'.toLowerCase().replace("ohek","euk").replaceAll("([1-7])","<sup>$1</sup>-").replace("- "," ").replaceAll(" $","")),tmp=m.group(1).substring(0,1);if(annotNo=='+str(lauNo)+'&&tmp.equals(tmp.toUpperCase()))r2=r2.substring(0,1).toUpperCase()+r2.substring(1);m.appendReplacement(sb,"<rt>"+r2+"</rt>");}m.appendTail(sb); r=sb.toString();}' # TODO: can probably go faster with mapping for some of this
 def incomplete_lau_to_jyutping(l):
   # incomplete: assumes Lau didn't do the "aa" -> "a" rule
   l = S(l).lower().replace("euk","ohek")
@@ -236,7 +237,10 @@ def mysub(z,l):
       z = re.sub(re.escape(x)+r"(.)",r"\1"+y,z)
     return z
   if type(u"")==type(""): U=str # Python 3
-  else: U=unicode # Python 2
+  else: # Python 2
+    def U(x):
+      try: return x.decode('utf-8') # might be an emoji pass-through
+      except: return x # already Unicode
   return unicodedata.normalize('NFC',mysub(U(jyutping_to_yale_TeX(j).replace(r"\i{}","i").replace(r"\I{}","I")),[(r"\`",u"\u0300"),(r"\'",u"\u0301"),(r"\=",u"\u0304")])).encode('utf-8')
 
 def superscript_digits_TeX(j):
@@ -291,6 +295,9 @@ def songSubst(l):
         pinyin = pinyin.decode('utf-8')
       if pinyin and not (pinyin,) in cache:
         pinyin_dryrun.add(pinyin)
+        for w in pinyin.split():
+          for h in w.split('-'):
+            pinyin_dryrun.add(h)
     dryrun_mode = False
     for l in lines:
       if '#' in l: l,pinyin = l.split('#')
@@ -300,7 +307,7 @@ def songSubst(l):
       elif pinyin:
         jyutping = adjust_jyutping_for_pinyin(l,jyutping,pinyin)
         groupLens = [0]
-        for syl,space in re.findall('([A-Za-z]*[1-5])( *)',py2nums(pinyin)):
+        for syl,space in re.findall('([A-Za-z]*[1-5])( *)',' '.join('-'.join(py2nums(h) for h in w.split('-')) for w in pinyin.split())): # doing it this way so we're not relying on espeak transliterate_multiple to preserve spacing and hyphenation
           groupLens[-1] += 1
           if space: groupLens.append(0)
         if not groupLens[-1]: groupLens=groupLens[:-1]
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		而家我哋要等一陣，然後翻溫。喺第一課我哋仲未學習好多嘅詞語，所以停頓會比較長，但係喺未來嘅課程，我哋唔會有咁長嘅停頓