Skip to content

Commit

Permalink
more IPA (#9)
Browse files Browse the repository at this point in the history
  • Loading branch information
aryamanarora committed Apr 20, 2021
1 parent 06bd0ad commit 8f7e625
Show file tree
Hide file tree
Showing 14 changed files with 36,525 additions and 40,120 deletions.
18 changes: 10 additions & 8 deletions cldf.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
write2.writerow([entry, headword, '', data[entry][0]['ref']])
for row in read:
write.writerow(row)
write2.writerow([row[0], row[2], row[3], row[4]])
write2.writerow([row[0], row[2], '', row[3]])

a = set()
with open('cldf/forms.csv', 'w') as fout, open('errors.txt', 'w') as errors:
Expand Down Expand Up @@ -64,22 +64,24 @@
write.writerow([num, lang, entry, word, desc, '', '', entry, '', 'CDIAL'])
else:
if word[0] == '': continue
word[0] = word[0].strip('.,')
word[0] = word[0].strip('.,;- ')
word[0] = word[0].replace('<? >', '')
word[0] = word[0].lower()

for i in superscript:
word[0] = word[0].replace('ˊ', '́').replace(' --', '-').replace('-- ', '-')
word[0] = word[0].replace(f'<superscript>{i}</superscript>', superscript[i])

oldest = unicodedata.normalize('NFD', word[0])
oldest = oldest.replace('̄˘', '̄̆')
oldest = oldest.replace('̆̄', '̄̆')
oldest = oldest.replace('̄̆', '̄̆')
if '̄̆' in oldest:
form['words'].append([oldest.replace('̄̆', '̄'), word[1]])
word[0] = word[0].replace('̄̆', '')
oldest = oldest.replace('̄̆', '')
word[0] = oldest
word[0] = unicodedata.normalize('NFC', word[0])

for i in superscript:
word[0] = word[0].replace('ˊ', '́').replace(' --', '-').replace('-- ', '-')
word[0] = word[0].replace(f'<superscript>{i}</superscript>', superscript[i])

if '˚' not in word[0]: reference = word[0]
else:
old = word[0]
Expand All @@ -95,7 +97,7 @@
if lang in tokenizers and '˚' not in word[0]:
ipa = tokenizers[lang](word[0], column='IPA').replace(' ', '').replace('#', ' ')
if '�' in ipa:
if lang == 'S': errors.write(f'{lang} {oldest} {word[0]} {ipa}\n')
if lang == 'M': errors.write(f'{lang} {oldest} {word[0]} {ipa}\n')
ipa = ''

write.writerow([num, lang, entry, word[0], word[1], '', ipa, entry, '', 'CDIAL'])
Expand Down
2 changes: 1 addition & 1 deletion cldf/cognates.csv
Original file line number Diff line number Diff line change
Expand Up @@ -15433,7 +15433,7 @@ e14,Indo-Aryan,*locis-kāṣṭhikā,,patyal5
e15,Indo-Aryan,sapakṣa,'easy',patyal5
e16,Indo-Aryan,*ubbāsī,'yawn',arora
e17,Indo-Aryan,*kakkara,'cloud',arora
e18,Indo-Aryan,*celha,'waist',arora
e18,Indo-Aryan,*celhi,'waist',arora
e19,Indo-Aryan,*cora,'asparagus beans',arora
e20,Indo-Aryan,*cora,'hut' (perhaps related to cakrá?),arora
e21,Indo-Aryan,*jhañjhaṭ,'problem',arora
Expand Down
71,412 changes: 35,706 additions & 35,706 deletions cldf/forms.csv

Large diffs are not rendered by default.

60 changes: 30 additions & 30 deletions cldf/parameters.csv
Original file line number Diff line number Diff line change
Expand Up @@ -15416,33 +15416,33 @@ ID,Name,Concepticon_ID,Description
14843,hárati:,,"<number>14843</number> <b>hárati:</b> Paš. <i>ar</i> -- ʻ to accept, agree, hear ʼ rather (like <i>ār</i> -- ʻ to bring ʼ) &lt; <smallcaps>āˊharati</smallcaps>. <p></p>"
14844,hásta- :,,"<number>14844</number> <b>hásta -- :</b> Wg. (Gambir) <i>dōš</i> ʻ hand and forearm ʼ, <i>doš- &amp;atildedotdot ʻ palm of hand ʼ.</i> <p></p>"
14845,himna- :,,"<number>14845</number> *<b>himna -- :</b> Sh.gil. <i>hĭn</i> m., gur. koh. <i>hinn</i> f., jij. <i>hĭ̄n</i> ʻ snow ʼ, Sv. <i>hina</i> rather than &lt; <smallcaps>sníh</smallcaps> -- ?<hr/><div id=""responsive_footer_table""><div id=""responsive_footer_tr""><div id=""responsive_footer_td""><a href=""/dictionaries/soas/"">Back to the Search Page </a>  |   <a href=""/dictionaries/"">Back to the DDSA Page</a>"
e1,su-vāṭa,'good enclosure',patyal3
e2,anna-dhāra,'stream of food',patyal2
e3,ku-varṇa,'of bad colour',patyal2
e4,gāvyáya,'belonging to or coming from cow or cattle' RV 9.70.7; 10.48.4,patyal2
e5,dantā-valī,'row of teeth; teeth collectively',patyal2
e6,saptā-nnādya,'seven grains',patyal2
e7,*kalyā-hāra,'breakfast',patyal5
e8,*citra-karbura,'spotted',patyal5
e9,*catur-akṣa,'four-eyed',patyal5
e10,jánitr̥,,patyal5
e11,*dēva-sthala,,patyal5
e12,*vādya-tantra,,patyal5
e13,*mr̥taka-sthāna,,patyal5
e14,*locis-kāṣṭhikā,,patyal5
e15,sapakṣa,'easy',patyal5
e16,*ubbāsī,'yawn',arora
e17,*kakkara,'cloud',arora
e18,*celha,'waist',arora
e19,*cora,'asparagus beans',arora
e20,*cora,'hut' (perhaps related to cakrá?),arora
e21,*jhañjhaṭ,'problem',arora
e22,*padopānah,"'shoe' [padá1, upānáh]",fritz
e23,*yavanakadala,"'banana' [yavaná, kadala]",fritz
e24,*yavanavālukā,"'sand' [yavaná, vālukā]",fritz
e25,*varṣadhanus,"'rainbow' [varṣá, dhánus]",fritz
e26,*ayasgaṇḍa,"'iron' [áyas, gaṇḍa]",fritz
e27,*pādadīrghamarkaṭa,"'long-legged (?) spider' [pā́da, dīrghá, markaṭa]",fritz
e28,*bhakkhamukha,"'lump-faced' [bhakkha, múkha]",fritz
e29,*rūpāsti,"'has form?' [rūpá, ásti]",fritz
e30,*anyaika,"'another' [anyá, ḗka]",fritz
e1,su-vāṭa,,'good enclosure'
e2,anna-dhāra,,'stream of food'
e3,ku-varṇa,,'of bad colour'
e4,gāvyáya,,'belonging to or coming from cow or cattle' RV 9.70.7; 10.48.4
e5,dantā-valī,,'row of teeth; teeth collectively'
e6,saptā-nnādya,,'seven grains'
e7,*kalyā-hāra,,'breakfast'
e8,*citra-karbura,,'spotted'
e9,*catur-akṣa,,'four-eyed'
e10,jánitr̥,,
e11,*dēva-sthala,,
e12,*vādya-tantra,,
e13,*mr̥taka-sthāna,,
e14,*locis-kāṣṭhikā,,
e15,sapakṣa,,'easy'
e16,*ubbāsī,,'yawn'
e17,*kakkara,,'cloud'
e18,*celhi,,'waist'
e19,*cora,,'asparagus beans'
e20,*cora,,'hut' (perhaps related to cakrá?)
e21,*jhañjhaṭ,,'problem'
e22,*padopānah,,"'shoe' [padá1, upānáh]"
e23,*yavanakadala,,"'banana' [yavaná, kadala]"
e24,*yavanavālukā,,"'sand' [yavaná, vālukā]"
e25,*varṣadhanus,,"'rainbow' [varṣá, dhánus]"
e26,*ayasgaṇḍa,,"'iron' [áyas, gaṇḍa]"
e27,*pādadīrghamarkaṭa,,"'long-legged (?) spider' [pā́da, dīrghá, markaṭa]"
e28,*bhakkhamukha,,"'lump-faced' [bhakkha, múkha]"
e29,*rūpāsti,,"'has form?' [rūpá, ásti]"
e30,*anyaika,,"'another' [anyá, ḗka]"
Expand Down
2 changes: 1 addition & 1 deletion data/extensions_ia.csv
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ e14,Indo-Aryan,*locis-kāṣṭhikā,,patyal5
e15,Indo-Aryan,sapakṣa,'easy',patyal5
e16,Indo-Aryan,*ubbāsī,'yawn',arora
e17,Indo-Aryan,*kakkara,'cloud',arora
e18,Indo-Aryan,*celha,'waist',arora
e18,Indo-Aryan,*celhi,'waist',arora
e19,Indo-Aryan,*cora,'asparagus beans',arora
e20,Indo-Aryan,*cora,'hut' (perhaps related to cakrá?),arora
e21,Indo-Aryan,*jhañjhaṭ,'problem',arora
Expand Down
68 changes: 68 additions & 0 deletions data/ipa/cdial/G.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
Grapheme IPA
a ə
ã ə̃
ā aː
ã̄ ãː
ᵃ ᵊ
ᶦ ɪ
i ɪ
ï ɪ
ĩ ɪ̃
ī iː
ī˜ ĩː
u ʊ
ũ ʊ̃
ū uː
ū̃ ũː
e eː
ē eː
ẽ ẽː
ē̃ ẽː
o oː
ō oː
õ õː
ō̃ õː
ai ɛː
aĩ ɛ̃ː
au ɔː
ɔ ɔ
ɔ̃ ɔ̃
aũ ɔ̃ː
k k
kh kʰ
g g
gh gʱ
ṅ ŋ
c t͡ʃ
ch t͡ʃʰ
j d͡ʒ
jh d͡ʒʱ
ñ ɲ
t t̪
th t̪ʰ
d d̪
dh d̪ʱ
n n
ṭ ʈ
ṭh ʈʰ
ḍ ɖ
ḍh ɖʱ
ṇ ɳ
p p
ph pʰ
b b
bh bʱ
y j
r ɾ
l l
v ʋ
w ʋ
s s
ś ʃ
ṣ ʂ
h ɦ
ṛ ɽ
x x
m m
ḷ ɭ
72 changes: 72 additions & 0 deletions data/ipa/cdial/M.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
Grapheme IPA
a ə
ã ə̃
ā aː
ã̄ ãː
ᵃ ᵊ
ᶦ ɪ
i ɪ
ï ɪ
ĩ ɪ̃
ī iː
ī˜ ĩː
u ʊ
ũ ʊ̃
ū uː
ū̃ ũː
e eː
ē eː
ẽ ẽː
ē̃ ẽː
o oː
ō oː
õ õː
ō̃ õː
ai ɛː
aĩ ɛ̃ː
au ɔː
ɔ ɔ
ɔ̃ ɔ̃
aũ ɔ̃ː
k k
kh kʰ
g g
gh gʱ
ṅ ŋ
c t͡ʃ
ch t͡ʃʰ
j d͡ʒ
jh d͡ʒʱ
ñ ɲ
t t̪
th t̪ʰ
d d̪
dh d̪ʱ
n n
ṭ ʈ
ṭh ʈʰ
ḍ ɖ
ḍh ɖʱ
ṇ ɳ
p p
ph pʰ
b b
bh bʱ
y j
r ɾ
l l
v ʋ
w ʋ
s s
ś ʃ
ṣ ʂ
h ɦ
ṛ ɽ
x x
m m
ḷ ɭ
ċ t͡s
ċh t͡sʰ
j̈ d͡z
j̈h d͡zʱ
66 changes: 66 additions & 0 deletions data/ipa/cdial/P.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
Grapheme IPA
a ə
ã ə̃
ā aː
ã̄ ãː
ᵃ ᵊ
ᶦ ɪ
i ɪ
ï ɪ
ĩ ɪ̃
ī iː
ī˜ ĩː
u ʊ
ũ ʊ̃
ū uː
ū̃ ũː
e eː
ē eː
ẽ ẽː
ē̃ ẽː
o oː
ō oː
õ õː
ō̃ õː
ai ɛː
aĩ ɛ̃ː
au ɔː
aũ ɔ̃ː
k k
kh kʰ
g g
gh gʱ
ṅ ŋ
c t͡ʃ
ch t͡ʃʰ
j d͡ʒ
jh d͡ʒʱ
ñ ɲ
t t̪
th t̪ʰ
d d̪
dh d̪ʱ
n n
ṭ ʈ
ṭh ʈʰ
ḍ ɖ
ḍh ɖʱ
ṇ ɳ
p p
ph pʰ
b b
bh bʱ
y j
r ɾ
l l
v ʋ
w ʋ
s s
ś ʃ
ṣ ʂ
h ɦ
ṛ ɽ
x x
m m
ḷ ɭ
52 changes: 52 additions & 0 deletions data/ipa/cdial/Pk.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
Grapheme IPA
ṁ ̃
a ə
ā aː
i ɪ
ï ɪ
ī iː
u ʊ
ü ʊ
ũ ʊ̃
ū uː
e e
ē eː
o o
ō oː
k k
kh kʰ
g g
gh gʱ
ṅ ŋ
c t͡ʃ
ch t͡ʃʰ
j d͡ʒ
jh d͡ʒʱ
ñ ɲ
t t̪
th t̪ʰ
d d̪
dh d̪ʱ
n n
ṭ ʈ
ṭh ʈʰ
ḍ ɖ
ḍh ɖʱ
ṇ ɳ
p p
ph pʰ
b b
bh bʱ
y j
r ɾ
l l
v ʋ
w ʋ
s s
ś ʃ
ṣ ʂ
h ɦ
ṛ ɽ
m m
Loading

0 comments on commit 8f7e625

Please sign in to comment.