Skip to content

Commit

Permalink
add all of Strand's data, and IPA (#9)
Browse files Browse the repository at this point in the history
  • Loading branch information
aryamanarora committed Mar 23, 2021
1 parent 9133ea6 commit 55916f6
Show file tree
Hide file tree
Showing 8 changed files with 11,520 additions and 10,934 deletions.
7,444 changes: 3,800 additions & 3,644 deletions cldf/forms.csv

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion cldf/languages.csv
Original file line number Diff line number Diff line change
Expand Up @@ -291,4 +291,5 @@ OHG,Old High German,oldh1241,10,52
Toch,Tocharian,tokh1241,89.18,42.98
deg,Pashai: Gorayk (Degano),sout2672,70.9008,34.6458
Kam,Kamviri,kamv1242,71.34,35.41
Kata,Katavari,kati1279,70.773611,35.301667
Kata,Katavari,kati1279,70.773611,35.301667
bhatr,Bhateri,bate1261,72.93,34.96
7,444 changes: 3,800 additions & 3,644 deletions data/extensions.csv

Large diffs are not rendered by default.

7,288 changes: 3,644 additions & 3,644 deletions data/strand.csv

Large diffs are not rendered by default.

36 changes: 35 additions & 1 deletion data/strand.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
from urllib.error import HTTPError
import csv
import re
from segments.tokenizer import Tokenizer, Profile

# Module-level tokenizer built from the orthography profile file
# 'strand_profile.txt'; the scraping code below calls it as
# t(word, column='IPA') to produce the IPA column of each CSV row.
# NOTE(review): the path is relative, so this presumably has to be run from
# the directory that contains the profile -- confirm.
t = Tokenizer('strand_profile.txt')

chars = ['p', 'b', 'bAsp', 'f', 'v', 'w', 'm', 'uFrn', 'u', 'o', 'oFrn', 'uTns', 'oTns',
'cDen', 'zDen', 't', 'd', 'dAsp', 's', 'z', 'l', 'lVls', 'n', 'cRet', 'jRet',
Expand All @@ -22,6 +25,35 @@

# Language codes written as the first column of each row in strand.csv.
# The scraping loop below enumerates `languages` and writes codes[i] for the
# i-th entry, so this list is expected to stay parallel to `languages`
# (defined outside this view -- verify lengths and order match).
codes = ['deg', 'Kam', 'Kata', 'Ash', 'Wg', 'Kho', 'Phal']

# Scrape the Bhatera lexicon page and write the parsed entries to
# 'strand2.csv', using the fixed language code 'bhatr'.
# NOTE(review): leading indentation was lost in this rendering of the file,
# so the exact nesting of the statements below (in particular whether
# writer.writerow sits inside `if turner:`) cannot be read off here --
# confirm against the real file before editing.
with open('strand2.csv', 'w') as fout:
writer = csv.writer(fout)
# The f-prefix is redundant: the string has no placeholders.
link = f'http://nuristan.info/IndoAryan/SwatIndus/Bhatera/BhateraLanguage/Lexicon/lex.html'
try:
# The request carries a browser-like User-Agent header.
# NOTE(review): presumably the server rejects the default urllib agent -- confirm.
with urlopen(Request(link, headers={'User-Agent': 'Mozilla/5.0'})) as resp:
soup = BeautifulSoup(resp, 'html.parser')
# Every element with class 'dic' is processed as one candidate entry.
for data in soup.find_all(class_='dic'):
# The headword is looked up as the child element with class 'l';
# elements without one are not processed further.
word = data.find(class_='l')
if word:
# Debug output: prints the headword element to stdout.
print(word)
# Keep only the first text node that is a direct child of the
# headword element (nested tags are not searched).
word = word.find(text=True, recursive=False)
# Move each accent mark ʹ from before the following character to after
# it (e.g. 'mʹâ' -> 'mâʹ'). Only word2 is tokenized; the CSV keeps the
# original spelling in `word`.
# NOTE(review): presumably so the profile's ʹ -> combining-acute mapping
# lands on the vowel -- confirm.
word2 = re.sub(r'ʹ(.)', r'\1ʹ', word)
# From here on `data` is the entry's HTML as a string, with newlines
# replaced by spaces before the regex searches below.
data = str(data).replace('\n', ' ')
# First layout: two period-terminated fields following '<b>]</b>'.
l = re.search(r'<b>]</b>\xa0 (.*?)\.\xa0 (.*?)\.', data)
if not l:
# Fallback layout: the two fields follow a closing '</span>' instead.
l = re.search(r'</span>[\xa0 ]+(.*?)\.\xa0\xa0([^\.]+)\.', data)
# Debug output: prints the match object (or None) to stdout.
print(l)
if l:
# Group 1 is stored as `pos` but is not used in the row written below.
pos = l.group(1).lower()
# Group 2 ends at the first '.', so a gloss containing an abbreviation
# period is cut short there.
# NOTE(review): likely the cause of truncated definitions such as
# 'you (pl' and 'goat (f' in the generated strand2.csv -- confirm.
definition = l.group(2).lower()
# The run of digits after 'T.' plus any single character; when found,
# `turner` is replaced by the digit string, otherwise it stays None.
turner = re.search(r'T\..(\d+)', data)
if turner:
turner = turner.group(1)
# IPA column of the profile for word2, with all spaces removed and then
# '#' turned into a space.
# NOTE(review): presumably the tokenizer separates segments with spaces
# and marks word boundaries with '#' -- confirm against the segments docs.
ipa = t(word2, column='IPA').replace(' ', '').replace('#', ' ')
# Row layout: language code, turner number, original headword,
# definition, empty, IPA, empty, source tag.
writer.writerow(['bhatr', turner, word, definition, '', ipa, '', 'strand'])

# An HTTP error is silently ignored; `e` is unused. Because the output file
# was already opened in 'w' mode above, a failed fetch presumably leaves
# strand2.csv empty with no message -- TODO confirm and consider logging.
except HTTPError as e:
pass

with open('strand.csv', 'w') as fout:
writer = csv.writer(fout)
for i, language in enumerate(languages):
Expand All @@ -35,6 +67,7 @@
word = data.find(class_='l')
if word:
word = word.find(text=True, recursive=False)
word2 = re.sub(r'ʹ(.)', r'\1ʹ', word)
l = re.search(r'<b>]</b>\xa0 (.*?)\.\xa0 (.*?)\.', str(data))
if not l:
l = re.search(r'</span>\xa0 (.*?)\.\xa0 (.*?)\.', str(data))
Expand All @@ -44,7 +77,8 @@
turner = re.search(r'T\. (\d+)', str(data))
if turner:
turner = turner.group(1)
writer.writerow([codes[i], turner, word, definition, '', '', '', 'strand'])
ipa = t(word2, column='IPA').replace(' ', '').replace('#', ' ')
writer.writerow([codes[i], turner, word, definition, '', ipa, '', 'strand'])

except HTTPError as e:
pass
156 changes: 156 additions & 0 deletions data/strand2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
bhatr,9691,mʹâ,i,,mɑ́,,strand
bhatr,9691,mʹio˜,mine,,míõ,,strand
bhatr,11302,bʹe,we,,bé,,strand
bhatr,986,âsʹo˜,us,,ɑsó̃,,strand
bhatr,5889,tʹu,thou,,t̪ú,,strand
bhatr,5889,tʹi,thee,,t̪í,,strand
bhatr,10511,tʹus,you (pl,,t̪ús,,strand
bhatr,12815,sa,this; that,,sə,,strand
bhatr,6495,dʹûr,far,,d̪ú̬ɾ,,strand
bhatr,2462,yʹâk,one,,j̣ɑ́k,,strand
bhatr,6648,dʹû,two,,d̪ú̬,,strand
bhatr,5994,c̣ʹâ,three,,ʈ͡ʂɑ́,,strand
bhatr,4655,ćʹôr,four,,t͡só̬ɾ,,strand
bhatr,7655,pʹâ˜z,five,,pɑ́̃z,,strand
bhatr,12803,ṣʹo,six,,ʂó,,strand
bhatr,13139,sʹât,seven,,sɑ́t̪,,strand
bhatr,941,ʹâṭ,eight,,ɑ́ʈ,,strand
bhatr,6984,nʹu˜,nine,,nú̃,,strand
bhatr,6227,dʹaš,ten,,d̪ə́ʃ,,strand
bhatr,2485,yʹeš,eleven,,j̣éʃ,,strand
bhatr,6658,duʹäš,twelve,,d̪uä́ʃ,,strand
bhatr,6001,čʹîš,thirteen,,t͡ʃí̬ʃ,,strand
bhatr,4605,ćʹodaš,fourteen,,t͡sód̪əʃ,,strand
bhatr,7662,pa˜ǰʹiš,fifteen,,pə̃d͡ʒíʃ,,strand
bhatr,12812,ṣuʹeš,sixteen,,ʂuéʃ,,strand
bhatr,13146,satʹäš,seventeen,,sət̪ä́ʃ,,strand
bhatr,946,aṭʹäš,eighteen,,əʈä́ʃ,,strand
bhatr,2411,ʹu˜biš,nineteen,,ú̃biʃ,,strand
bhatr,11616,bʹîš,twenty,,bí̬ʃ,,strand
bhatr,6004,c̣ʹobiš,sixty,,ʈ͡ʂóbiʃ,,strand
bhatr,4623,ćʹorbiš,eighty,,t͡sóɾbiʃ,,strand
bhatr,12278,šʹal,hundred,,ʃə́l,,strand
bhatr,9568,bʹuṭo˜,all,,búʈõ,,strand
bhatr,4424,ghʹu˜,big,,gʱú̃,,strand
bhatr,6368,ʹig,tall,,íg,,strand
bhatr,1670,uzʹal,white,,uzə́l,,strand
bhatr,3083,kʹâl,black,,kɑ́l,,strand
bhatr,10539,râtʹu,red,,ɾɑt̪ú,,strand
bhatr,8233,pʹil,yellow,,píl,,strand
bhatr,7563,nʹil,blue,,níl,,strand
bhatr,10930,lʹâc̣h,bad,,lɑ́ʈ͡ʂh,,strand
bhatr,13768,thʹu,is,,t̪hú,,strand
bhatr,6906,nʹâ,not,,nɑ́,,strand
bhatr,9402,bhaṭʹera,bhaṭera,,bʱəʈéɾə,,strand
bhatr,11396,bʹâṣ,rain,,bɑ́ʂ,,strand
bhatr,14096,hiʹu˜,snow,,hiú̃,,strand
bhatr,13493,sʹûm,earth; ground,,sú̬m,,strand
bhatr,11348,bʹâṭh,rock,,bɑ́ʈh,,strand
bhatr,3018,khʹûr,boulder,,khú̬ɾ,,strand
bhatr,13386,sigʹal,sand,,sigə́l,,strand
bhatr,13627,khʹân,mountain,,khɑ́n,,strand
bhatr,3790,khʹâṛ,valley; stream,,khɑ́ɽ,,strand
bhatr,8082,puʹe˜i,water,,pué̃i,,strand
bhatr,13415,sʹîn,river,,sí̬n,,strand
bhatr,1869,ʹûć,spring,,ú̬t͡s,,strand
bhatr,125,ʹâ˜r,fire,,ɑ́̃ɾ,,strand
bhatr,6849,dhʹû˜,smoke,,d̪ʱú̬̃,,strand
bhatr,5177,zâ˜gʹâl,forest,,zɑ̃gɑ́l,,strand
bhatr,12067,bʹîc̣h,tree,,bí̬ʈ͡ʂh,,strand
bhatr,11120,lʹô,cedar,,ló̬,,strand
bhatr,11209,bʹân,holly oak,,bɑ́n,,strand
bhatr,48,âc̣hʹô,walnut,,ɑʈ͡ʂhó̬,,strand
bhatr,3331,kʹul,half walnut,,kúl,,strand
bhatr,22,âc̣hʹâ,apricot,,ɑʈ͡ʂhɑ́,,strand
bhatr,1103,ʹâru,peach,,ɑ́ɾu,,strand
bhatr,2747,kʹâu,olive,,kɑ́u,,strand
bhatr,4287,gʹû˜,wheat,,gú̬̃,,strand
bhatr,10431,ǰʹô,barley,,d͡ʒó̬,,strand
bhatr,12415,šʹâl,rice (in field),,ʃɑ́l,,strand
bhatr,9827,mʹoṣ,man,,móʂ,,strand
bhatr,4889,ćʹun,children,,t͡sún,,strand
bhatr,4147,gʹâ,cow,,gɑ́,,strand
bhatr,4255,gʹu,bull,,gú,,strand
bhatr,11239,bâćʹu,calf,,bɑt͡sú,,strand
bhatr,4963,ćhʹel,goat (f,,t͡shél,,strand
bhatr,10264,mʹugur,billy goat,,múguɾ,,strand
bhatr,4973,ćhâtʹu,kid,,t͡shɑt̪ú,,strand
bhatr,4973,ćhâtʹai,kid (f,,t͡shɑt̪ə́i,,strand
bhatr,9606,bhʹeḍe˜,ewe,,bʱéɖẽ,,strand
bhatr,9606,bhʹiḍ,ram,,bʱíɖ,,strand
bhatr,10310,mâmtʹai,lamb (f,,mɑmt̪ə́i,,strand
bhatr,10310,mâmtʹu,lamb,,mɑmt̪ú,,strand
bhatr,4516,ghʹô,horse,,gʱó̬,,strand
bhatr,4516,ghuʹai,mare,,gʱuə́i,,strand
bhatr,13331,sʹâṇ,buffalo (male),,sɑ́ɳ,,strand
bhatr,9964,mhʹeṣ,buffalo (f,,mhéʂ,,strand
bhatr,3219,kućʹur,dog,,kut͡súɾ,,strand
bhatr,3219,kućʹir,bitch,,kut͡síɾ,,strand
bhatr,5177,zâ˜glʹei,bird,,zɑ̃gléi,,strand
bhatr,9758,mʹâć,fish,,mɑ́t͡s,,strand
bhatr,12497,ṣʹiṣ,head,,ʂíʂ,,strand
bhatr,11572,bʹâl,hair,,bɑ́l,,strand
bhatr,5803,tʹâl,forehead,,t̪ɑ́l,,strand
bhatr,43,ʹâ˜c̣h,eye,,ɑ́̃ʈ͡ʂh,,strand
bhatr,43,ʹe˜c̣h,eyes,,é̃ʈ͡ʂh,,strand
bhatr,7031,natʹûr,nose,,nət̪ú̬ɾ,,strand
bhatr,1533,ʹâ˜,mouth,,ɑ́̃,,strand
bhatr,5853,dʹut,lip,,d̪út̪,,strand
bhatr,5853,dʹot,lips,,d̪ót̪,,strand
bhatr,6152,dʹân,tooth,,d̪ɑ́n,,strand
bhatr,6152,dʹan,teeth,,d̪ə́n,,strand
bhatr,5228,zʹîb,tongue; language,,zí̬b,,strand
bhatr,9083,pho˜g,mustache,,phõg,,strand
bhatr,6250,dʹâi,beard,,d̪ɑ́i,,strand
bhatr,13640,kʹum,shoulder,,kúm,,strand
bhatr,13640,kʹom,shoulders,,kóm,,strand
bhatr,9229,bakuʹi˜,arm,,bəkuí̃,,strand
bhatr,14024,hʹât,hand,,hɑ́t̪,,strand
bhatr,14024,hʹat,hands,,hə́t̪,,strand
bhatr,135,â˜guʹi,finger,,ɑ̃guí,,strand
bhatr,3243,kʹuṭ,knee,,kúʈ,,strand
bhatr,3906,khʹur,foot,,khúɾ,,strand
bhatr,3906,khʹor,feet,,khóɾ,,strand
bhatr,137,â˜gṭʹo,toe,,ɑ̃gʈó,,strand
bhatr,14152,hiʹû,heart,,hiú̬,,strand
bhatr,5589,dhʹîr,stomach,,d̪ʱí̬ɾ,,strand
bhatr,3696,c̣hʹîr,milk,,ʈ͡ʂhí̬ɾ,,strand
bhatr,8139,pʹâṣ,cow dung,,pɑ́ʂ,,strand
bhatr,3696,c̣hʹîr,milk,,ʈ͡ʂhí̬ɾ,,strand
bhatr,6148,dʹed,yoghurt,,d̪éd̪,,strand
bhatr,4501,ghiʹû,ghee,,gʱiú̬,,strand
bhatr,4368,gʹâ˜,village,,gɑ́̃,,strand
bhatr,4336,gʹûṭ,house,,gú̬ʈ,,strand
bhatr,12323,šʹû˜,bed,,ʃú̬̃,,strand
bhatr,5905,tulʹei,bedding,,t̪uléi,,strand
bhatr,5481,ṭôpʹai,hat,,ʈo̬pə́i,,strand
bhatr,4483,gʹu˜ḍ,button,,gú̃ɖ,,strand
bhatr,138,â˜gṭhʹui,ring,,ɑ̃gʈhúi,,strand
bhatr,9402,bhaṭʹera wâla,man from bhaṭera,,bʱəʈéɾə wɑlə,,strand
bhatr,1135,âpʹâ˜,kinsman,,ɑpɑ́̃,,strand
bhatr,9935,mhʹal,father,,mhə́l,,strand
bhatr,9935,mhʹel,mother,,mhél,,strand
bhatr,8188,pic̣ʹu,father's brother,,piʈ͡ʂú,,strand
bhatr,2988,kâkʹai,father's older brother,,kɑkə́i,,strand
bhatr,9089,pʹâi,father's sister,,pɑ́i,,strand
bhatr,10055,mâmʹai,mother's brother,,mɑmə́i,,strand
bhatr,10001,mâsʹei,mother's sister,,mɑséi,,strand
bhatr,9661,hʹo˜,brother,,hó̃,,strand
bhatr,9349,bhiʹo˜,sister,,bʱió̃,,strand
bhatr,8265,pûc̣h,son,,pu̬ʈ͡ʂh,,strand
bhatr,6481,dhʹî,daughter,,d̪ʱí̬,,strand
bhatr,9664,hʹoc̣,brother's son,,hóʈ͡ʂ,,strand
bhatr,13918,sazuʹi,sister's daughter,,səzuí,,strand
bhatr,8416,pʹu,grandson,,pú,,strand
bhatr,8417,pʹui,granddaughter,,púi,,strand
bhatr,9467,bâriʹu,husband,,bɑɾiú,,strand
bhatr,12753,šʹûr,spouse's father,,ʃú̬ɾ,,strand
bhatr,12759,ṣʹâṣ,spouse's mother,,ʂɑ́ʂ,,strand
bhatr,9660,hâzʹe,brother's wife,,hɑzé,,strand
bhatr,10124,mʹic̣,wife's brother,,míʈ͡ʂ,,strand
bhatr,13871,sârʹâ˜,wife's sister,,sɑɾɑ́̃,,strand
bhatr,2718,kʹe˜ṭ,husband's brother (younger?),,ké̃ʈ,,strand
bhatr,5200,zʹâ˜il,husband's sister,,zɑ́̃il,,strand
bhatr,11251,bhuʹâiṭ,son's wife,,bʱuɑ́iʈ,,strand
bhatr,5228,zʹîb,tongue; language,,zí̬b,,strand
bhatr,9402,bhaṭʹe sa zîb,bhaṭera language,,bʱəʈé sə zi̬b,,strand
83 changes: 83 additions & 0 deletions data/strand_profile.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
Grapheme IPA
p p
b b
bh bʱ
f f
v v
w w
m m
ü ü
ü ü
-
u u
ö ö
o o
û u̬
ô o̬
ć t͡s
ź d͡z
źh d͡zʱ
t t̪
d d̪
dh d̪ʱ
s s
z z
l l
ɬ ɬ
n n
c̣ ʈ͡ʂ
j̣ ɖ͡ʐ
J̣ ɖ͡ʐ
j̣h ɖ͡ʐʱ
ṭ ʈ
ḍ ɖ
ḍh ɖʱ
ṣ ʂ
ẓ ʐ
r ɾ
ṛ ɽ
ḷ ɭ
ř ɻ
ň ɻ̃
ṇ ɳ
r̥ ɻ̩
č t͡ʃ
ǰ d͡ʒ
ǰh d͡ʒʱ
š ʃ
ž ʒ
y j̣
i i
e e
ä ä
î i̬
ê e̬
k̂ kʲ
ĝ gʲ
ĝh gʲʱ
k k
g g
gh gʱ
x x
ǧ ɣ
ʱ ʱ
ʷ ʷ
ŋ ŋ
ï ï
a ə
â ɑ
å a
q q
ḥ ħ
ʕ ʕ
ʔ ʔ
h h
ɦ ɦ
ɦ̊ ɦ̊
ʹ ́
˜ ̃
_
=
·
+
,
Binary file modified jambu/db.sqlite
Binary file not shown.

0 comments on commit 55916f6

Please sign in to comment.