-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.html
225 lines (210 loc) · 9.35 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
<!DOCTYPE html>
<html lang="en-GB">
<head>
<meta charset="utf-8">
<title>Classical Chinese Sentence Finder</title>
<style>
/* Results table (Link / Chinese / English columns): collapse cell borders into single lines */
table {
border-collapse: collapse;
}
/* Default look of every cell; the column classes below (.chinese, .english, .ctext)
   are class selectors, so their widths take precedence over this 500px default */
td, th {
border: 1px solid black;
padding: 5px;
width:500px;
}
/* Column holding the Chinese passage */
.chinese {
width: 250px;
}
/* Runs of characters the user does not know; the script wraps them in <span class="unknown"> */
.unknown {
color:red;
}
/* Column holding the English translation */
.english {
width: 400px;
}
/* Narrow, centred column holding the link to ctext.org */
.ctext {
width:10px;
text-align: center;
}
/* Header cells are left-aligned (th.ctext stays centred because .ctext is more specific) */
th {
text-align: left;
}
/* The expert options box (the element with class "expert") has a rounded border */
.expert {
border: 1px solid black;
border-radius: 5px;
padding: 5px;
margin: 5px;
width:500px;
}
/* An hr inside the expert box runs edge to edge: no side margins or padding,
   and only a single top border line is drawn */
.expert hr {
margin-left: 0;
margin-right: 0;
padding-left: 0;
padding-right: 0;
border: 0;
border-top: 1px solid black;
}
</style>
</head>
<body>
This uses a cached set of passages from <a href="https://ctext.org/">ctext.org</a> for parallel Classical Chinese-English passages consisting of only the characters you mentioned.
<p>
<label for="text">Paste all the Chinese characters you know into the box below:</label><br>
<textarea id="text" rows="10" cols="50">一上不世中之乍乎也予事云井亦人仁今以任休低何俄保信儀充光入全公共其冶出分利則前力勝勞勿化去參受可名君吾周哉唯問善喻四固在地士外多夢大天夫好如妻始姓威子孔字孟學孺安容富實將對山己已床弘弟强後得從循德心必忍志忠思性怵恆恕惕惠惡惻愛我所政故文於施既日明昔是景曰書曾月有望木本栩梁楹樂樊欲止此死毅母水汝海濠為無然父物理生用由疑皆盡相矣知章端粟終絏縲罕罪習者而聞胡能臣臥自至與舉苟若莊萬蘧處蝶行見覺言誨請諸謂識貢貧貫貴賜賤足身軻輿辭近遊道遠適遲鄉醉重長門隱雖霜非頭頹食體魚鯈齊</textarea><br>
<!-- Expert mode checkbox: the script shows every .expert element while this is ticked.
     Each control is wrapped in a <label> so its text is announced by assistive
     technology and clicking the text activates the control. -->
<label><input type="checkbox" id="expert_mode">expert options</label><br>
<div class="expert" style="display:none">
<!-- Radio button group: selects the search mode. The script reads the checked
     input[name="grouping"]; the values "vocab" and "colocation" must stay as they are. -->
<label><input type="radio" name="grouping" value="vocab" checked>Find Sentences consisting of only these Chinese characters (for vocabulary-based sentence lists).</label><br>
<label><input type="radio" name="grouping" value="colocation">Find Sentences containing all of these Chinese characters (for collocations).</label><br>
<hr>
<label><input type="number" id="unknown_char_allowance" value="0" min="0" max="5">Unknown characters allowed (does nothing in collocation mode).</label>
</div>
<p>
<!-- Number input field for the minimum sentence length -->
<label><input type="number" id="min_length" value="10" min="2" max="40">Minimum sentence length (in characters)</label>
<p>
<button id="button">Loading datasets...</button>
<p></p>
<span id="output"></span>
<script>
var button = document.getElementById("button");
// The search button stays disabled until the passage database has been loaded.
button.disabled = true;
// Passage database, keyed by Chinese passage text. The click handler reads
// database[passage][0][0] as the English text and database[passage][0][1] as the ctext.org id.
var database = {};

// Tell the user that loading failed instead of leaving "Loading datasets..." up forever.
// The button is left disabled because there is nothing to search.
function reportDatabaseLoadFailure() {
  button.innerText = "Could not load the passage database. Please reload the page.";
}

// Load database.json asynchronously from the same location as this page.
var request = new XMLHttpRequest();
request.open('GET', 'database.json', true);
request.onload = function() {
  if (request.status >= 200 && request.status < 400) {
    try {
      database = JSON.parse(request.responseText);
    } catch (e) {
      // Malformed JSON: keep the empty database and report the problem.
      reportDatabaseLoadFailure();
      return;
    }
    // Success: enable the search button.
    button.disabled = false;
    button.innerText = "Find passages using your characters!";
  } else {
    // The server answered, but with an error status.
    reportDatabaseLoadFailure();
  }
};
// Network-level failure (no response at all).
request.onerror = reportDatabaseLoadFailure;
request.send();
// Whenever the expert-mode checkbox is clicked, show every element with class
// "expert" if the box is now ticked, and hide them all otherwise.
document.getElementById("expert_mode").addEventListener("click", function() {
  var showPanels = document.getElementById("expert_mode").checked;
  var displayValue = showPanels ? "block" : "none";
  var panels = document.getElementsByClassName("expert");
  Array.prototype.forEach.call(panels, function(panel) {
    panel.style.display = displayValue;
  });
});
// Search button handler: reads the form, selects matching passages from the
// global `database`, and renders them as a table inside #output.
//
// Two search modes:
//  - vocabulary mode (default): keep passages whose characters are all known,
//    apart from exactly `unknown_char_allowance` distinct unknown characters;
//  - colocation mode (expert option): keep passages that contain every
//    character the user entered.
// Results shorter than the minimum length are dropped; the rest are sorted by
// length, with passages of equal length in random order.
//
// All text is processed per Unicode code point (not per UTF-16 code unit), so
// Han characters outside the Basic Multilingual Plane are matched, counted and
// highlighted as single characters.
document.getElementById("button").addEventListener("click", function() {
  // Removes separators/whitespace (Z), punctuation (P) and control/other (C) characters.
  var NON_TEXT = /[\p{Z}\p{P}\p{C}]/gu;

  // --- Read the options ---
  var colocation_mode = document.querySelector('input[name="grouping"]:checked').value === "colocation";
  // An empty number field yields "" (0 for Number, NaN for parseInt); treat both as 0.
  var min_length = Number(document.getElementById("min_length").value) || 0;
  var unknown_char_allowance = parseInt(document.getElementById("unknown_char_allowance").value, 10) || 0;
  // A negative allowance could never be met; clamp it to zero.
  unknown_char_allowance = Math.max(0, unknown_char_allowance);
  var expert_mode = document.getElementById("expert_mode").checked;
  if (expert_mode === false) {
    // The expert options are hidden, so they must not influence the search.
    unknown_char_allowance = 0;
    colocation_mode = false;
  }

  // --- Build the set of known characters ---
  var known_text = document.getElementById("text").value;
  if (colocation_mode) {
    // Only real characters are required to appear in a passage.
    known_text = known_text.replace(NON_TEXT, '');
  } else {
    // Punctuation and whitespace never count as unknown characters.
    known_text += "、.,…。?!;:…「」『』()〔〕【】《》〈〉﹏﹑﹔﹕﹖﹗﹙﹚﹛﹜﹝﹞ \t\n";
  }
  // Array.from splits by code point; the Set removes duplicates and gives fast lookups.
  var known_set = new Set(Array.from(known_text));
  var known_list = Array.from(known_set);

  // Wraps each run of unknown characters in <span class="unknown">...</span>.
  // NOTE(review): passage text is inserted into HTML unescaped; this assumes
  // database.json is trusted content.
  function markup(str) {
    var html = "";
    var unknown_run = "";
    function flushUnknownRun() {
      if (unknown_run.length > 0) {
        html += "<span class='unknown'>" + unknown_run + "</span>";
        unknown_run = "";
      }
    }
    for (var ch of str) {
      if (known_set.has(ch)) {
        flushUnknownRun();
        html += ch;
      } else {
        unknown_run += ch;
      }
    }
    flushUnknownRun();
    return html;
  }

  // Counts the distinct unknown characters in a passage, giving up as soon as
  // the count exceeds `limit` (the returned value is then limit + 1).
  function countDistinctUnknown(passage, limit) {
    var unknown = new Set();
    for (var ch of passage) {
      if (!known_set.has(ch)) {
        unknown.add(ch);
        if (unknown.size > limit) {
          break;
        }
      }
    }
    return unknown.size;
  }

  // Number of characters in a passage, ignoring whitespace, punctuation and control characters.
  function chineseStringLength(str) {
    return Array.from(str.replace(NON_TEXT, '')).length;
  }

  // --- Select the matching passages ---
  var results = Object.keys(database).filter(function(passage) {
    if (colocation_mode) {
      // Every entered character must occur somewhere in the passage.
      return known_list.every(function(ch) { return passage.indexOf(ch) >= 0; });
    }
    // NOTE(review): this keeps passages with EXACTLY `unknown_char_allowance`
    // distinct unknown characters, as the original code did. The form label
    // says "allowed", which may mean "at most" (<=) -- confirm the intent.
    return countDistinctUnknown(passage, unknown_char_allowance) === unknown_char_allowance;
  });

  // --- Filter by minimum length, then sort by length ---
  // Each length is computed once, and each passage gets a fixed random tiebreak
  // so the comparator stays consistent while equal-length passages are shuffled.
  var ranked = [];
  results.forEach(function(passage) {
    var length = chineseStringLength(passage);
    if (length >= min_length) {
      ranked.push({ passage: passage, length: length, tiebreak: Math.random() });
    }
  });
  ranked.sort(function(a, b) {
    return (a.length - b.length) || (a.tiebreak - b.tiebreak);
  });

  // --- Render the result table ---
  // NOTE(review): the English text and ctext id are inserted unescaped (trusted data assumed).
  var table_html = "<table><tr><th class='ctext'>Link</th><th class='chinese'>Chinese</th><th class='english'>English</th></tr>\n";
  ranked.forEach(function(item) {
    // entry[0] is the English text, entry[1] the ctext.org id used in the link.
    var entry = database[item.passage][0];
    table_html += "<tr><td class='ctext'> <a rel='noreferrer' href='https://ctext.org/dictionary.pl?if=en&id=" + entry[1] + "'>◲</a> </td><td class='chinese'><span lang='zh-Hant'>" + markup(item.passage) + "</span></td><td class='english'>" + entry[0] + "</td></tr>\n";
  });
  table_html += "</table>";
  document.getElementById("output").innerHTML = table_html;
});
</script>
</body>
</html>