-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdataset.py
87 lines (72 loc) · 2.58 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
from urllib.request import urlretrieve
from zipfile import ZipFile
import os
from romanyh.transposition import transposeRomanText
def isAnalysisFile(f):
    """Tell whether path 'f' names a RomanText analysis file.

    A path qualifies when it contains "analysis", does not contain
    "feedback_on", and ends with "txt".
    """
    # Guard clauses, evaluated in the same order as the three conditions.
    if "analysis" not in f:
        return False
    if "feedback_on" in f:
        return False
    return f.endswith("txt")
def isScoreFile(f):
    """Tell whether path 'f' names a compressed MusicXML score file.

    A path qualifies when it contains "score", does not contain
    "analysis_on", and ends with "mxl".
    """
    # Guard clauses, evaluated in the same order as the three conditions.
    if "score" not in f:
        return False
    if "analysis_on" in f:
        return False
    return f.endswith("mxl")
def downloadAndExtract(listFile=None, analysisOnly=True):
    """Downloads and extracts all RomanText files in When-in-Rome.

    The script gets the latest master branch of the When-in-Rome repository
    as a zip archive and extracts every RomanText analysis file from it,
    keeping the folder layout of the archive. No target folder is passed to
    the extraction call, so files land under the current working directory.

    Args:
        listFile: Optional path. When provided, the names of the extracted
            analysis files are also written to disk at that location, one
            per line.
        analysisOnly: When False, every "score.mxl" whose sibling
            "analysis.txt" was extracted is extracted too. Scores are never
            added to the returned list.

    Returns:
        A sorted list with the archive member name of every analysis file
        that was extracted. These are paths relative to the extraction
        folder, not absolute paths.
    """
    wheninromeURL = (
        "https://github.com/MarkGotham/When-in-Rome/archive/master.zip"
    )
    # Gets the zipped repo in a temporary file
    tmpZipFile, httpResponse = urlretrieve(wheninromeURL)
    # TODO: Maybe handle any issues based on httpResponse
    directory = []
    # Mirror of `directory` used only for O(1) membership tests.
    extracted = set()
    try:
        # The context manager closes the archive even if extraction fails.
        with ZipFile(tmpZipFile) as repo:
            # Sorted order guarantees that a folder's "analysis.txt" is
            # visited before its "score.mxl", which the score check needs.
            for f in sorted(repo.namelist()):
                if isAnalysisFile(f):
                    repo.extract(f)
                    directory.append(f)
                    extracted.add(f)
                elif (
                    not analysisOnly
                    and isScoreFile(f)
                    and f.replace("score.mxl", "analysis.txt") in extracted
                ):
                    repo.extract(f)
    finally:
        # Best-effort removal of the downloaded archive; a failure to
        # delete it must not hide the result or an extraction error.
        try:
            os.remove(tmpZipFile)
        except OSError:
            pass
    if listFile:
        with open(listFile, "w") as fout:
            fout.write("\n".join(directory))
    return directory
def transposeAll(keys=None, listFile="dataset.txt"):
    """Transposes every file in the dataset to other keys.

    The dataset is the list of paths stored in `listFile`, one per line.
    If `listFile` does not exist, it is created first by calling
    downloadAndExtract. Each transposition is written next to its source
    file, with "_<key>" inserted before the file extension.

    Args:
        keys: Iterable of key names to transpose to. If it is not provided
            (or is empty), the twelve keys from "C" to "B" listed below
            are used.
            NOTE(review): how a key name maps to a mode is decided by
            transposeRomanText, which is not visible here.
        listFile: Path to the text file holding the dataset paths.
    """
    if not os.path.exists(listFile):
        downloadAndExtract(listFile)
    with open(listFile) as fd:
        # Skip empty entries so that an empty list file or a trailing
        # newline is not treated as a file name.
        files = [name for name in fd.read().split("\n") if name]
    if not keys:
        keys = [
            "C",
            "C#",
            "D",
            "Eb",
            "E",
            "F",
            "F#",
            "G",
            "Ab",
            "A",
            "Bb",
            "B",
        ]
    for f in files:
        # Split only the final extension: a ".txt" elsewhere in the path
        # is left alone, and the output name always differs from `f`, so
        # the source file is never overwritten.
        root, ext = os.path.splitext(f)
        for k in keys:
            # Presumably returns the transposed RomanText as a string;
            # it is written out verbatim.
            transposed = transposeRomanText(f, k)
            transposedFileName = f"{root}_{k}{ext}"
            with open(transposedFileName, "w") as fout:
                fout.write(transposed)
if __name__ == "__main__":
    # Run as a script: fetch the corpus and record the extracted analysis
    # paths in dataset.txt.
    downloadAndExtract("dataset.txt")