Skip to content

Commit

Permalink
Merge pull request #121 from rakuri255/fix-re-pitch
Browse files Browse the repository at this point in the history
Fix re pitch
  • Loading branch information
rakuri255 authored Feb 5, 2024
2 parents 56b2168 + 12815d2 commit 513ec6f
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 64 deletions.
6 changes: 6 additions & 0 deletions ReleaseNotes.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# Version: 0.0.9
Date: 2024.02.06
- Fix:
- Re-Pitch mode now re-pitches the audio again
- Re-Pitch mode now shows the text and lines in the plot

# Version: 0.0.8
Date: 2024.01.03
- Changes:
Expand Down
2 changes: 1 addition & 1 deletion src/Settings.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
class Settings:
APP_VERSION = "0.0.8"
APP_VERSION = "0.0.9"

create_midi = True
create_plot = False
Expand Down
2 changes: 1 addition & 1 deletion src/UltraSinger.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ def run() -> None:
vocals_path = os.path.join(audio_separation_path, "vocals.wav")
plot_spectrogram(vocals_path, song_output, "vocals.wav")
plot_spectrogram(settings.processing_audio_path, song_output, "processing audio")
plot(pitched_data, song_output, transcribed_data, midi_notes)
plot(pitched_data, song_output, transcribed_data, ultrastar_class, midi_notes)

# Write Ultrastar txt
if is_audio:
Expand Down
2 changes: 1 addition & 1 deletion src/modules/Ultrastar/ultrastar_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def create_repitched_txt_from_ultrastar_data(
# todo: just add '_repitched' to input_file
with open(output_repitched_ultrastar, "w", encoding=FILE_ENCODING) as file:
for line in txt:
if line.startswith(f"#{UltrastarTxtNoteTypeTag.NORMAL} "):
if line.startswith(f"{UltrastarTxtNoteTypeTag.NORMAL} "):
parts = re.findall(r"\S+|\s+", line)
# between are whitespaces
# [0] :
Expand Down
133 changes: 72 additions & 61 deletions src/modules/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@
from matplotlib import pyplot as plt
from matplotlib.patches import Rectangle

from modules.Ultrastar.ultrastar_txt import UltrastarTxtValue
from modules.console_colors import ULTRASINGER_HEAD
from modules.Pitcher.pitched_data import PitchedData
from modules.Pitcher.pitcher import get_pitched_data_with_high_confidence
from modules.Speech_Recognition.TranscribedData import TranscribedData


@dataclass
class PlottedNote:
"""Plotted note"""
Expand Down Expand Up @@ -54,11 +56,12 @@ def create_plot_notes(notes: list[str], octaves: list[int]) -> list[PlottedNote]


def plot(
pitched_data: PitchedData,
output_path: str,
transcribed_data: list[TranscribedData] = None,
midi_notes: list[str] = None,
title: str = None,
pitched_data: PitchedData,
output_path: str,
transcribed_data: list[TranscribedData] = None,
ultrastar_class: UltrastarTxtValue = None,
midi_notes: list[str] = None,
title: str = None,
) -> None:
"""Plot transcribed data"""

Expand Down Expand Up @@ -118,7 +121,7 @@ def plot(

set_figure_dimensions(xmax - xmin, y_upper_bound - y_lower_bound)

draw_words(transcribed_data, midi_notes)
plot_words(transcribed_data, ultrastar_class, midi_notes)

if title is not None:
plt.title(label=title)
Expand Down Expand Up @@ -192,8 +195,8 @@ def create_gaps(pitched_data: PitchedData, step_size: float) -> PitchedData:
for i, time in enumerate(pitched_data.times):
comes_right_after_previous = time - previous_time <= step_size
previous_frequency_is_not_gap = (
len(pitched_data_with_gaps.frequencies) > 0
and str(pitched_data_with_gaps.frequencies[-1]) != "nan"
len(pitched_data_with_gaps.frequencies) > 0
and str(pitched_data_with_gaps.frequencies[-1]) != "nan"
)
if previous_frequency_is_not_gap and not comes_right_after_previous:
pitched_data_with_gaps.times.append(time)
Expand All @@ -209,34 +212,42 @@ def create_gaps(pitched_data: PitchedData, step_size: float) -> PitchedData:
return pitched_data_with_gaps


def draw_words(transcribed_data, midi_notes):
def plot_word(midi_note: str, start, end, word):
    """Draw one word as a translucent red box at its note's pitch and label it.

    The box runs from ``start`` to ``end`` horizontally. Vertically it covers
    the note's frequency plus/minus half of ``get_frequency_range(midi_note)``,
    with both edges converted via log10. The word is written rotated by 90
    degrees, anchored at the top edge of the box, a quarter of the box width
    in from its left side.
    """
    center_hz = librosa.note_to_hz(midi_note)
    half_band = get_frequency_range(midi_note) / 2

    # log10 is applied to a one-element list; [0] unwraps the scalar result.
    bottom = numpy.log10([center_hz - half_band])[0]
    top = numpy.log10([center_hz + half_band])[0]
    duration = end - start

    box = Rectangle(
        (start, bottom),
        duration,
        top - bottom,
        edgecolor="none",
        facecolor="red",
        alpha=0.5,
    )
    plt.gca().add_patch(box)
    plt.text(start + duration / 4, top, word, rotation=90)


def plot_words(
    transcribed_data: list[TranscribedData],
    ultrastar_class: UltrastarTxtValue,
    midi_notes: list[str],
):
    """Draw a labelled rectangle for each word.

    Word text and timings are taken from ``transcribed_data`` when it is
    given; otherwise from the parallel ``words`` / ``startTimes`` /
    ``endTimes`` lists of ``ultrastar_class``. The i-th word is drawn at the
    pitch of ``midi_notes[i]``.

    Nothing is drawn when ``midi_notes`` is None or when both word sources
    are None.
    """
    if midi_notes is None:
        # plot() declares midi_notes with a None default; without notes there
        # is no pitch to place a word at, so skip drawing instead of failing
        # on midi_notes[i].
        return

    if transcribed_data is not None:
        for i, data in enumerate(transcribed_data):
            plot_word(midi_notes[i], data.start, data.end, data.word)
    elif ultrastar_class is not None:
        for i, word in enumerate(ultrastar_class.words):
            plot_word(
                midi_notes[i],
                ultrastar_class.startTimes[i],
                ultrastar_class.endTimes[i],
                word,
            )


def snake(s):
Expand All @@ -253,40 +264,40 @@ def plot_spectrogram(audio_seperation_path: str,
title: str = "Spectrogram",

) -> None:
"""Plot spectrogram of data"""
"""Plot spectrogram of data"""

print(
f"{ULTRASINGER_HEAD} Creating plot{': ' + title}"
)
print(
f"{ULTRASINGER_HEAD} Creating plot{': ' + title}"
)

audio, sr = librosa.load(audio_seperation_path, sr=None)
powerSpectrum, frequenciesFound, time, imageAxis = plt.specgram(audio, Fs=sr)
plt.colorbar()
audio, sr = librosa.load(audio_seperation_path, sr=None)
powerSpectrum, frequenciesFound, time, imageAxis = plt.specgram(audio, Fs=sr)
plt.colorbar()

if title is not None:
plt.title(label=title)
if title is not None:
plt.title(label=title)

plt.xlabel("Time (s)")
plt.ylabel("Frequency (Hz)")
plt.xlabel("Time (s)")
plt.ylabel("Frequency (Hz)")

ymin = 0
ymax = max(frequenciesFound) + 0.05
plt.ylim(ymin, ymax)
xmin = 0
xmax = max(time)
plt.xlim(xmin, xmax)
ymin = 0
ymax = max(frequenciesFound) + 0.05
plt.ylim(ymin, ymax)
xmin = 0
xmax = max(time)
plt.xlim(xmin, xmax)

plt.figure(1).set_figwidth(max(6.4, xmax))
plt.figure(1).set_figheight(4)
plt.figure(1).set_figwidth(max(6.4, xmax))
plt.figure(1).set_figheight(4)

plt.figure(1).tight_layout(h_pad=1.4)
plt.figure(1).tight_layout(h_pad=1.4)

dpi = 200
plt.savefig(
os.path.join(
output_path, f"plot{'_' + snake(title)}.svg"
),
dpi=dpi,
)
plt.clf()
plt.cla()
dpi = 200
plt.savefig(
os.path.join(
output_path, f"plot{'_' + snake(title)}.svg"
),
dpi=dpi,
)
plt.clf()
plt.cla()

0 comments on commit 513ec6f

Please sign in to comment.