Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Pychorus

Pychorus is an open-source library for finding choruses or other interesting sections in pieces of music. The algorithm is largely based on [a paper](https://pdfs.semanticscholar.org/f120/3fb2efe2f251ea7c221c9eaca95cc163594b.pdf) by Masataka Goto, with some simplifications and modifications. There is room for improvement, so feel free to contribute to the project.

A blog post with a full explanation of how the library works is coming soon.

## Getting Started

You can install the library with pip:

```
pip install pychorus
```

### Sample execution

The most straightforward way to use the module is as follows:

```
from pychorus import find_and_output_chorus

chorus_start_sec = find_and_output_chorus("path/to/audio_file", "path/to/output_file", clip_length=15)
```

You can also clone the repo and use main.py as a command-line tool:
```
python main.py path/to/audio_file --output_file=path/to/output_file
```

### Creating the chromagram, time-time, and time-lag matrices

```
from pychorus import create_chroma
from pychorus.similarity_matrix import TimeTimeSimilarityMatrix, TimeLagSimilarityMatrix

chroma, _, sr, _ = create_chroma("path/to/audio_file")
time_time_similarity = TimeTimeSimilarityMatrix(chroma, sr)
time_lag_similarity = TimeLagSimilarityMatrix(chroma, sr)

# Visualize the results
time_time_similarity.display()
time_lag_similarity.display()
```

## Planned improvements for v0.2
* Detect choruses in music recorded without a metronome by looking for slightly crooked lines
* API to return all choruses, not just the one with the most matches
* Add the ability to output the entire detected chorus, not just a section of length `clip_length`

## License

This project is licensed under the MIT License - see the [LICENSE.md](LICENSE.md) file for details.
2 changes: 1 addition & 1 deletion pychorus/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from pychorus.helpers import find_and_output_chorus, find_chorus
from pychorus.helpers import find_and_output_chorus, find_chorus, create_chroma
29 changes: 20 additions & 9 deletions pychorus/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,21 @@ def draw_lines(num_samples, sample_rate, lines):
plt.show()


def create_chroma(input_file, n_fft=N_FFT):
    """
    Load an audio file and compute which notes are present over time

    Args:
        input_file: path of the audio file to load
        n_fft: FFT window size handed to librosa.stft

    Returns: tuple of 12 x n chroma, song wav data, sample rate and the song
        length in seconds. The file is loaded with sr=None, so the sample rate
        is the file's own rate rather than librosa's 22050 default.
    """
    song_wav_data, sample_rate = librosa.load(input_file, sr=None)

    # Power spectrogram: squared magnitude of the short-time Fourier transform
    power_spectrogram = np.abs(librosa.stft(song_wav_data, n_fft=n_fft)) ** 2
    chroma = librosa.feature.chroma_stft(S=power_spectrogram, sr=sample_rate)

    # Number of audio samples divided by samples per second
    song_length_sec = song_wav_data.shape[0] / float(sample_rate)

    return chroma, song_wav_data, sample_rate, song_length_sec


def find_chorus(chroma, sr, song_length_sec, clip_length):
"""
Find the most repeated chorus
Expand All @@ -117,10 +132,11 @@ def find_chorus(chroma, sr, song_length_sec, clip_length):
sr: sample rate of the song, usually 22050
song_length_sec: length in seconds of the song (lost in processing chroma)
clip_length: minimum length in seconds we want our chorus to be (at least 10-15s)

Returns: Time in seconds of the start of the best chorus
"""
num_samples = chroma.shape[1]

print("Calculating time lag similarity matrix")
time_time_similarity = TimeTimeSimilarityMatrix(chroma, sr)
time_lag_similarity = TimeLagSimilarityMatrix(chroma, sr)

Expand All @@ -145,7 +161,7 @@ def find_chorus(chroma, sr, song_length_sec, clip_length):
return best_chorus.start / chroma_sr


def find_and_output_chorus(input_file, output_file, clip_length):
def find_and_output_chorus(input_file, output_file, clip_length=15):
"""
Finds the most repeated chorus from input_file and outputs to output file.

Expand All @@ -157,12 +173,7 @@ def find_and_output_chorus(input_file, output_file, clip_length):

Returns: Time in seconds of the start of the best chorus
"""
print("Loading file")
y, sr = librosa.load(input_file)
song_length_sec = y.shape[0] / float(sr)
S = np.abs(librosa.stft(y, n_fft=N_FFT))**2
chroma = librosa.feature.chroma_stft(S=S, sr=sr)

chroma, song_wav_data, sr, song_length_sec = create_chroma(input_file)
chorus_start = find_chorus(chroma, sr, song_length_sec, clip_length)
if chorus_start is None:
return
Expand All @@ -171,7 +182,7 @@ def find_and_output_chorus(input_file, output_file, clip_length):
chorus_start // 60, chorus_start % 60))

if output_file is not None:
chorus_wave_data = y[int(chorus_start*sr) : int((chorus_start+clip_length)*sr)]
chorus_wave_data = song_wav_data[int(chorus_start*sr) : int((chorus_start+clip_length)*sr)]
sf.write(output_file, chorus_wave_data, sr)
#librosa.output.write_wav(output_file, chorus_wave_data, sr)

Expand Down
36 changes: 33 additions & 3 deletions pychorus/similarity_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,12 @@ def __init__(self, chroma, sample_rate):

@abstractmethod
def compute_similarity_matrix(self, chroma):
    """
    The specific type of similarity matrix we want to compute

    Subclasses override this to build their own similarity matrix.

    Args:
        chroma: 12 x n numpy array of musical notes present at every time step

    Returns: the similarity matrix computed by the subclass
    """
    pass

def display(self):
Expand All @@ -45,11 +50,24 @@ class TimeTimeSimilarityMatrix(SimilarityMatrix):
def compute_similarity_matrix(self, chroma):
    """Vectorized computation of the time-time similarity matrix via numpy broadcasting"""
    # Subtracting (12 x n x 1) from (12 x 1 x n) broadcasts to (12 x n x n):
    # one difference vector for every pair of time steps
    frames_along_rows = np.expand_dims(chroma, 2)  # (12 x n x 1)
    frames_along_cols = np.expand_dims(chroma, 1)  # (12 x 1 x n)
    pairwise_diff = frames_along_rows - frames_along_cols

    # Collapse the 12 chroma bins into a single Euclidean distance per pair
    pairwise_distance = np.linalg.norm(pairwise_diff, axis=0)

    # NOTE(review): sqrt(12) is presumably the largest possible distance when
    # every chroma value lies in [0, 1] - confirm against the chroma source
    return 1 - (pairwise_distance / sqrt(12))

def compute_similarity_matrix_slow(self, chroma):
    """Loop-based reference computation of the time-time similarity matrix"""
    n_frames = chroma.shape[1]
    max_distance = sqrt(12)
    similarity = np.zeros((n_frames, n_frames))

    for row in range(n_frames):
        row_frame = chroma[:, row]
        for col in range(n_frames):
            # Euclidean distance between the two chroma vectors, turned into
            # a similarity by subtracting the scaled distance from 1
            distance = np.linalg.norm(row_frame - chroma[:, col])
            similarity[row, col] = 1 - (distance / max_distance)

    return similarity


class TimeLagSimilarityMatrix(SimilarityMatrix):
"""
Expand All @@ -72,6 +90,19 @@ def compute_similarity_matrix(self, chroma):
time_lag_similarity = np.rot90(time_lag_similarity, k=1, axes=(0, 1))
return time_lag_similarity[:num_samples, :num_samples]

def compute_similarity_matrix_slow(self, chroma):
    """Loop-based reference computation of the time-lag similarity matrix"""
    n_frames = chroma.shape[1]
    max_distance = sqrt(12)
    lag_similarity = np.zeros((n_frames, n_frames))

    for time_idx in range(n_frames):
        current_frame = chroma[:, time_idx]
        # Only lags up to the current time index are filled in; every other
        # cell keeps its initial zero
        for lag in range(time_idx + 1):
            earlier_frame = chroma[:, time_idx - lag]
            distance = np.linalg.norm(current_frame - earlier_frame)
            # Row index is the lag, column index is the time step
            lag_similarity[lag, time_idx] = 1 - (distance / max_distance)

    return lag_similarity

def denoise(self, time_time_matrix, smoothing_size):
"""
Emphasize horizontal lines by suppressing vertical and diagonal lines. We look at 6
Expand Down Expand Up @@ -114,7 +145,6 @@ def denoise(self, time_time_matrix, smoothing_size):
ur_average[y, x] = diagonal_moving_average[x - y,
x + smoothing_size - 1]


non_horizontal_max = np.maximum.reduce([down_average, up_average, ll_average, ur_average])
non_horizontal_min = np.minimum.reduce([up_average, down_average, ll_average, ur_average])

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@
'scipy',
'soundfile',
'matplotlib'
]
],
classifiers = [],
)