Skip to content

acrcloud/acrcloud_sdk_python

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

Audio Recognition Python SDK

Overview

ACRCloud provides services such as Music Recognition, Broadcast Monitoring, Custom Audio Recognition, Copyright Compliance & Data Deduplication, Live Channel Detection, and Offline Recognition etc.

This audio recognition python SDK support most of audio / video files.

Audio: mp3, wav, m4a, flac, aac, amr, ape, ogg ...
Video: mp4, mkv, wmv, flv, ts, avi ...

Requirements

Follow one of the tutorials to create a project and get your host, access_key and access_secret.

Install

Linux and macOS

python3 -m pip install pyacrcloud

Other platform

You can run python -m pip install git+https://github.com/acrcloud/acrcloud_sdk_python or go to sub dir, and run"sudo python setup.py install".

Windows Runtime Library

If you run the SDK on Windows, you must install this library.
X86: download and install Library(windows/vcredist_x86.exe)
x64: download and install Library(windows/vcredist_x64.exe)

Note

  1. If you run the SDK on Windows, you must install library(vcredist).
  2. ALL version supports humming.
  3. If you use docker alpine, you need to install "apk add --update libstdc++"

Functions

Introduction all API.

recognizer.py

class ACRCloudRecognizer:
    def recognize_by_file(self, file_path, start_seconds, rec_length=10):
      #@param file_path : query file path
      #@param start_seconds : skip (start_seconds) seconds from from the beginning of (filePath)
      #@param rec_length: use rec_length seconds data to recongize
      #@return result metainfos
      
    def recognize_by_filebuffer(self, file_buffer, start_seconds, rec_length=10):
      #@param file_buffer : file_path query buffer
      #@param start_seconds : skip (start_seconds) seconds from from the beginning of (filePath)
      #@param rec_length: use rec_length seconds data to recongize
      #@return result metainfos
      
    def recognize(self, wav_audio_buffer):
      #@param wav_audio_buffer : query buffer(RIFF (little-endian) data, WAVE audio, Microsoft PCM, 16 bit, mono 8000 Hz)
      #@return result metainfos

Module acrcloud_extr_tool

def create_fingerprint_by_file(file_name, start_time_seconds, audio_len_seconds, is_db_fingerprint, opt):
      #file_name: Path of input file; 
      #start_time_seconds: Start time of input file, default is 0; 
      #audio_len_seconds: Length of audio data you need. if you create recogize frigerprint, default is 12 seconds, if you create db frigerprint, it is not usefully; 
      #is_db_fingerprint: If it is True, it will create db frigerprint (Fingerprint for bucket, not for recognition); 
      #opt opt = {
               'filter_energy_min': 0,
               'silence_energy_threshold': 100,
               'silence_rate_threshold': 1
           }

def create_humming_fingerprint_by_file(file_name, start_time_seconds, audio_len_seconds):
      #file_name: Path of input file; 
      #start_time_seconds: Start time of input file, default is 0; 
      #audio_len_seconds: Length of audio data you need. if you create recogize frigerprint, default is 12 seconds, if you create db frigerprint, it is not usefully; 

def create_fingerprint_by_filebuffer(data_buffer, start_time_seconds, audio_len_seconds, is_db_fingerprint, opt):
      #data_buffer: data buffer of input file; 
      #start_time_seconds: Start time of input file, default is 0; 
      #audio_len_seconds: Length of audio data you need. if you create recogize frigerprint, default is 12 seconds, if you create db frigerprint, it is not usefully; 
      #is_db_fingerprint: If it is True, it will create db frigerprint (Fingerprint for bucket, not for recognition); 
      #opt = {
               'filter_energy_min': 0,
               'silence_energy_threshold': 100,
               'silence_rate_threshold': 1
           }

def create_humming_fingerprint_by_filebuffer(data_buffer, start_time_seconds, audio_len_seconds):
      #data_buffer: data buffer of input file; 
      #start_time_seconds: Start time of input file, default is 0; 
      #audio_len_seconds: Length of audio data you need. if you create recogize frigerprint, default is 12 seconds, if you create db frigerprint, it is not usefully; 

def create_fingerprint(data_buffer, is_db_fingerprint):
      #data_buffer: audio data buffer(RIFF (little-endian) data, WAVE audio, Microsoft PCM, 16 bit, mono 8000 Hz); 
      #is_db_fingerprint: If it is True, it will create db frigerprint (Fingerprint for bucket, not for recognition); 

def create_humming_fingerprint(data_buffer):
      #data_buffer: audio data buffer(RIFF (little-endian) data, WAVE audio, Microsoft PCM, 16 bit, mono 8000 Hz); 

def decode_audio_by_file(file_name, start_time_seconds, audio_len_seconds):
      #It will return the audio data(RIFF (little-endian) data, WAVE audio, Microsoft PCM, 16 bit, mono 8000 Hz); 
      #file_name: Path of input file; 
      #start_time_seconds: Start time of input file, default is 0; 
      #audio_len_seconds: Length of audio data you need, if it is 0, will decode all the audio; 

def decode_audio_by_filebuffer(data_buffer, start_time_seconds, audio_len_seconds):
      #It will return the audio data(RIFF (little-endian) data, WAVE audio, Microsoft PCM, 16 bit, mono 8000 Hz); 
      #data_buffer: data buffer of input file; 
      #start_time_seconds: Start time of input file, default is 0; 
      #audio_len_seconds: Length of audio data you need, if it is 0, will decode all the audio; 

def version() 
      #return the version of this module

Example

run Test: python test.py test.mp3

#!/usr/bin/env python
#-*- coding:utf-8 -*-

import os, sys
from acrcloud.recognizer import ACRCloudRecognizer

if __name__ == '__main__':
    config = {
        #Replace "xxxxxxxx" below with your project's host, access_key and access_secret.
        'host':'XXXXXXXX',
        'access_key':'XXXXXXXX', 
        'access_secret':'XXXXXXXX',
        'timeout':10 # seconds
    }

    '''This module can recognize ACRCloud by most of audio/video file. 
        Audio: mp3, wav, m4a, flac, aac, amr, ape, ogg ...
        Video: mp4, mkv, wmv, flv, ts, avi ...'''
    re = ACRCloudRecognizer(config)

    # process each for 10 seconds.
    filepath = sys.argv[1]
    duration_ms = int(re.get_duration_ms_by_file(filepath))
    for i in range(0, duration_ms//1000, 10):
        res = re.recognize_by_file(filepath, i, 10)
        print(i, res.strip())

    #recognize by file path, and skip 0 seconds from from the beginning of sys.argv[1].
    print re.recognize_by_file(sys.argv[1], 0)

    buf = open(sys.argv[1], 'rb').read()
    #recognize by file_audio_buffer that read from file path, and skip 0 seconds from from the beginning of sys.argv[1].
    print re.recognize_by_filebuffer(buf, 0)