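# Description: kNN-VC voice-conversion demo. Loads a pre-trained WavLM model
# and a HiFi-GAN generator, wraps them in KNeighborsVC, converts one source
# utterance against a set of reference utterances, and writes the result to a
# wav file.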
from matcher import KNeighborsVC
from wavlm.WavLM import WavLMConfig,WavLM
from hifigan.models import Generator
from hifigan.utils import AttrDict
from loguru import logger
from pathlib import Path
import soundfile as sf
import torch
import json
# Load the WavLM model. Input: [B, 1, t]; output: [B, t2, 1024].
# Load the pre-trained checkpoint.
# Restore the pre-trained WavLM checkpoint and build the WavLM model from the
# configuration stored inside it.
# map_location="cpu" stops torch.load from restoring tensors onto whatever
# device they were saved from (which may not exist on this machine); the
# weights reach the GPU when load_state_dict copies them into the CUDA model.
checkpoint = torch.load('WavLM-Large.pt', map_location="cpu")
cfg = WavLMConfig(checkpoint['cfg'])
wavlm_model = WavLM(cfg).to("cuda")
wavlm_model.load_state_dict(checkpoint['model'])
wavlm_model.eval()  # the script only runs inference
logger.info("wavlm模型加载")
# Vocoder: HiFi-GAN model. Input: [B, timelen, 1024]; output: [B, T].
# Build the HiFi-GAN generator from its JSON configuration file and restore
# the generator weights from the checkpoint.
aconfig = "hifigan/config_v1_wavlm.json"
# The read must sit inside the `with` body so the file is closed right after;
# the encoding is explicit so parsing does not depend on the locale default.
with open(aconfig, encoding="utf-8") as f:
    config = f.read()
json_config = json.loads(config)
h = AttrDict(json_config)  # attribute-style access to the config entries
hifigan_model = Generator(h).to("cuda")
hifigan_model.load_state_dict(
    torch.load("prematch_g_02500000.pt", map_location="cuda")["generator"]
)
# Put the generator in eval mode, the same way the WavLM model is handled.
hifigan_model.eval()
logger.info("hifigan模型加载")
# kNN-VC model.
# Assemble the kNN-VC converter from the WavLM model, the HiFi-GAN generator,
# and `h`, the same config object the generator was built from.
knn_vc = KNeighborsVC(wavlm=wavlm_model, hifigan=hifigan_model, hifigan_cfg=h)
logger.info("knn vc 模型加载")
# Run the conversion.
# Convert one source utterance using up to 30 reference utterances found
# under the reference directory, then write the converted audio to disk.
src_wav_path = '测试VC/SSB0005_16Khz/SSB00050001.wav'
# rglob yields files in an arbitrary, filesystem-dependent order; sorting
# before truncating makes every run use the same 30 reference files.
ref_wav_paths = [
    str(x) for x in sorted(Path("测试VC/SSB1831_16Khz").rglob("*.wav"))[:30]
]
if not ref_wav_paths:
    # Fail early with a clear message instead of deep inside feature extraction.
    raise FileNotFoundError(
        "no reference .wav files found under 测试VC/SSB1831_16Khz"
    )
query_seq = knn_vc.get_features(src_wav_path)
matching_set = knn_vc.get_matching_set(ref_wav_paths)
print(f"quert_seq shape:{query_seq.shape}")
print(f"matching_set shape:{matching_set[0].shape}")
out_wav = knn_vc.match(query_seq, matching_set, topk=4)
# NOTE(review): presumably out_wav is a CPU tensor/array that soundfile can
# consume directly, and 16000 matches the model's sampling rate — confirm.
sf.write("转换语音.wav", out_wav, 16000)
logger.info("KNN VC 完毕")