Skip to content

The complete kNN-VC demo code should be: #46

Open
@ywh-my

Description

@ywh-my

from matcher import KNeighborsVC

from wavlm.WavLM import WavLMConfig,WavLM

from hifigan.models import Generator
from hifigan.utils import AttrDict
from loguru import logger
from pathlib import Path

import soundfile as sf
import torch
import json

# Load the WavLM model. Input: [B, 1, t]; output: [B, t2, 1024]

# Load the pre-trained checkpoints

# Load the pre-trained WavLM checkpoint and build the feature extractor.
# map_location="cpu" deserializes the tensors on the host regardless of the
# device they were saved from; load_state_dict below copies them into the
# model that already lives on the GPU, so the checkpoint is never duplicated
# in GPU memory and loading does not depend on the saving machine's devices.
checkpoint = torch.load('WavLM-Large.pt', map_location="cpu")
cfg = WavLMConfig(checkpoint['cfg'])
wavlm_model = WavLM(cfg).to("cuda")
wavlm_model.load_state_dict(checkpoint['model'])
wavlm_model.eval()  # switch the module to evaluation mode for inference
logger.info("wavlm模型加载")

# Vocoder: HiFi-GAN model. Input: [B, timelen, 1024]; output: [B, T]

# Build the HiFi-GAN generator from its JSON config and load its weights.
aconfig = "hifigan/config_v1_wavlm.json"
with open(aconfig, encoding="utf-8") as f:
    # Parse straight from the file handle; the body must be indented under
    # the `with` so the file is read while it is still open.
    json_config = json.load(f)
h = AttrDict(json_config)
hifigan_model = Generator(h).to("cuda")
# The checkpoint is a dict; only its "generator" entry holds the weights used here.
generator_state = torch.load("prematch_g_02500000.pt", map_location="cuda")["generator"]
hifigan_model.load_state_dict(generator_state)
hifigan_model.eval()  # evaluation mode, matching how the WavLM model is prepared
logger.info("hifigan模型加载")

# kNN-VC model

# Assemble the converter from the WavLM model, the HiFi-GAN generator and
# the generator's config object.
knn_vc = KNeighborsVC(wavlm=wavlm_model, hifigan=hifigan_model, hifigan_cfg=h)
logger.info("knn vc 模型加载")

# Start the conversion

# Convert one source utterance using up to 30 reference utterances.
src_wav_path = '测试VC/SSB0005_16Khz/SSB00050001.wav'
# rglob yields files in no guaranteed order, so sort before truncating to
# make the chosen reference subset reproducible from run to run.
ref_wav_paths = sorted(str(x) for x in Path("测试VC/SSB1831_16Khz").rglob("*.wav"))[:30]
if not ref_wav_paths:
    # Fail early with a clear message instead of an obscure error downstream.
    raise FileNotFoundError("no reference .wav files found under 测试VC/SSB1831_16Khz")

query_seq = knn_vc.get_features(src_wav_path)
matching_set = knn_vc.get_matching_set(ref_wav_paths)

print(f"query_seq shape:{query_seq.shape}")
print(f"matching_set shape:{matching_set[0].shape}")

out_wav = knn_vc.match(query_seq, matching_set, topk=4)

# NOTE(review): 16000 is assumed to match the vocoder's output sample rate
# (the inputs live in "*_16Khz" folders) — confirm against the HiFi-GAN config.
sf.write("转换语音.wav", out_wav ,16000)
logger.info("KNN VC 完毕")

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions