The complete kNN-VC demo code should be:

from matcher import KNeighborsVC

from wavlm.WavLM import WavLMConfig,WavLM

from hifigan.models import Generator
from hifigan.utils import AttrDict
from loguru import logger
from pathlib import Path

import soundfile as sf 
import torch
import json

# End-to-end kNN-VC demo: load WavLM and the HiFi-GAN vocoder, wrap them in
# KNeighborsVC, convert one source utterance using a set of reference
# utterances, and write the result to disk.

# --- WavLM feature extractor ---
# Author's note on shapes: input [B, 1, t], output [B, t2, 1024]
# (not verifiable from this script — confirm against the WavLM module).
# Load the pre-trained checkpoint. map_location puts the tensors straight onto
# the device the model lives on, regardless of the device they were saved from,
# matching how the HiFi-GAN checkpoint is loaded below.
checkpoint = torch.load('WavLM-Large.pt', map_location="cuda")
cfg = WavLMConfig(checkpoint['cfg'])
wavlm_model = WavLM(cfg).to("cuda")
wavlm_model.load_state_dict(checkpoint['model'])
wavlm_model.eval()
logger.info("wavlm模型加载")


# --- HiFi-GAN vocoder ---
# Author's note on shapes: input [B, timelen, 1024], output [B, T]
# (not verifiable from this script — confirm against the Generator module).
aconfig = "hifigan/config_v1_wavlm.json"
# Explicit encoding so parsing does not depend on the platform default.
with open(aconfig, encoding="utf-8") as f:
    json_config = json.load(f)
h = AttrDict(json_config)
hifigan_model = Generator(h).to("cuda")
hifigan_state = torch.load("prematch_g_02500000.pt", map_location="cuda")
hifigan_model.load_state_dict(hifigan_state["generator"])
logger.info("hifigan模型加载")


# --- kNN-VC model ---
knn_vc = KNeighborsVC(
    wavlm=wavlm_model,
    hifigan=hifigan_model,
    hifigan_cfg=h,
)
logger.info("knn vc 模型加载")


# --- Run the conversion ---
src_wav_path = '测试VC/SSB0005_16Khz/SSB00050001.wav'
# rglob yields files in filesystem enumeration order, which is not stable
# across machines or runs; sort first so the same 30 references are always used.
ref_wav_paths = sorted(
    str(x) for x in Path("测试VC/SSB1831_16Khz").rglob("*.wav")
)[:30]
if not ref_wav_paths:
    # Fail early with a clear message instead of an opaque error further down.
    raise FileNotFoundError(
        "No reference .wav files found under '测试VC/SSB1831_16Khz'"
    )

query_seq = knn_vc.get_features(src_wav_path)
matching_set = knn_vc.get_matching_set(ref_wav_paths)

print(f"quert_seq shape:{query_seq.shape}")
print(f"matching_set shape:{matching_set[0].shape}")

# topk is presumably the number of nearest neighbours used per query frame —
# confirm against KNeighborsVC.match.
out_wav = knn_vc.match(query_seq, matching_set, topk=4)

# Write the converted audio with a 16000 Hz sample rate.
sf.write("转换语音.wav", out_wav, 16000)
logger.info("KNN VC 完毕")

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

The whole KNN VC demo code should be : #46

加载wavlm模型输入为[B,1,t ] 输出【B,t2, 1024]

load the pre-trained checkpoints

声码器。 hifigan模型，输入为【B，timelen, 1024 ] ,输出为【B，T]

KNN VC 模型

开始转换

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

The whole KNN VC demo code should be : #46

Description

加载wavlm模型 输入为[B,1,t ] 输出【B,t2, 1024]

load the pre-trained checkpoints

声码器。 hifigan模型，输入为【B，timelen, 1024 ] ,输出为【B，T]

KNN VC 模型

开始转换

Metadata

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Issue actions

加载wavlm模型输入为[B,1,t ] 输出【B,t2, 1024]