使用科大讯飞API进行语音识别和语音合成的Python代码示例
时间: 2024-06-09 08:11:21 浏览: 191
科大讯飞语音识别与合成
3星 · 编辑精心推荐
以下是使用科大讯飞API进行语音识别和合成的Python代码示例:
语音识别:
```python
import base64
import hashlib
import json
import time

import requests
# NOTE(review): this targets the iFlytek v1 REST "iat" endpoint; confirm it is
# still offered for your account before relying on it.
url = "http://api.xfyun.cn/v1/service/v1/iat"
APPID = "YOUR_APPID"  # replace with your own APPID
API_KEY = "YOUR_API_KEY"  # replace with your own API key
audio_file = "test.wav"  # path of the audio file to recognize


def build_headers(appid, api_key, param, cur_time=None):
    """Build the signed HTTP headers for an iFlytek v1 REST request.

    Args:
        appid: Application ID, sent as the ``X-Appid`` header.
        api_key: API key; only used as input to the checksum.
        param: Dict of business parameters. It is serialized to compact JSON
            and Base64-encoded into the ``X-Param`` header.
        cur_time: Unix timestamp in seconds. Defaults to the current time.

    Returns:
        A dict with the Content-Type, X-Appid, X-CurTime, X-Param and
        X-CheckSum headers.
    """
    # Take the timestamp exactly once so X-CurTime and the checksum agree.
    cur_time = str(int(time.time() if cur_time is None else cur_time))
    # Compact separators remove JSON whitespace without touching spaces that
    # may appear inside parameter values.
    param_json = json.dumps(param, separators=(",", ":"))
    param_b64 = base64.b64encode(param_json.encode("utf-8")).decode("ascii")
    # The signature covers api_key + cur_time + Base64 params, not the payload.
    checksum = hashlib.md5(
        (api_key + cur_time + param_b64).encode("utf-8")
    ).hexdigest()
    return {
        "Content-Type": "application/x-www-form-urlencoded; charset=utf-8",
        "X-Appid": appid,
        "X-CurTime": cur_time,
        "X-Param": param_b64,
        "X-CheckSum": checksum,
    }


def main():
    """Send ``audio_file`` to the recognition endpoint and print the result."""
    with open(audio_file, "rb") as f:
        audio_data = f.read()

    param = {
        "engine_type": "sms16k",  # engine type: 16 kHz Mandarin recognition
        "aue": "raw",  # audio encoding: uncompressed audio data
    }
    headers = build_headers(APPID, API_KEY, param)
    # The audio travels as a form field holding Base64 text; requests takes
    # care of URL-encoding the form body.
    body = {"audio": base64.b64encode(audio_data).decode("ascii")}

    response = requests.post(url, headers=headers, data=body, timeout=10)
    result = json.loads(response.text)
    # Compare as text so both "0" and 0 count as success.
    if str(result.get("code")) == "0":
        print("识别结果:", result["data"])
    else:
        print("识别失败:", result.get("desc"))


if __name__ == "__main__":
    main()
```
语音合成:
```python
import requests
import json
import time
import hashlib
import base64
# NOTE(review): this targets the iFlytek v1 REST "tts" endpoint; confirm it is
# still offered for your account before relying on it.
url = "http://api.xfyun.cn/v1/service/v1/tts"
APPID = "YOUR_APPID"  # replace with your own APPID
API_KEY = "YOUR_API_KEY"  # replace with your own API key
text = "科大讯飞提供全球领先的人工智能技术、产品和服务,致力于让机器具备智能的思维和感官能力。"  # text to synthesize


def build_headers(appid, api_key, param, cur_time=None):
    """Build the signed HTTP headers for an iFlytek v1 REST request.

    Args:
        appid: Application ID, sent as the ``X-Appid`` header.
        api_key: API key; only used as input to the checksum.
        param: Dict of business parameters. It is serialized to compact JSON
            and Base64-encoded into the ``X-Param`` header.
        cur_time: Unix timestamp in seconds. Defaults to the current time.

    Returns:
        A dict with the Content-Type, X-Appid, X-CurTime, X-Param and
        X-CheckSum headers.
    """
    # Take the timestamp exactly once so X-CurTime and the checksum agree.
    cur_time = str(int(time.time() if cur_time is None else cur_time))
    # Compact separators remove JSON whitespace without touching spaces that
    # may appear inside parameter values.
    param_json = json.dumps(param, separators=(",", ":"))
    param_b64 = base64.b64encode(param_json.encode("utf-8")).decode("ascii")
    # The signature covers api_key + cur_time + Base64 params, not the payload.
    checksum = hashlib.md5(
        (api_key + cur_time + param_b64).encode("utf-8")
    ).hexdigest()
    return {
        "Content-Type": "application/x-www-form-urlencoded; charset=utf-8",
        "X-Appid": appid,
        "X-CurTime": cur_time,
        "X-Param": param_b64,
        "X-CheckSum": checksum,
    }


def main():
    """Synthesize ``text`` and save the returned audio to ``output.mp3``."""
    param = {
        "auf": "audio/L16;rate=16000",  # audio sample rate and format
        "aue": "lame",  # audio encoding: mp3
        "voice_name": "xiaoyan",  # speaker voice
        "speed": "50",  # speaking rate
        "volume": "50",  # volume
        "pitch": "50",  # pitch
        "engine_type": "intp65",  # engine type
    }
    headers = build_headers(APPID, API_KEY, param)
    # The text goes in the form body, not in X-Param; requests URL-encodes it.
    response = requests.post(
        url, headers=headers, data={"text": text}, timeout=10
    )

    # Success is signalled by an audio Content-Type with raw audio bytes as
    # the body; any other response is treated as a JSON error description.
    content_type = response.headers.get("Content-Type", "")
    if content_type.startswith("audio/"):
        audio_file = "output.mp3"  # where the synthesized audio is saved
        with open(audio_file, "wb") as f:
            f.write(response.content)
        print("合成成功,音频文件保存在:", audio_file)
    else:
        result = json.loads(response.text)
        print("合成失败:", result.get("desc"))


if __name__ == "__main__":
    main()
```
阅读全文