Python 识别录音并转为文字的实现_Python

程式功能: 用 UI 界面,点击界面上的“开始识别”来录音(调用百度云语音接口),并自动将结果显示在界面的文本框中

Time: 2022/03/06

Author: Xiaohong

功能: 使用 Python 录制语音,并调用百度云语音接口将录音识别为文字

项目的文件结构方式:
1. PyQt5 UI 文件: My_Audio_Record_cloud.ui
2. PyQt5 UI 文件转换生成的 PY 文件: My_Audio_Record_cloud_Ui.py
3. PyQt5 UI 文件对应的 Class 文件: My_Audio_Record_cloud_class.py
4. 通用的消息显示文件(在My_Audio_Record_cloud_class.py 中被调用): FangMessage.py

本例仅为实验性质的示例,不能直接用于生产环境;支持的语音录入长度也较短(默认录音约 5 秒)

主程序界面如下：

Python 识别录音并转为文字的实现

主程序 My_Audio_Record_cloud_class.py:

				?

									# -*- coding: utf-8 -*-

									'''

									程式功能: 用 UI 界面,点击界面上的“开始识别”来录音,并自动将结果显示在界面的文本框中

									Time: 2022/03/06

									Author: Xiaohong

									'''

									import wave  # pip3 install  wave

									import My_Audio_Record_cloud_Ui as my_audio_record_cloud

									from pyaudio import PyAudio, paInt16  # 直接用pip安装的pyaudio不支持3.7

									# 若安装失败的话,下载对应的whl 文件  https://www.lfd.uci.edu/~gohlke/pythonlibs/#pyaudio

									from PyQt5 import QtGui, QtCore, QtWidgets

									from PyQt5.QtWidgets import (

									    QApplication,

									    QMainWindow,

									    QDialog,

									    QSplashScreen,

									    QToolButton,

									    QToolTip,

									    QWidget,

									    QMessageBox,

									    QAction,

									    QFileDialog,

									)

									# from PyQt5.QtWidgets import (

									#     QApplication,

									#     QWidget,

									# )

									import sys, os, json, pycurl, urllib

									import urllib.request

									from FangMessage import FangMessage

									class Audio_record_cloud_class(QMainWindow, my_audio_record_cloud.Ui_MainWindow):
									    """Main window that records a short clip and shows its transcription.

									    Clicking the push button records ``self.TIME`` seconds of audio, saves
									    it as a WAV file in the current working directory, uploads the PCM data
									    to the speech endpoint and puts the first returned candidate into the
									    text browser.

									    NOTE(review): recording and both HTTP requests run synchronously inside
									    the button slot, so the window cannot repaint until they finish.
									    """

									    # Seconds allowed for each network operation (connect / whole transfer).
									    _HTTP_TIMEOUT = 80

									    def __init__(self, parent=None):
									        """Build the UI, connect the button and set the audio parameters.

									        Args:
									            parent: Optional parent widget, forwarded to ``QMainWindow``.
									        """
									        # Forward ``parent``; it used to be accepted but silently dropped.
									        super().__init__(parent)

									        self.child = my_audio_record_cloud.Ui_MainWindow()
									        self.child.setupUi(self)
									        self.file_name = ""
									        self.child.pushButton.clicked.connect(self.my_start)

									        # Recording parameters
									        self.framerate = 8000  # frames per second
									        self.NUM_SAMPLES = 2000  # frames fetched per stream read
									        self.channels = 1
									        self.sampwidth = 2  # bytes per sample, matches paInt16

									        # Recording length in seconds
									        self.TIME = 5

									        # Playback parameter; not read by any method of this class
									        self.chunk = 1024

									    def init_file_name(self):
									        """Choose the path of the next recording and remember it.

									        The path is ``<cwd>/test<timestamp>.wav``.

									        Returns:
									            The full path, which is also stored in ``self.file_name``.
									        """
									        self.file_name = os.path.join(
									            os.getcwd(), 'test' + self.get_timeseq() + '.wav'
									        )
									        return self.file_name

									    def get_timeseq(self):
									        """Return the current local time formatted as ``YYYY-mm-dd-HH-MM-SS``."""
									        import time

									        return time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())

									    def Start_record(self):
									        """Record ``self.TIME`` seconds from the input device into a WAV file.

									        The audio device is always released, even when opening or reading
									        the stream fails.
									        """
									        self.init_file_name()

									        # Reads needed to cover TIME seconds, rounded up. The previous loop
									        # (`count <= TIME * 4`) assumed framerate / NUM_SAMPLES == 4 and
									        # read one buffer more than requested.
									        total_frames = int(self.TIME * self.framerate)
									        total_reads = (total_frames + self.NUM_SAMPLES - 1) // self.NUM_SAMPLES

									        my_buf = []
									        pa = PyAudio()
									        try:
									            stream = pa.open(
									                format=paInt16,
									                channels=self.channels,  # keep in sync with the WAV header
									                rate=self.framerate,
									                input=True,
									                frames_per_buffer=self.NUM_SAMPLES,
									            )
									            try:
									                for _ in range(total_reads):
									                    my_buf.append(stream.read(self.NUM_SAMPLES))
									                    print("..")
									            finally:
									                stream.stop_stream()
									                stream.close()
									        finally:
									            pa.terminate()

									        self.save_wave_file(self.file_name, my_buf)

									        FangMessage1 = FangMessage()
									        FangMessage1.runY('完成', '已完成录音', 'OK')

									    def save_wave_file(self, filename, data):
									        """Write recorded chunks to ``filename`` as a WAV file.

									        Args:
									            filename: Destination path.
									            data: Iterable of ``bytes`` objects holding raw PCM frames.
									        """
									        # The context manager closes the file even if a write fails.
									        with wave.open(filename, 'wb') as wf:
									            wf.setnchannels(self.channels)
									            wf.setsampwidth(self.sampwidth)
									            wf.setframerate(self.framerate)
									            wf.writeframes(b''.join(data))

									    def dump_res(self, buf):
									        """Print a complete server reply and show its first candidate.

									        Args:
									            buf: The whole JSON reply (``bytes`` or ``str``).
									        """
									        print(buf)
									        try:
									            reply = json.loads(buf)
									        except ValueError:
									            # Covers malformed JSON as well as undecodable bytes.
									            reply = None

									        candidates = reply.get('result') if isinstance(reply, dict) else None
									        if not candidates:
									            # NOTE(review): error replies are assumed to carry an 'err_msg'
									            # field - confirm against the service documentation.
									            detail = reply.get('err_msg') if isinstance(reply, dict) else None
									            if not detail:
									                detail = (
									                    buf.decode('utf-8', 'replace')
									                    if isinstance(buf, bytes)
									                    else str(buf)
									                )
									            self.child.textBrowser.setText('识别失败: %s' % detail)
									            return

									        self.child.textBrowser.setText(candidates[0])
									        print(candidates[0])

									    def get_token(self):
									        """Request an access token using the application's key pair.

									        Returns:
									            The ``access_token`` value of the JSON reply.

									        Raises:
									            KeyError: If the reply contains no ``access_token``.
									            urllib.error.URLError: If the request fails or times out.
									        """
									        from urllib.parse import urlencode

									        # Placeholders: replace with the keys of your own application.
									        apiKey = "XXXXXXXXXXXXXXXXXXXXXXX"
									        secretKey = "YYYYYYYYYYYYYYYYYYYYYYYYY"

									        # urlencode escapes any character that is not URL-safe in the keys.
									        query = urlencode(
									            {
									                'grant_type': 'client_credentials',
									                'client_id': apiKey,
									                'client_secret': secretKey,
									            }
									        )
									        auth_url = "https://aip.baidubce.com/oauth/2.0/token?" + query

									        with urllib.request.urlopen(auth_url, timeout=self._HTTP_TIMEOUT) as res:
									            json_data = res.read()
									        return json.loads(json_data)['access_token']

									    def use_cloud(self, token):
									        """Upload the recorded audio and display the recognition result.

									        Args:
									            token: Access token as returned by ``get_token``.
									        """
									        from io import BytesIO

									        with wave.open(self.file_name, 'rb') as fp:
									            framerate = fp.getframerate()
									            print('sampwidth:', fp.getsampwidth())
									            print('framerate:', framerate)
									            print('channels:', fp.getnchannels())
									            audio_data = fp.readframes(fp.getnframes())

									        # Use the real payload size; `nframes * 2` is only right for mono
									        # 16-bit audio.
									        f_len = len(audio_data)

									        cuid = "4d36e972-e325-11ce-bfc1-08002be10318"
									        # NOTE(review): the token travels over plain HTTP here - confirm
									        # whether the endpoint should be called via HTTPS instead.
									        srv_url = (
									            'http://vop.baidu.com/server_api' + '?cuid=' + cuid + '&token=' + token
									        )
									        http_header = [
									            'Content-Type:audio/pcm;rate=%d' % framerate,
									            'Content-Length:%d' % f_len,
									        ]

									        # libcurl may deliver the reply in several pieces, so collect them
									        # all and parse the JSON only once the transfer has finished.
									        response = BytesIO()
									        c = pycurl.Curl()
									        try:
									            c.setopt(pycurl.URL, str(srv_url))
									            c.setopt(c.HTTPHEADER, http_header)
									            c.setopt(c.POST, 1)
									            c.setopt(c.CONNECTTIMEOUT, self._HTTP_TIMEOUT)
									            c.setopt(c.TIMEOUT, self._HTTP_TIMEOUT)
									            c.setopt(c.WRITEFUNCTION, response.write)
									            c.setopt(c.POSTFIELDS, audio_data)
									            c.setopt(c.POSTFIELDSIZE, f_len)
									            c.perform()
									        finally:
									            c.close()

									        self.dump_res(response.getvalue())

									    def my_start(self):
									        """Button slot: record, recognise and display the result.

									        Failures are reported in the text browser instead of escaping the
									        slot, where an unhandled exception can abort the application.
									        """
									        print('OK')
									        try:
									            self.Start_record()
									            self.use_cloud(self.get_token())
									        except Exception as exc:  # UI boundary: report, do not crash
									            import traceback

									            traceback.print_exc()
									            self.child.textBrowser.setText('识别失败: %s' % exc)

									if __name__ == "__main__":
									    # Script entry point: create the Qt application, show the recorder
									    # window and leave with the event loop's return code.
									    app = QApplication(sys.argv)
									    main_window = Audio_record_cloud_class()
									    main_window.show()
									    exit_code = app.exec_()
									    sys.exit(exit_code)

由 UI 文件转换生成的 PY 文件如下: My_Audio_Record_cloud_Ui.py

				?

									# -*- coding: utf-8 -*-

									# Form implementation generated from reading ui file 'd:\vscode_2020\My_Audio\My_Audio\My_Audio_Record_cloud.ui'

									#

									# Created by: PyQt5 UI code generator 5.15.0

									#

									# WARNING: Any manual changes made to this file will be lost when pyuic5 is

									# run again.  Do not edit this file unless you know what you are doing.

									from PyQt5 import QtCore, QtGui, QtWidgets

									class Ui_MainWindow(object):
									    """Widget layout of the speech-recognition main window.

									    The file header states this code was produced by the PyQt5 UI code
									    generator and that manual changes are lost when pyuic5 runs again.
									    """

									    def setupUi(self, MainWindow):
									        """Create all child widgets on ``MainWindow`` and wire the close button.

									        Args:
									            MainWindow: The main-window object that receives the widgets.
									        """

									        def place(widget, object_name, x, y, width, height):
									            # Position a widget and assign its Qt object name.
									            widget.setGeometry(QtCore.QRect(x, y, width, height))
									            widget.setObjectName(object_name)
									            return widget

									        MainWindow.setObjectName("MainWindow")
									        MainWindow.resize(558, 525)

									        central = QtWidgets.QWidget(MainWindow)
									        central.setObjectName("centralwidget")
									        self.centralwidget = central

									        # Result area, start button and the two plain labels.
									        self.textBrowser = place(
									            QtWidgets.QTextBrowser(central), "textBrowser", 30, 50, 501, 351
									        )
									        self.pushButton = place(
									            QtWidgets.QPushButton(central), "pushButton", 40, 420, 75, 23
									        )
									        self.label = place(QtWidgets.QLabel(central), "label", 40, 460, 491, 16)
									        self.label_2 = place(QtWidgets.QLabel(central), "label_2", 30, 30, 161, 16)

									        # Heading label with a larger bold font.
									        self.label_3 = place(QtWidgets.QLabel(central), "label_3", 180, 10, 111, 31)
									        heading_font = QtGui.QFont()
									        heading_font.setFamily("Agency FB")
									        heading_font.setPointSize(18)
									        heading_font.setBold(True)
									        heading_font.setWeight(75)
									        self.label_3.setFont(heading_font)

									        # Version label and the button that closes the window.
									        self.label_4 = place(QtWidgets.QLabel(central), "label_4", 480, 20, 54, 12)
									        self.pushButton_2 = place(
									            QtWidgets.QPushButton(central), "pushButton_2", 450, 420, 75, 23
									        )
									        MainWindow.setCentralWidget(central)

									        self.menubar = place(QtWidgets.QMenuBar(MainWindow), "menubar", 0, 0, 558, 23)
									        MainWindow.setMenuBar(self.menubar)

									        self.statusbar = QtWidgets.QStatusBar(MainWindow)
									        self.statusbar.setObjectName("statusbar")
									        MainWindow.setStatusBar(self.statusbar)

									        self.retranslateUi(MainWindow)
									        self.pushButton_2.clicked.connect(MainWindow.close)
									        QtCore.QMetaObject.connectSlotsByName(MainWindow)

									    def retranslateUi(self, MainWindow):
									        """Set the window title and the visible text of every widget.

									        Args:
									            MainWindow: The main-window object whose title is set.
									        """
									        tr = QtCore.QCoreApplication.translate
									        context = "MainWindow"

									        MainWindow.setWindowTitle(tr(context, "MainWindow"))
									        self.pushButton.setText(tr(context, "开始识别"))
									        self.label.setText(
									            tr(context, "说明:点击“开始识别”按钮来录音,并通过百度语音的功能,自动将结果显示在文本框中")
									        )
									        self.label_2.setText(tr(context, "语音识别的结果:"))
									        self.label_3.setText(tr(context, "语音识别"))
									        self.label_4.setText(tr(context, "v20220306"))
									        self.pushButton_2.setText(tr(context, "结束"))