小程序接入科大訊飛,web語音聽寫

最近公司的需求是小程序接入語音,選擇的是科大訊飛的語音。
這里科大訊飛的相關申請流程就不做介紹了,就說自己在做的過程中遇到的坑吧。

調用的是科大訊飛 webApi 語音聽寫接口

工具類:

import org.apache.commons.codec.binary.Base64;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.apache.poi.util.IOUtils;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.List;

/**
 * Small client for the iFlytek (xfyun) web speech-dictation endpoint.
 *
 * <p>The audio is Base64-encoded and posted as the form field {@code audio}; the request is
 * signed with the {@code X-Appid}, {@code X-CurTime}, {@code X-Param} and {@code X-CheckSum}
 * headers built in {@link #httpPostWithJSON(InputStream)}.
 */
public class DictationUntils {
    /** Speech-dictation API endpoint. */
    private static final String API_URL = "http://api.xfyun.cn/v1/service/v1/iat";

    // Replace these two values with your own application's credentials; per the original
    // author's note, calls are also refused when the caller's IP is not on the whitelist.
    // NOTE(review): credentials are hard-coded in source — consider moving them to configuration.
    private static final String APPID = "5bac8813";
    private static final String APIKEY = "f69565ec6d410c4cd6cb3ef4a8c75ef4";

    /** Request parameters; sent Base64-encoded in the X-Param header. */
    private static final String PARAM_JSON = "{\"engine_type\": \"sms16k\",\"aue\": \"raw\"}";

    /** Upper-case hexadecimal alphabet used by {@link #getMD5(String)}. */
    private static final char[] HEX_DIGITS = {
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
    };

    /**
     * Sends the audio read from {@code is} to the dictation endpoint and returns the reply.
     *
     * <p>The stream is read to the end but is NOT closed here; closing it remains the
     * caller's responsibility.
     *
     * @param is audio data to transcribe; must not be {@code null}
     * @return the response body decoded as UTF-8 when the HTTP status is 200,
     *         otherwise {@code null}
     * @throws IllegalArgumentException if {@code is} is {@code null}
     * @throws Exception if reading the audio or performing the HTTP exchange fails
     */
    public static String httpPostWithJSON(InputStream is) throws Exception {
        if (is == null) {
            throw new IllegalArgumentException("audio input stream must not be null");
        }

        // Read the audio first and let read errors propagate: posting a request with a
        // null "audio" field (what happened when the error was swallowed) is never useful.
        String audio = Base64.encodeBase64String(IOUtils.toByteArray(is));

        String xParam = new String(
                Base64.encodeBase64(PARAM_JSON.getBytes(StandardCharsets.UTF_8)),
                StandardCharsets.US_ASCII);
        // Timestamp in whole seconds since the epoch.
        String xTime = String.valueOf(System.currentTimeMillis() / 1000L);
        // Checksum is the lower-case MD5 hex of APIKEY + time + encoded parameters.
        String xCheckSum = getMD5(APIKEY + xTime + xParam).toLowerCase();

        HttpPost httpPost = new HttpPost(API_URL);
        httpPost.setHeader("X-Appid", APPID);
        httpPost.setHeader("X-CurTime", xTime);
        httpPost.setHeader("X-Param", xParam);
        httpPost.setHeader("X-CheckSum", xCheckSum);

        List<BasicNameValuePair> pairList = new ArrayList<BasicNameValuePair>();
        pairList.add(new BasicNameValuePair("audio", audio));
        StringEntity entity = new UrlEncodedFormEntity(pairList, "utf-8");
        entity.setContentType("application/x-www-form-urlencoded; charset=utf-8");
        httpPost.setEntity(entity);

        // try-with-resources closes the client (and with it the connection it holds)
        // on every path, including non-200 replies and exceptions.
        try (CloseableHttpClient client = HttpClients.createDefault()) {
            HttpResponse resp = client.execute(httpPost);
            if (resp.getStatusLine().getStatusCode() != 200) {
                return null;
            }
            return EntityUtils.toString(resp.getEntity(), "UTF-8");
        }
    }

    /**
     * Computes the MD5 digest of {@code key} (encoded as UTF-8) as upper-case hexadecimal.
     *
     * @param key text to hash; may be {@code null}
     * @return 32 upper-case hex characters, or {@code null} when {@code key} is {@code null}
     * @throws IllegalStateException if the runtime provides no MD5 implementation
     */
    public static String getMD5(String key) {
        if (key == null) {
            return null;
        }
        final MessageDigest md5;
        try {
            md5 = MessageDigest.getInstance("MD5");
        } catch (NoSuchAlgorithmException e) {
            throw new IllegalStateException("MD5 digest is not available", e);
        }
        // Explicit charset: the platform default would make the checksum machine-dependent.
        byte[] digest = md5.digest(key.getBytes(StandardCharsets.UTF_8));

        char[] hex = new char[digest.length * 2];
        int pos = 0;
        for (byte b : digest) {
            hex[pos++] = HEX_DIGITS[(b >> 4) & 0xF];
            hex[pos++] = HEX_DIGITS[b & 0xF];
        }
        return new String(hex);
    }

    /** Manual check: transcribes a local sample file and prints the raw response. */
    public static void main(String[] args) throws Exception {
        try (InputStream audio = new FileInputStream(new File("C:\\lx.wav"))) {
            String result = httpPostWithJSON(audio);
            System.out.println(result);
        }
    }
}

里面需要導入科大訊飛相關jar包,maven 要手動導入到本地倉庫


image.png

遇到問題:
小程序語音翻譯不準確。識別率很差。
解決辦法:
因為小程序生成的語音文件的問題, 這就涉及到文件的采樣率、通道數、每秒播放或錄制的樣本數量。小程序對應的相關字段就是
frameSize numberOfChannels sampleRate
其實最好的方法, 就是把自己的文件轉成跟demo里面文件 采樣率什么的都一樣的文件。
把小程序生成的mp3轉成訊飛語音需要的文件格式



import javazoom.spi.mpeg.sampled.file.MpegAudioFileReader;
import net.sf.json.JSONObject;

import javax.sound.sampled.*;
import java.io.*;
import java.util.HashMap;

import static javax.crypto.Cipher.SECRET_KEY;

public class MP3ToWav {
    /**
     * mp3的字節(jié)數(shù)組生成wav文件
     *
     * @param sourceBytes
     * @param targetPath
     */
    public static boolean byteToWav(byte[] sourceBytes, String targetPath) {
        if (sourceBytes == null || sourceBytes.length == 0) {
            System.out.println("Illegal Argument passed to this method");
            return false;
        }

        try (final ByteArrayInputStream bais = new ByteArrayInputStream(sourceBytes);
             final AudioInputStream sourceAIS = AudioSystem.getAudioInputStream(bais)) {
            AudioFormat baseFormat = sourceAIS.getFormat();
            // 設(shè)置MP3的語(yǔ)音格式,并設(shè)置16bit
            AudioFormat mp3tFormat = new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, baseFormat.getFrameRate(), 16,
                    1, 2, baseFormat.getFrameRate(), false);
            // 設(shè)置百度語(yǔ)音識(shí)別的音頻格式
            // AudioFormat pcmFormat = new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, 16000, 16, 1, 2, 16000, false);
            try (
                    // 先通過(guò)MP3轉(zhuǎn)一次,使音頻流能的格式完整
                    final AudioInputStream mp3AIS = AudioSystem.getAudioInputStream(mp3tFormat, sourceAIS);
            ) {
                // 根據(jù)路徑生成wav文件
                AudioSystem.write(mp3AIS, AudioFileFormat.Type.WAVE, new File(targetPath));
            }
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println("文件轉(zhuǎn)換異常:" + e.getMessage());
            return false;
        } catch (UnsupportedAudioFileException e) {
            e.printStackTrace();
            System.out.println("文件轉(zhuǎn)換異常:" + e.getMessage());
            return false;
        }
    }

    /**
     * 將文件轉(zhuǎn)成字節(jié)流
     *
     * @param filePath
     * @return
     */
    public static byte[] getBytes(String filePath) {
        byte[] buffer = null;
        try {
            File file = new File(filePath);
            FileInputStream fis = new FileInputStream(file);
            ByteArrayOutputStream bos = new ByteArrayOutputStream(1000);
            byte[] b = new byte[1000];
            int n;
            while ((n = fis.read(b)) != -1) {
                bos.write(b, 0, n);
            }
            fis.close();
            bos.close();
            buffer = bos.toByteArray();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return buffer;
    }

    /**
     * MP3轉(zhuǎn)換PCM文件方法
     *
     * @param mp3filepath 原始文件路徑
     * @param pcmfilepath 轉(zhuǎn)換文件的保存路徑
     * @throws Exception
     */
    public static void convertMP32PCM(String mp3filepath, String pcmfilepath) throws Exception {
        AudioInputStream audioInputStream = getPcmAudioInputStream(mp3filepath);
        AudioSystem.write(audioInputStream, AudioFileFormat.Type.WAVE, new File(pcmfilepath));
    }

    private static AudioInputStream getPcmAudioInputStream(String mp3filepath) {
        File mp3 = new File(mp3filepath);
        AudioInputStream audioInputStream = null;
        AudioFormat targetFormat = null;
        try {
            // = null;
            MpegAudioFileReader mp = new MpegAudioFileReader();
            AudioInputStream in = mp.getAudioInputStream(mp3);
            AudioFormat baseFormat = in.getFormat();
            targetFormat = new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, baseFormat.getFrameRate(), 16,
                    baseFormat.getChannels(), baseFormat.getFrameSize(), baseFormat.getFrameRate(), false);
            audioInputStream = AudioSystem.getAudioInputStream(targetFormat, in);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return audioInputStream;
    }

    private static void getMp3Info(String wavFilepath) {

        try {
            javax.sound.sampled.AudioInputStream audio_file = javax.sound.sampled.AudioSystem.getAudioInputStream(new File(wavFilepath));
            audio_file.getFrameLength();

            AudioFormat format = audio_file.getFormat();
            System.out.println("getFrameSize:"+format.getFrameSize());
            System.out.println("getFrameRate:"+format.getFrameRate());
            System.out.println("getChannels:"+format.getChannels());
            System.out.println("getSampleRate:"+format.getSampleRate());
            System.out.println(format.getEncoding());
            System.out.println(format.getSampleSizeInBits());

        } catch (Exception e) {

        }
    }

    public static void main(String args[]) throws Exception {
        String filePath = "C:\\hhh.mp3";
        String targetPath = "C\\xx.wav";

      byteToWav(getBytes(filePath),targetPath);

        //getMp3Info(filePath);
    }

}

注:在踩坑的過程中也遇到一些mp3 文件轉換成pcm文件時的問題。 也應該是原文件的格式參數問題。

最后編輯于
©著作權歸作者所有,轉載或內容合作請聯系作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

相關閱讀更多精彩內容

友情鏈接更多精彩內容