feat: Enhance TTS functionality with Azure support and UI improvements

- Updated component declarations to include new Naive UI components.
- Refactored environment variable access to use import.meta.env.
- Added TTS_API_URL constant for Azure TTS integration.
- Expanded SpeechSettings interface to support Azure voice and language options.
- Implemented Azure TTS voice selection and loading mechanism in ReadDanmaku.vue.
- Added loading timeout for audio playback and improved error handling.
- Enhanced UI to allow users to select Azure voices and configure speech settings.
2025-12-06 18:36:55 +08:00 · 2025-10-13 18:25:20 +08:00
parent 4b4fb8d87e
commit ad277bc1aa
5 changed files with 318 additions and 1266 deletions
--- a/src/client/ClientReadDanmaku.vue
+++ b/src/client/ClientReadDanmaku.vue
--- a/src/components.d.ts
+++ b/src/components.d.ts
@@ -18,13 +18,18 @@ declare module 'vue' {
    LabelItem: typeof import('./components/LabelItem.vue')['default']
    LiveInfoContainer: typeof import('./components/LiveInfoContainer.vue')['default']
    MonacoEditorComponent: typeof import('./components/MonacoEditorComponent.vue')['default']
-    NEllipsis: typeof import('naive-ui')['NEllipsis']
-    NEmpty: typeof import('naive-ui')['NEmpty']
+    NAvatar: typeof import('naive-ui')['NAvatar']
+    NButton: typeof import('naive-ui')['NButton']
+    NCard: typeof import('naive-ui')['NCard']
    NFlex: typeof import('naive-ui')['NFlex']
    NFormItemGi: typeof import('naive-ui')['NFormItemGi']
    NGridItem: typeof import('naive-ui')['NGridItem']
    NIcon: typeof import('naive-ui')['NIcon']
+    NImage: typeof import('naive-ui')['NImage']
+    NPopconfirm: typeof import('naive-ui')['NPopconfirm']
    NScrollbar: typeof import('naive-ui')['NScrollbar']
+    NSpace: typeof import('naive-ui')['NSpace']
+    NSwitch: typeof import('naive-ui')['NSwitch']
    NTag: typeof import('naive-ui')['NTag']
    NText: typeof import('naive-ui')['NText']
    PointGoodsItem: typeof import('./components/manage/PointGoodsItem.vue')['default']
--- a/src/data/constants.ts
+++ b/src/data/constants.ts
@@ -19,7 +19,7 @@ export const THINGS_URL = `${FILE_BASE_URL}/things/`
 export const apiFail = ref(false)

 export const BASE_URL
-  = process.env.NODE_ENV === 'development'
+  // Select the debug API in development builds.
+  // NOTE(review): a Vite-style `import.meta.env` exposes MODE/DEV/PROD but no
+  // NODE_ENV key, so `import.meta.env.NODE_ENV === 'development'` would always
+  // be false — confirm no custom `define` adds it before reverting to that form.
+  = import.meta.env.DEV
    ? debugAPI
    : apiFail.value
      ? failoverAPI
@@ -27,7 +27,7 @@ export const BASE_URL
 export const BASE_API_URL = `${BASE_URL}api/`
 export const FETCH_API = 'https://fetch.vtsuru.live/'
 export const BASE_HUB_URL
-  = `${process.env.NODE_ENV === 'development'
+  // Select the debug API in development builds.
+  // NOTE(review): a Vite-style `import.meta.env` exposes MODE/DEV/PROD but no
+  // NODE_ENV key, so `import.meta.env.NODE_ENV === 'development'` would always
+  // be false — confirm no custom `define` adds it before reverting to that form.
+  = `${import.meta.env.DEV
    ? debugAPI
    : apiFail.value
      ? failoverAPI
@@ -65,6 +65,7 @@ export const CHECKIN_API_URL = `${BASE_API_URL}checkin/`
 export const USER_CONFIG_API_URL = `${BASE_API_URL}user-config/`
 export const FILE_API_URL = `${BASE_API_URL}files/`
 export const VOTE_API_URL = `${BASE_API_URL}vote/`
+export const TTS_API_URL = `${BASE_API_URL}tts/`

 export interface TemplateMapType {
  [key: string]: {
--- a/src/store/useSpeechService.ts
+++ b/src/store/useSpeechService.ts
@@ -7,7 +7,7 @@ import { clearInterval, setInterval } from 'worker-timers'
 import type { EventModel } from '@/api/api-models'
 import { DownloadConfig, UploadConfig, useAccount } from '@/api/account'
 import { EventDataTypes } from '@/api/api-models'
-import { FETCH_API } from '@/data/constants'
+import { FETCH_API, TTS_API_URL } from '@/data/constants'

 export interface SpeechSettings {
  speechInfo: SpeechInfo
@@ -16,12 +16,14 @@ export interface SpeechSettings {
  guardTemplate: string
  giftTemplate: string
  enterTemplate: string
-  voiceType: 'local' | 'api'
+  voiceType: 'local' | 'api' | 'azure'
  voiceAPISchemeType: 'http' | 'https'
  voiceAPI: string
  splitText: boolean
  useAPIDirectly: boolean
  combineGiftDelay: number | undefined
+  azureVoice: string
+  azureLanguage: string
 }

 export interface SpeechInfo {
@@ -65,6 +67,8 @@ const DEFAULT_SETTINGS: SpeechSettings = {
  useAPIDirectly: false,
  splitText: false,
  combineGiftDelay: 2,
+  azureVoice: 'zh-CN-XiaoxiaoNeural',
+  azureLanguage: 'zh-CN',
 }

 export const templateConstants = {
@@ -134,6 +138,7 @@ function createSpeechService() {

  const apiAudio = ref<HTMLAudioElement>()
  let checkTimer: number | undefined
+  let loadingTimeoutTimer: number | undefined // 音频加载超时计时器
  let speechQueueTimer: number | undefined

  const speechSynthesisInfo = ref<{
@@ -204,6 +209,11 @@ function createSpeechService() {
      checkTimer = undefined
    }

+    if (loadingTimeoutTimer) {
+      clearInterval(loadingTimeoutTimer)
+      loadingTimeoutTimer = undefined
+    }
+
    cancelSpeech()
    giftCombineMap.clear()
    speakQueue.value = []
@@ -294,10 +304,7 @@ function createSpeechService() {
      text = text.replace(templateConstants.guard_num.regex, (data.num ?? 0).toString())
    }

-    text = fullWidthToHalfWidth(text)
-      .replace(/[^0-9a-z\u4E00-\u9FFF\u3400-\u4DBF\uF900-\uFAFF,.:'"\s]/gi, '')
-      .normalize('NFKC')
-
+    console.log(text)
    return text
  }

@@ -359,6 +366,13 @@ function createSpeechService() {
   * 构建API请求URL
   */
  function buildApiUrl(text: string): string | null {
+    // Azure TTS
+    if (settings.value.voiceType === 'azure') {
+      const apiUrl = `${TTS_API_URL}azure?text=${encodeURIComponent(text)}`
+      return apiUrl
+    }
+
+    // 自定义 API
    if (!settings.value.voiceAPI) {
      message.error('未设置语音API')
      return null
@@ -400,15 +414,47 @@ function createSpeechService() {
   * 使用API TTS朗读
   */
  function speakFromAPI(text: string) {
-    const url = buildApiUrl(text)
+    // Resolve the request URL; a null result means it could not be built
+    let url = buildApiUrl(text)
    if (!url) {
      cancelSpeech()
      return
    }

+    // For Azure TTS, append the voice, language, rate and pitch parameters
+    if (settings.value.voiceType === 'azure') {
+      const azureUrl = new URL(url)
+      azureUrl.searchParams.set('voice', settings.value.azureVoice)
+      azureUrl.searchParams.set('language', settings.value.azureLanguage)
+      azureUrl.searchParams.set('rate', settings.value.speechInfo.rate.toString())
+      azureUrl.searchParams.set('pitch', settings.value.speechInfo.pitch.toString())
+      azureUrl.searchParams.set('streaming', 'true')
+      url = azureUrl.toString()
+    }
+
    speechState.isSpeaking = true
    speechState.isApiAudioLoading = true
-    speechState.apiAudioSrc = url
+
+    // Clear apiAudioSrc first so the audio element reliably reloads;
+    // this avoids consecutive plays where updating src does not trigger a load
+    speechState.apiAudioSrc = ''
+
+    // Assign the new src on a later task (a stand-in for nextTick, since this
+    // code lives in a store rather than a component)
+    setTimeout(() => {
+      speechState.apiAudioSrc = url
+    }, 0)
+
+    // Arm a 10-second loading timeout, replacing any previous one
+    if (loadingTimeoutTimer) {
+      clearInterval(loadingTimeoutTimer)
+    }
+    const timeoutId: number = setInterval(() => {
+      // setInterval is used as a timeout here, so stop it on the first tick;
+      // otherwise it would keep firing every 10 seconds until something else
+      // happened to clear it
+      clearInterval(timeoutId)
+      if (loadingTimeoutTimer === timeoutId) {
+        loadingTimeoutTimer = undefined
+      }
+
+      if (speechState.isApiAudioLoading) {
+        console.error('[TTS] 音频加载超时 (10秒)')
+        message.error('音频加载超时，请检查网络连接或API状态')
+        cancelSpeech()
+      }
+    }, 10000) // 10-second timeout
+    loadingTimeoutTimer = timeoutId
  }

  /**
@@ -470,7 +516,10 @@ function createSpeechService() {
      if (settings.value.voiceType == 'local') {
        speakDirect(text)
      } else {
-        text = settings.value.splitText ? insertSpaces(text) : text
+        // 只有自定义 API 且启用了 splitText 才进行文本拆分
+        if (settings.value.voiceType === 'api' && settings.value.splitText) {
+          text = insertSpaces(text)
+        }
        speakFromAPI(text)
      }

@@ -489,16 +538,34 @@ function createSpeechService() {
      checkTimer = undefined
    }

+    if (loadingTimeoutTimer) {
+      clearInterval(loadingTimeoutTimer)
+      loadingTimeoutTimer = undefined
+    }
+
    speechState.isApiAudioLoading = false

    if (apiAudio.value && !apiAudio.value.paused) {
      apiAudio.value.pause()
    }

+    // 清空音频源，确保下次播放时能正确加载新的音频
+    speechState.apiAudioSrc = ''
+
    EasySpeech.cancel()
    speechState.speakingText = ''
  }

+  /**
+   * Stops the pending audio-loading timeout timer, if one is armed.
+   *
+   * Does nothing when no timer handle is currently stored.
+   */
+  function clearLoadingTimeout() {
+    if (!loadingTimeoutTimer) {
+      return
+    }
+
+    clearInterval(loadingTimeoutTimer)
+    loadingTimeoutTimer = undefined
+  }
+
  /**
   * 接收事件并添加到队列
   */
@@ -680,6 +747,7 @@ function createSpeechService() {
    startSpeech,
    stopSpeech,
    cancelSpeech,
+    clearLoadingTimeout,
    uploadConfig,
    downloadConfig,
    getTextFromDanmaku,
--- a/src/views/open_live/ReadDanmaku.vue
+++ b/src/views/open_live/ReadDanmaku.vue
@@ -47,6 +47,7 @@ import { EventDataTypes } from '@/api/api-models'
 import { useDanmakuClient } from '@/store/useDanmakuClient'
 import { templateConstants, useSpeechService } from '@/store/useSpeechService'
 import { copyToClipboard } from '@/Utils'
+import { TTS_API_URL } from '@/data/constants';

 const props = defineProps<{
  roomInfo?: any
@@ -68,6 +69,10 @@ const {
  apiAudio,
 } = speechService

+// Azure 语音列表
+const azureVoices = ref<Array<{ label: string; value: string; locale: string }>>([])
+const azureVoicesLoading = ref(false)
+
 // 计算属性
 const isVtsuruVoiceAPI = computed(() => {
  return (
@@ -197,6 +202,61 @@ function testAPI() {
  }
 }

+/**
+ * Loads the Azure voice list from `${TTS_API_URL}voices` into `azureVoices`.
+ *
+ * The request is skipped when the list is already populated or while another
+ * load is still in flight (this function is triggered on mount, by the
+ * "load" button and by the select's focus event, so overlapping calls happen).
+ * Only zh-/ja-/en- locales are kept. Multilingual voices are listed first,
+ * then Chinese, Japanese and English voices in that order.
+ * Failures are logged and reported through `message.error`; nothing is thrown.
+ */
+async function fetchAzureVoices() {
+  // Skip when the list is cached or a request is already running
+  if (azureVoices.value.length > 0 || azureVoicesLoading.value) {
+    return
+  }
+
+  // Returns the first non-empty string stored under any of the given keys
+  // (the original code accepted both PascalCase and camelCase field names)
+  const readField = (voice: Record<string, unknown>, ...keys: string[]): string => {
+    for (const key of keys) {
+      const value = voice[key]
+      if (typeof value === 'string' && value) {
+        return value
+      }
+    }
+    return ''
+  }
+  const isMultilingual = (shortName: string) => shortName.toLowerCase().includes('multilingual')
+  // Chinese first, Japanese second, everything else (English) last
+  const localeRank = (locale: string) => (locale.startsWith('zh-') ? 0 : locale.startsWith('ja-') ? 1 : 2)
+
+  azureVoicesLoading.value = true
+  try {
+    const response = await fetch(`${TTS_API_URL}voices`)
+    if (!response.ok) {
+      throw new Error('获取语音列表失败')
+    }
+
+    // The payload is untrusted JSON: validate its shape before using it
+    const payload: unknown = await response.json()
+    if (!Array.isArray(payload)) {
+      throw new TypeError('获取语音列表失败: 响应不是数组')
+    }
+
+    azureVoices.value = payload
+      .filter((v): v is Record<string, unknown> => typeof v === 'object' && v !== null)
+      .map((voice) => {
+        const shortName = readField(voice, 'ShortName', 'shortName')
+        const localeName = readField(voice, 'LocaleName', 'localeName')
+        const localName = readField(voice, 'LocalName', 'localName', 'DisplayName', 'displayName')
+        const gender = readField(voice, 'Gender', 'gender').toLowerCase()
+        // Only label genders that are actually reported; a missing or other
+        // value must not be shown as female
+        const genderLabel = gender === 'male' ? ' (男)' : gender === 'female' ? ' (女)' : ''
+
+        return {
+          label: `[${localeName}] ${localName}${genderLabel}${isMultilingual(shortName) ? ' 🌍' : ''}`,
+          value: shortName,
+          locale: readField(voice, 'Locale', 'locale'),
+        }
+      })
+      .filter(({ locale }) => ['zh-', 'ja-', 'en-'].some(prefix => locale.startsWith(prefix)))
+      .sort((a, b) => {
+        // Multilingual models first, then order by language
+        const multiDiff = Number(isMultilingual(b.value)) - Number(isMultilingual(a.value))
+        return multiDiff !== 0 ? multiDiff : localeRank(a.locale) - localeRank(b.locale)
+      })
+  } catch (error) {
+    console.error('[Azure TTS] 获取语音列表失败:', error)
+    message.error('获取 Azure 语音列表失败')
+  } finally {
+    azureVoicesLoading.value = false
+  }
+}
+
 function getEventTypeTag(type: EventDataTypes) {
  switch (type) {
    case EventDataTypes.Message:
@@ -220,6 +280,16 @@ function onAPIError(_e: Event) {
  cancelSpeech()
 }

+// Audio `canplay` handler: marks the API audio as no longer loading and
+// cancels the loading-timeout timer held by the speech service.
+function onAudioCanPlay() {
+  speechState.isApiAudioLoading = false
+  speechService.clearLoadingTimeout()
+}
+
+// Audio `error` handler: cancels the loading-timeout timer first, then
+// delegates to onAPIError for the actual error handling.
+function onAudioError(e: Event) {
+  speechService.clearLoadingTimeout()
+  onAPIError(e)
+}
+
 // 生命周期
 onMounted(async () => {
  await speechService.initialize()
@@ -229,6 +299,11 @@ onMounted(async () => {
  client.onEvent('guard', onGetEvent)
  client.onEvent('gift', onGetEvent)
  client.onEvent('enter', onGetEvent)
+
+  // 如果默认使用 Azure TTS，则预加载语音列表
+  if (settings.value.voiceType === 'azure') {
+    fetchAzureVoices()
+  }
 })

 onUnmounted(() => {
@@ -646,6 +721,21 @@ onUnmounted(() => {
              </NSpace>
            </NRadioButton>

+            <NRadioButton value="azure">
+              <NSpace :size="4">
+                <span>Azure TTS</span>
+                <NTooltip>
+                  <template #trigger>
+                    <NIcon
+                      :component="Info24Filled"
+                      :size="16"
+                    />
+                  </template>
+                  使用 Microsoft Azure 语音合成服务, 混合语言输出效果和音质好, 略有延迟
+                </NTooltip>
+              </NSpace>
+            </NRadioButton>
+
            <NRadioButton value="api">
              <NSpace :size="4">
                <span>API 语音</span>
@@ -744,6 +834,127 @@ onUnmounted(() => {
              </div>
            </NSpace>

+            <!-- Azure TTS 设置 -->
+            <NSpace
+              v-else-if="settings.voiceType === 'azure'"
+              vertical
+              :size="16"
+            >
+              <NAlert
+                type="success"
+                :bordered="false"
+              >
+                <template #icon>
+                  <NIcon :component="Info24Filled" />
+                </template>
+                使用本站提供的 Microsoft Azure 语音合成服务，效果最好
+              </NAlert>
+
+              <div>
+                <NSpace justify="space-between" align="center">
+                  <NText strong>语音选择</NText>
+                  <NButton
+                    v-if="azureVoices.length === 0"
+                    text
+                    type="primary"
+                    size="small"
+                    :loading="azureVoicesLoading"
+                    @click="fetchAzureVoices"
+                  >
+                    加载语音列表
+                  </NButton>
+                  <NText v-else depth="3" style="font-size: 12px">
+                    共 {{ azureVoices.length }} 个语音
+                  </NText>
+                </NSpace>
+                <NSelect
+                  v-model:value="settings.azureVoice"
+                  :options="azureVoices.length > 0 ? azureVoices : [
+                    { label: '中文(普通话)女 - 晓晓', value: 'zh-CN-XiaoxiaoNeural' },
+                    { label: '中文(普通话)女 - 晓伊', value: 'zh-CN-XiaoyiNeural' },
+                    { label: '中文(普通话)女 - 晓梦', value: 'zh-CN-XiaomengNeural' },
+                    { label: '中文(普通话)女 - 晓莫', value: 'zh-CN-XiaomoNeural' },
+                    { label: '中文(普通话)女 - 晓秋', value: 'zh-CN-XiaoqiuNeural' },
+                    { label: '中文(普通话)女 - 晓双', value: 'zh-CN-XiaoshuangNeural' },
+                    { label: '中文(普通话)女 - 晓纯', value: 'zh-CN-XiaochenNeural' },
+                    { label: '中文(普通话)女 - 晓翔', value: 'zh-CN-XiaoxiangNeural' },
+                    { label: '中文(普通话)女 - 晓蕾', value: 'zh-CN-XiaorouNeural' },
+                    { label: '中文(普通话)女 - 晓瑶', value: 'zh-CN-XiaoyouNeural' },
+                    { label: '中文(普通话)男 - 云希', value: 'zh-CN-YunxiNeural' },
+                    { label: '中文(普通话)男 - 云扬', value: 'zh-CN-YunyangNeural' },
+                    { label: '中文(普通话)男 - 云健', value: 'zh-CN-YunjianNeural' },
+                    { label: '中文(普通话)儿童 - 晓晋', value: 'zh-CN-XiaozhenNeural' },
+                    { label: '中文(普通话)儿童 - 云夏', value: 'zh-CN-YunxiaNeural' },
+                  ]"
+                  :loading="azureVoicesLoading"
+                  :fallback-option="() => ({
+                    label: settings.azureVoice ? `已选择: ${settings.azureVoice}` : '未选择',
+                    value: settings.azureVoice || '',
+                  })"
+                  style="margin-top: 8px"
+                  filterable
+                  @focus="fetchAzureVoices"
+                />
+              </div>
+
+              <div>
+                <NSpace
+                  justify="space-between"
+                  align="center"
+                >
+                  <NText>音量</NText>
+                  <NText depth="3">
+                    {{ (settings.speechInfo.volume * 100).toFixed(0) }}%
+                  </NText>
+                </NSpace>
+                <NSlider
+                  v-model:value="settings.speechInfo.volume"
+                  :min="0"
+                  :max="1"
+                  :step="0.01"
+                  style="margin-top: 8px"
+                />
+              </div>
+
+              <div>
+                <NSpace
+                  justify="space-between"
+                  align="center"
+                >
+                  <NText>音调</NText>
+                  <NText depth="3">
+                    {{ settings.speechInfo.pitch.toFixed(2) }}
+                  </NText>
+                </NSpace>
+                <NSlider
+                  v-model:value="settings.speechInfo.pitch"
+                  :min="0.5"
+                  :max="2"
+                  :step="0.01"
+                  style="margin-top: 8px"
+                />
+              </div>
+
+              <div>
+                <NSpace
+                  justify="space-between"
+                  align="center"
+                >
+                  <NText>语速</NText>
+                  <NText depth="3">
+                    {{ settings.speechInfo.rate.toFixed(2) }}
+                  </NText>
+                </NSpace>
+                <NSlider
+                  v-model:value="settings.speechInfo.rate"
+                  :min="0.5"
+                  :max="2"
+                  :step="0.01"
+                  style="margin-top: 8px"
+                />
+              </div>
+            </NSpace>
+
            <!-- API 语音设置 -->
            <NSpace
              v-else
@@ -865,19 +1076,21 @@ onUnmounted(() => {
                  style="margin-top: 8px"
                />
              </div>
-
-              <!-- 隐藏的音频元素 -->
-              <audio
-                ref="apiAudio"
-                :src="speechState.apiAudioSrc"
-                :volume="settings.speechInfo.volume"
-                style="display: none"
-                @ended="cancelSpeech"
-                @canplay="speechState.isApiAudioLoading = false"
-                @error="onAPIError"
-              />
            </NSpace>
          </Transition>
+
+          <!-- 隐藏的音频元素 - 用于 API 和 Azure TTS -->
+          <audio
+            v-if="settings.voiceType !== 'local'"
+            ref="apiAudio"
+            :src="speechState.apiAudioSrc"
+            :volume="settings.speechInfo.volume"
+            style="display: none"
+            autoplay
+            @ended="cancelSpeech"
+            @canplay="onAudioCanPlay"
+            @error="onAudioError"
+          />
        </NSpace>
      </NCard>

@@ -1063,7 +1276,10 @@ onUnmounted(() => {
            </NInputGroup>
          </NSpace>

-          <NCheckbox v-model:checked="settings.splitText">
+          <NCheckbox
+            v-if="settings.voiceType === 'api'"
+            v-model:checked="settings.splitText"
+          >
            <NSpace
              :size="4"
              align="center"