|
|
@@ -185,7 +185,7 @@
|
|
|
启用后可上传音频文件并整段转写为文本(常见格式:mp3、wav、m4a、flac、ogg
|
|
|
等)。暂不支持视频上传。
|
|
|
</div>
|
|
|
- <el-form-item label="启用音频语音识别">
|
|
|
+ <el-form-item label="音频语音识别">
|
|
|
<div class="switch-wrap">
|
|
|
<el-segmented
|
|
|
v-model="form.asr_enabled"
|
|
|
@@ -233,6 +233,28 @@
|
|
|
<el-tab-pane v-if="isDocumentType" label="分块设置" name="chunk">
|
|
|
<div class="tab-intro">配置文档分块参数,优化检索效果</div>
|
|
|
<div class="collapse-body">
|
|
|
+ <el-form-item label="分块策略">
|
|
|
+ <div class="slider-wrap">
|
|
|
+ <div class="field-tip mb-4px mt-0">
|
|
|
+ 选择文档的分块方式。自动模式会分析每个文档的结构并选择最佳策略。
|
|
|
+ </div>
|
|
|
+ <el-segmented
|
|
|
+ v-model="form.chunk_strategy"
|
|
|
+ class="selection-segmented"
|
|
|
+ :options="chunkStrategyOptions"
|
|
|
+ />
|
|
|
+ <!-- <el-select
|
|
|
+ v-model="form.chunk_strategy"
|
|
|
+ filterable
|
|
|
+ default-first-option
|
|
|
+ placeholder="请选择"
|
|
|
+ :options="chunkStrategyOptions"
|
|
|
+ /> -->
|
|
|
+ <div class="field-tip">
|
|
|
+ {{ chunkSrategyTip?.[form.chunk_strategy] }}
|
|
|
+ </div>
|
|
|
+ </div>
|
|
|
+ </el-form-item>
|
|
|
<el-form-item label="分块大小">
|
|
|
<div class="slider-wrap">
|
|
|
<el-slider
|
|
|
@@ -317,6 +339,54 @@
|
|
|
</div>
|
|
|
</el-form-item>
|
|
|
</div>
|
|
|
+
|
|
|
+ <el-collapse class="chunk-advanced-collapse mt-12px">
|
|
|
+ <el-collapse-item title="高级" name="advanced">
|
|
|
+ <div class="chunk-advanced-panel">
|
|
|
+ <el-form-item label="Token 上限">
|
|
|
+ <div class="slider-wrap">
|
|
|
+ <el-input-number
|
|
|
+ v-model="form.token_limit"
|
|
|
+ :min="0"
|
|
|
+ :max="8192"
|
|
|
+ :step="1"
|
|
|
+ style="width: 100%"
|
|
|
+ />
|
|
|
+ <div class="field-tip">
|
|
|
+ 每个分块的硬性 Token 上限(0-8192)。0 = 关闭(仅按字符数)。当嵌入模型
|
|
|
+ Token 上限较小时启用:MiniLM (256 tok) 用 200,BGE/Cohere (512 tok) 用
|
|
|
+ 400。现代嵌入器(OpenAI、Voyage、Jina-v3)支持 >2000 tokens,保持 0
|
|
|
+ 即可。
|
|
|
+ </div>
|
|
|
+ </div>
|
|
|
+ </el-form-item>
|
|
|
+
|
|
|
+ <el-form-item label="语言提示">
|
|
|
+ <div class="switch-wrap">
|
|
|
+ <el-select
|
|
|
+ v-model="form.languages"
|
|
|
+ multiple
|
|
|
+ clearable
|
|
|
+ collapse-tags
|
|
|
+ collapse-tags-tooltip
|
|
|
+ placeholder="选择语言提示"
|
|
|
+ >
|
|
|
+ <el-option
|
|
|
+ v-for="item in languageOptions"
|
|
|
+ :key="item.value"
|
|
|
+ :label="item.label"
|
|
|
+ :value="item.value"
|
|
|
+ />
|
|
|
+ </el-select>
|
|
|
+ <div class="field-tip">
|
|
|
+ 限制启发式模式只识别选定的语言(DE/EN/ZH)。留空 =
|
|
|
+ 自动检测。同质化语料库可显式设置以避免跨语言误匹配。
|
|
|
+ </div>
|
|
|
+ </div>
|
|
|
+ </el-form-item>
|
|
|
+ </div>
|
|
|
+ </el-collapse-item>
|
|
|
+ </el-collapse>
|
|
|
</div>
|
|
|
</el-tab-pane>
|
|
|
|
|
|
@@ -427,6 +497,11 @@ const separatorOptions = [
|
|
|
]
|
|
|
const DEFAULT_SEPARATORS = separatorOptions.map((item) => item.value)
|
|
|
const parserEngineOptions = ['builtin', 'markitdown', 'simple']
|
|
|
+const languageOptions = [ // Choices rendered by the language-hint el-select (v-model form.languages); each value is the code stored in the form.
|
|
|
+ { label: '中文', value: 'zh' }, // Chinese
|
|
|
+ { label: '英语', value: 'en' }, // English
|
|
|
+ { label: '德语', value: 'de' } // German
|
|
|
+]
|
|
|
const storageProviderOptions = ref<{ label: string; value: string }[]>([])
|
|
|
const knowledgeTypeOptions = [
|
|
|
{ label: '文档', value: 'document' },
|
|
|
@@ -471,6 +546,7 @@ const createDefaultForm = (): KnowledgeBaseForm => ({
|
|
|
wiki_enabled: false,
|
|
|
question_generation_enabled: true,
|
|
|
question_count: 3,
|
|
|
+ chunk_strategy: 'auto',
|
|
|
chunk_size: 512,
|
|
|
chunk_overlap: 100,
|
|
|
separators: [...DEFAULT_SEPARATORS],
|
|
|
@@ -478,12 +554,42 @@ const createDefaultForm = (): KnowledgeBaseForm => ({
|
|
|
enable_parent_child: true,
|
|
|
parent_chunk_size: 4096,
|
|
|
child_chunk_size: 384,
|
|
|
+ token_limit: 0,
|
|
|
+ languages: [],
|
|
|
storage_provider: 'local',
|
|
|
wiki_extraction_granularity: 'standard',
|
|
|
wiki_max_pages_per_ingest: 0,
|
|
|
wiki_synthesis_model_id: ''
|
|
|
})
|
|
|
|
|
|
+const chunkStrategyOptions = [ // Choices for the chunk-strategy el-segmented control (v-model form.chunk_strategy); each value has a matching hint in chunkSrategyTip.
|
|
|
+ {
|
|
|
+ label: '自动', // "Auto"
|
|
|
+ value: 'auto' // same value createDefaultForm uses as the initial chunk_strategy
|
|
|
+ },
|
|
|
+ {
|
|
|
+ label: '按标题切分', // "Split by heading"
|
|
|
+ value: 'heading'
|
|
|
+ },
|
|
|
+ {
|
|
|
+ label: '结构感知', // "Structure-aware"
|
|
|
+ value: 'heuristic'
|
|
|
+ },
|
|
|
+ {
|
|
|
+ label: '按长度切分', // "Split by length"
|
|
|
+ value: 'legacy'
|
|
|
+ }
|
|
|
+]
|
|
|
+
|
|
|
+const chunkSrategyTip = { // Hint text shown under the strategy selector, looked up by form.chunk_strategy. NOTE(review): identifier is misspelled ("Srategy"); a rename must also update the template lookup that reads it.
|
|
|
+ auto: '文档分析器根据内容结构自动在「按标题切分」「结构感知」「按长度切分」之间选择。', // auto: the analyzer picks between the three strategies below based on document structure
|
|
|
+ heading: // split at Markdown heading boundaries; each chunk carries its heading path
|
|
|
+ '在 Markdown 标题(#、##、###)边界处切分,每块自动带上所在标题路径。适合结构清晰的 Markdown 文档。',
|
|
|
+ heuristic: // split on structural signals (page breaks, numbered sections, DE/EN/ZH section markers, all-caps titles)
|
|
|
+ '识别分页符、编号章节、多语言章节标记(DE/EN/ZH)、全大写标题等结构信号进行切分。适合没有 Markdown 标题的 PDF / 扫描件。',
|
|
|
+ legacy: '忽略结构,仅按字符数和分隔符递归切分——原始行为。当上述策略对你的内容效果不佳时使用。' // legacy: ignore structure, recursive split by character count and separators
|
|
|
+}
|
|
|
+
|
|
|
const form = reactive<KnowledgeBaseForm>(createDefaultForm())
|
|
|
|
|
|
const embeddingModels = computed(() => modelList.value.filter((item) => item.type === 'Embedding'))
|
|
|
@@ -509,7 +615,7 @@ async function fetchModels() {
|
|
|
}
|
|
|
|
|
|
async function fetchStorageProviders() {
|
|
|
- const res = await storageProvider.postStorageProviderEngines({})
|
|
|
+ const res = await storageProvider.postAiStorageProviderEngines({})
|
|
|
if (res?.isSuccess) {
|
|
|
storageProviderOptions.value = (res.result || [])
|
|
|
.filter((item) => {
|
|
|
@@ -582,21 +688,6 @@ function buildModelLabel(item: KnowledgeModelOption) {
|
|
|
return title ? `${title} (${item.name})` : item.name
|
|
|
}
|
|
|
|
|
|
-function formatSeparatorLabel(value: string) {
|
|
|
- if (value === '\n\n') return '\\n\\n'
|
|
|
- if (value === '\n') return '\\n'
|
|
|
- if (value === ' ') return '[space]'
|
|
|
- return value
|
|
|
-}
|
|
|
-
|
|
|
-function toggleSeparator(value: string) {
|
|
|
- if (form.separators.includes(value)) {
|
|
|
- form.separators = form.separators.filter((item) => item !== value)
|
|
|
- return
|
|
|
- }
|
|
|
- form.separators = [...form.separators, value]
|
|
|
-}
|
|
|
-
|
|
|
function handleKnowledgeBaseTypeChange(type: KnowledgeBaseForm['type']) {
|
|
|
activeTab.value = 'basic'
|
|
|
if (type === 'faq') {
|
|
|
@@ -625,6 +716,8 @@ function handleKnowledgeBaseTypeChange(type: KnowledgeBaseForm['type']) {
|
|
|
form.enable_parent_child = true
|
|
|
form.parent_chunk_size = 4096
|
|
|
form.child_chunk_size = 384
|
|
|
+ form.token_limit = 0
|
|
|
+ form.languages = []
|
|
|
form.storage_provider = 'local'
|
|
|
form.wiki_extraction_granularity = 'standard'
|
|
|
form.wiki_max_pages_per_ingest = 0
|
|
|
@@ -650,10 +743,22 @@ function openCreateDrawer() {
|
|
|
async function openEditDrawer(id: string) {
|
|
|
editingId.value = id
|
|
|
resetForm()
|
|
|
- const res = await knowledge.postKnowledgeBaseInfo({ id })
|
|
|
+ const res = await knowledge.postAiKnowledgeBaseInfo({ id })
|
|
|
if (!res?.isSuccess) return
|
|
|
|
|
|
const detail = res.result
|
|
|
+ const chunkingConfig = (detail.chunking_config || {}) as { // Local typed view of detail.chunking_config; falls back to {} so each field read below can apply its own default. Presumably cast because the response type lacks the new fields — TODO confirm.
|
|
|
+ strategy?: KnowledgeBaseForm['chunk_strategy']
|
|
|
+ chunk_size?: number
|
|
|
+ chunk_overlap?: number
|
|
|
+ separators?: string[]
|
|
|
+ parser_engine_rules?: ParserEngineRule[]
|
|
|
+ enable_parent_child?: boolean
|
|
|
+ parent_chunk_size?: number
|
|
|
+ child_chunk_size?: number
|
|
|
+ tokenLimit?: number // NOTE(review): camelCase while every sibling key is snake_case — confirm the field name the API actually returns and accepts
|
|
|
+ languages?: string[]
|
|
|
+ }
|
|
|
Object.assign(form, {
|
|
|
name: detail.name,
|
|
|
description: detail.description || '',
|
|
|
@@ -673,15 +778,18 @@ async function openEditDrawer(id: string) {
|
|
|
wiki_enabled: detail.indexing_strategy?.wiki_enabled ?? false,
|
|
|
question_generation_enabled: detail.question_generation_config?.enabled ?? true,
|
|
|
question_count: detail.question_generation_config?.question_count ?? 3,
|
|
|
- chunk_size: detail.chunking_config?.chunk_size ?? 512,
|
|
|
- chunk_overlap: detail.chunking_config?.chunk_overlap ?? 100,
|
|
|
- separators: detail.chunking_config?.separators?.length
|
|
|
- ? [...detail.chunking_config.separators]
|
|
|
+ chunk_strategy: chunkingConfig.strategy ?? 'auto',
|
|
|
+ chunk_size: chunkingConfig.chunk_size ?? 512,
|
|
|
+ chunk_overlap: chunkingConfig.chunk_overlap ?? 100,
|
|
|
+ separators: chunkingConfig.separators?.length
|
|
|
+ ? [...chunkingConfig.separators]
|
|
|
: [...DEFAULT_SEPARATORS],
|
|
|
- parser_engine_rules: cloneParserRules(detail.chunking_config?.parser_engine_rules),
|
|
|
- enable_parent_child: detail.chunking_config?.enable_parent_child ?? true,
|
|
|
- parent_chunk_size: detail.chunking_config?.parent_chunk_size ?? 4096,
|
|
|
- child_chunk_size: detail.chunking_config?.child_chunk_size ?? 384,
|
|
|
+ parser_engine_rules: cloneParserRules(chunkingConfig.parser_engine_rules),
|
|
|
+ enable_parent_child: chunkingConfig.enable_parent_child ?? true,
|
|
|
+ parent_chunk_size: chunkingConfig.parent_chunk_size ?? 4096,
|
|
|
+ child_chunk_size: chunkingConfig.child_chunk_size ?? 384,
|
|
|
+ token_limit: chunkingConfig.tokenLimit ?? 0,
|
|
|
+ languages: chunkingConfig.languages?.length ? [...chunkingConfig.languages] : [],
|
|
|
storage_provider:
|
|
|
detail.storage_provider_config?.provider || detail.storage_config?.provider || 'local',
|
|
|
wiki_extraction_granularity: detail.wiki_config?.extraction_granularity || 'standard',
|
|
|
@@ -720,13 +828,16 @@ function buildCommonPayload() {
|
|
|
: form.summary_model_id
|
|
|
},
|
|
|
chunking_config: {
|
|
|
+ strategy: form.chunk_strategy,
|
|
|
chunk_size: form.chunk_size,
|
|
|
chunk_overlap: form.chunk_overlap,
|
|
|
separators: form.separators.length ? form.separators : [...DEFAULT_SEPARATORS],
|
|
|
parser_engine_rules: cloneParserRules(form.parser_engine_rules),
|
|
|
enable_parent_child: form.enable_parent_child,
|
|
|
parent_chunk_size: form.parent_chunk_size,
|
|
|
- child_chunk_size: form.child_chunk_size
|
|
|
+ child_chunk_size: form.child_chunk_size,
|
|
|
+ tokenLimit: form.token_limit,
|
|
|
+ languages: [...form.languages]
|
|
|
},
|
|
|
vlm_config: {
|
|
|
model_id: vlmEnabled ? form.vlm_model_id : '',
|
|
|
@@ -785,13 +896,13 @@ async function submitForm() {
|
|
|
try {
|
|
|
const payload = buildPayload()
|
|
|
if (editingId.value) {
|
|
|
- await knowledge.postKnowledgeBaseUpdate({
|
|
|
+ await knowledge.postAiKnowledgeBaseUpdate({
|
|
|
id: editingId.value,
|
|
|
...(payload as any)
|
|
|
} as any)
|
|
|
ElMessage.success('知识库已更新')
|
|
|
} else {
|
|
|
- await knowledge.postKnowledgeBaseCreate(payload as any)
|
|
|
+ await knowledge.postAiKnowledgeBaseCreate(payload as any)
|
|
|
ElMessage.success('知识库已创建')
|
|
|
}
|
|
|
drawerVisible.value = false
|
|
|
@@ -979,6 +1090,14 @@ onMounted(async () => {
|
|
|
color: var(--agent-text-soft);
|
|
|
}
|
|
|
|
|
|
+ .chunk-advanced-collapse {
|
|
|
+ margin-left: 120px;
|
|
|
+ }
|
|
|
+
|
|
|
+ .chunk-advanced-panel {
|
|
|
+ padding-top: 12px;
|
|
|
+ }
|
|
|
+
|
|
|
.parser-rule-list {
|
|
|
display: grid;
|
|
|
gap: 12px;
|