This commit is contained in:
2025-12-01 17:21:38 +08:00
parent 32fee2b8ab
commit fab8c13cb3
7511 changed files with 996300 additions and 0 deletions

View File

@@ -0,0 +1,39 @@
'use client'
import React from 'react'
import cn from '@/utils/classnames'
import Checkbox from '@/app/components/base/checkbox'
import Tooltip from '@/app/components/base/tooltip'
type CheckboxWithLabelProps = {
className?: string
isChecked: boolean
onChange: (isChecked: boolean) => void
label: string
labelClassName?: string
tooltip?: string
}
const CheckboxWithLabel = ({
className = '',
isChecked,
onChange,
label,
labelClassName,
tooltip,
}: CheckboxWithLabelProps) => {
return (
<label className={cn('flex items-center space-x-2', className)}>
<Checkbox checked={isChecked} onCheck={() => onChange(!isChecked)} />
<div className={cn('system-sm-medium text-text-secondary', labelClassName)}>{label}</div>
{tooltip && (
<Tooltip
popupContent={
<div className='w-[200px]'>{tooltip}</div>
}
triggerClassName='ml-0.5 w-4 h-4'
/>
)}
</label>
)
}
export default React.memo(CheckboxWithLabel)

View File

@@ -0,0 +1,80 @@
'use client'
import React, { useCallback } from 'react'
import cn from '@/utils/classnames'
import type { CrawlResultItem as CrawlResultItemType } from '@/models/datasets'
import Checkbox from '@/app/components/base/checkbox'
import Button from '@/app/components/base/button'
import { useTranslation } from 'react-i18next'
import Radio from '@/app/components/base/radio/ui'
type CrawledResultItemProps = {
payload: CrawlResultItemType
isChecked: boolean
onCheckChange: (checked: boolean) => void
isPreview: boolean
showPreview: boolean
onPreview: () => void
isMultipleChoice?: boolean
}
const CrawledResultItem = ({
payload,
isChecked,
onCheckChange,
isPreview,
onPreview,
showPreview,
isMultipleChoice = true,
}: CrawledResultItemProps) => {
const { t } = useTranslation()
const handleCheckChange = useCallback(() => {
onCheckChange(!isChecked)
}, [isChecked, onCheckChange])
return (
<div className={cn(
'relative flex cursor-pointer gap-x-2 rounded-lg p-2',
isPreview ? 'bg-state-base-active' : 'group hover:bg-state-base-hover',
)}>
{
isMultipleChoice ? (
<Checkbox
className='shrink-0'
checked={isChecked}
onCheck={handleCheckChange}
/>
) : (
<Radio
isChecked={isChecked}
onCheck={handleCheckChange}
/>
)
}
<div className='flex min-w-0 grow flex-col gap-y-0.5'>
<div
className='system-sm-medium truncate text-text-secondary'
title={payload.title}
>
{payload.title}
</div>
<div
className='system-xs-regular truncate text-text-tertiary'
title={payload.source_url}
>
{payload.source_url}
</div>
</div>
{showPreview && (
<Button
size='small'
onClick={onPreview}
className='system-xs-medium-uppercase right-2 top-2 hidden px-1.5 group-hover:absolute group-hover:block'
>
{t('datasetCreation.stepOne.website.preview')}
</Button>
)}
</div>
)
}
export default React.memo(CrawledResultItem)

View File

@@ -0,0 +1,98 @@
'use client'
import React, { useCallback } from 'react'
import { useTranslation } from 'react-i18next'
import cn from '@/utils/classnames'
import type { CrawlResultItem } from '@/models/datasets'
import CheckboxWithLabel from './checkbox-with-label'
import CrawledResultItem from './crawled-result-item'
const I18N_PREFIX = 'datasetCreation.stepOne.website'
type CrawledResultProps = {
className?: string
previewIndex?: number
list: CrawlResultItem[]
checkedList: CrawlResultItem[]
onSelectedChange: (selected: CrawlResultItem[]) => void
onPreview?: (payload: CrawlResultItem, index: number) => void
showPreview?: boolean
usedTime: number
isMultipleChoice?: boolean
}
const CrawledResult = ({
className = '',
previewIndex,
list,
checkedList,
onSelectedChange,
usedTime,
onPreview,
showPreview = false,
isMultipleChoice = true,
}: CrawledResultProps) => {
const { t } = useTranslation()
const isCheckAll = checkedList.length === list.length
const handleCheckedAll = useCallback(() => {
if (!isCheckAll)
onSelectedChange(list)
else
onSelectedChange([])
}, [isCheckAll, list, onSelectedChange])
const handleItemCheckChange = useCallback((item: CrawlResultItem) => {
return (checked: boolean) => {
if (checked) {
if (isMultipleChoice)
onSelectedChange([...checkedList, item])
else
onSelectedChange([item])
}
else { onSelectedChange(checkedList.filter(checkedItem => checkedItem.source_url !== item.source_url)) }
}
}, [checkedList, onSelectedChange, isMultipleChoice])
const handlePreview = useCallback((index: number) => {
if (!onPreview) return
onPreview(list[index], index)
}, [list, onPreview])
return (
<div className={cn('flex flex-col gap-y-2', className)}>
<div className='system-sm-medium pt-2 text-text-primary'>
{t(`${I18N_PREFIX}.scrapTimeInfo`, {
total: list.length,
time: usedTime.toFixed(1),
})}
</div>
<div className='overflow-hidden rounded-xl border border-components-panel-border bg-components-panel-bg'>
{isMultipleChoice && (
<div className='flex items-center px-4 py-2'>
<CheckboxWithLabel
isChecked={isCheckAll}
onChange={handleCheckedAll} label={isCheckAll ? t(`${I18N_PREFIX}.resetAll`) : t(`${I18N_PREFIX}.selectAll`)}
/>
</div>
)}
<div className='flex flex-col gap-y-px border-t border-divider-subtle bg-background-default-subtle p-2'>
{list.map((item, index) => (
<CrawledResultItem
key={item.source_url}
payload={item}
isChecked={checkedList.some(checkedItem => checkedItem.source_url === item.source_url)}
onCheckChange={handleItemCheckChange(item)}
isPreview={index === previewIndex}
onPreview={handlePreview.bind(null, index)}
showPreview={showPreview}
isMultipleChoice={isMultipleChoice}
/>
))}
</div>
</div>
</div>
)
}
export default React.memo(CrawledResult)

View File

@@ -0,0 +1,89 @@
'use client'
import React from 'react'
import { useTranslation } from 'react-i18next'
import cn from '@/utils/classnames'
type CrawlingProps = {
className?: string
crawledNum: number
totalNum: number
}
type BlockProps = {
className?: string
}
type ItemProps = {
firstLineWidth: string
secondLineWidth: string
}
const Block = React.memo(({
className,
}: BlockProps) => {
return <div className={cn('bg-text-quaternary opacity-20', className)} />
})
const Item = React.memo(({
firstLineWidth,
secondLineWidth,
}: ItemProps) => {
return (
<div className='flex gap-x-2 px-2 py-[5px]'>
<div className='py-0.5'>
<Block className='size-4 rounded-[4px]' />
</div>
<div className='flex grow flex-col'>
<div className='flex h-5 w-full items-center'>
<Block className={cn('h-2.5 rounded-sm', firstLineWidth)} />
</div>
<div className='flex h-[18px] w-full items-center'>
<Block className={cn('h-1.5 rounded-sm', secondLineWidth)} />
</div>
</div>
</div>
)
})
const Crawling = ({
className = '',
crawledNum,
totalNum,
}: CrawlingProps) => {
const { t } = useTranslation()
const itemsConfig = [{
firstLineWidth: 'w-[35%]',
secondLineWidth: 'w-[50%]',
}, {
firstLineWidth: 'w-[40%]',
secondLineWidth: 'w-[45%]',
}, {
firstLineWidth: 'w-[30%]',
secondLineWidth: 'w-[36%]',
}]
return (
<div className={cn('mt-2 flex flex-col gap-y-2 pt-2', className)}>
<div className='system-sm-medium text-text-primary'>
{t('datasetCreation.stepOne.website.totalPageScraped')} {crawledNum}/{totalNum}
</div>
<div className='overflow-hidden rounded-xl border border-components-panel-border bg-components-panel-bg'>
<div className='flex items-center gap-x-2 px-4 py-2'>
<Block className='size-4 rounded-[4px]' />
<Block className='h-2.5 w-14 rounded-sm' />
</div>
<div className='flex flex-col gap-px border-t border-divider-subtle bg-background-default-subtle p-2'>
{itemsConfig.map((item, index) => (
<Item
key={index}
firstLineWidth={item.firstLineWidth}
secondLineWidth={item.secondLineWidth}
/>
))}
</div>
</div>
</div>
)
}
export default React.memo(Crawling)

View File

@@ -0,0 +1,34 @@
import React from 'react'
import cn from '@/utils/classnames'
import { RiErrorWarningFill } from '@remixicon/react'
type ErrorMessageProps = {
className?: string
title: string
errorMsg?: string
}
const ErrorMessage = ({
className,
title,
errorMsg,
}: ErrorMessageProps) => {
return (
// eslint-disable-next-line tailwindcss/migration-from-tailwind-2
<div className={cn(
'flex gap-x-0.5 rounded-xl border-[0.5px] border-components-panel-border bg-opacity-40 bg-toast-error-bg p-2 shadow-xs shadow-shadow-shadow-3',
className,
)}>
<div className='flex size-6 items-center justify-center'>
<RiErrorWarningFill className='h-4 w-4 text-text-destructive' />
</div>
<div className='flex flex-col gap-y-0.5 py-1'>
<div className='system-xs-medium text-text-primary'>{title}</div>
{errorMsg && (
<div className='system-xs-regular text-text-secondary'>{errorMsg}</div>
)}
</div>
</div>
)
}
export default React.memo(ErrorMessage)

View File

@@ -0,0 +1,123 @@
import Button from '@/app/components/base/button'
import { useAppForm } from '@/app/components/base/form'
import BaseField from '@/app/components/base/form/form-scenarios/base/field'
import { ArrowDownRoundFill } from '@/app/components/base/icons/src/vender/solid/general'
import cn from '@/utils/classnames'
import { RiPlayLargeLine } from '@remixicon/react'
import { useBoolean } from 'ahooks'
import { useEffect, useMemo } from 'react'
import { useTranslation } from 'react-i18next'
import Toast from '@/app/components/base/toast'
import type { RAGPipelineVariables } from '@/models/pipeline'
import { useConfigurations, useInitialData } from '@/app/components/rag-pipeline/hooks/use-input-fields'
import { generateZodSchema } from '@/app/components/base/form/form-scenarios/base/utils'
import { CrawlStep } from '@/models/datasets'
const I18N_PREFIX = 'datasetCreation.stepOne.website'
type OptionsProps = {
variables: RAGPipelineVariables
step: CrawlStep
runDisabled?: boolean
onSubmit: (data: Record<string, any>) => void
}
const Options = ({
variables,
step,
runDisabled,
onSubmit,
}: OptionsProps) => {
const { t } = useTranslation()
const initialData = useInitialData(variables)
const configurations = useConfigurations(variables)
const schema = useMemo(() => {
return generateZodSchema(configurations)
}, [configurations])
const form = useAppForm({
defaultValues: initialData,
validators: {
onSubmit: ({ value }) => {
const result = schema.safeParse(value)
if (!result.success) {
const issues = result.error.issues
const firstIssue = issues[0]
const errorMessage = `"${firstIssue.path.join('.')}" ${firstIssue.message}`
Toast.notify({
type: 'error',
message: errorMessage,
})
return errorMessage
}
return undefined
},
},
onSubmit: ({ value }) => {
onSubmit(value)
},
})
const [fold, {
toggle: foldToggle,
setTrue: foldHide,
setFalse: foldShow,
}] = useBoolean(false)
useEffect(() => {
// When the step change
if (step !== CrawlStep.init)
foldHide()
else
foldShow()
}, [step])
const isRunning = useMemo(() => step === CrawlStep.running, [step])
return (
<form
className='w-full'
onSubmit={(e) => {
e.preventDefault()
e.stopPropagation()
form.handleSubmit()
}}
>
<div className='flex items-center gap-x-1 px-4 py-2'>
<div
className='flex grow cursor-pointer select-none items-center gap-x-0.5'
onClick={foldToggle}
>
<span className='system-sm-semibold-uppercase text-text-secondary'>
{t(`${I18N_PREFIX}.options`)}
</span>
<ArrowDownRoundFill className={cn('h-4 w-4 shrink-0 text-text-quaternary', fold && '-rotate-90')} />
</div>
<Button
variant='primary'
onClick={form.handleSubmit}
disabled={runDisabled || isRunning}
loading={isRunning}
className='shrink-0 gap-x-0.5'
spinnerClassName='!ml-0'
>
<RiPlayLargeLine className='size-4' />
<span className='px-0.5'>{!isRunning ? t(`${I18N_PREFIX}.run`) : t(`${I18N_PREFIX}.running`)}</span>
</Button>
</div>
{!fold && (
<div className='flex flex-col gap-3 border-t border-divider-subtle px-4 py-3'>
{configurations.map((config, index) => {
const FieldComponent = BaseField({
initialData,
config,
})
return <FieldComponent key={index} form={form} />
})}
</div>
)}
</form>
)
}
export default Options

View File

@@ -0,0 +1,206 @@
'use client'
import React, { useCallback, useRef, useState } from 'react'
import { useTranslation } from 'react-i18next'
import type { CrawlResultItem } from '@/models/datasets'
import { CrawlStep } from '@/models/datasets'
import Header from '../base/header'
import Options from './base/options'
import Crawling from './base/crawling'
import ErrorMessage from './base/error-message'
import CrawledResult from './base/crawled-result'
import {
useDraftPipelinePreProcessingParams,
usePublishedPipelinePreProcessingParams,
} from '@/service/use-pipeline'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import { DatasourceType } from '@/models/pipeline'
import { ssePost } from '@/service/base'
import type {
DataSourceNodeCompletedResponse,
DataSourceNodeErrorResponse,
DataSourceNodeProcessingResponse,
} from '@/types/pipeline'
import type { DataSourceNodeType } from '@/app/components/workflow/nodes/data-source/types'
import { useDataSourceStore, useDataSourceStoreWithSelector } from '../store'
import { useShallow } from 'zustand/react/shallow'
import { useModalContextSelector } from '@/context/modal-context'
import { useGetDataSourceAuth } from '@/service/use-datasource'
import { useDocLink } from '@/context/i18n'
import { ACCOUNT_SETTING_TAB } from '@/app/components/header/account-setting/constants'
const I18N_PREFIX = 'datasetCreation.stepOne.website'
export type WebsiteCrawlProps = {
nodeId: string
nodeData: DataSourceNodeType
isInPipeline?: boolean
onCredentialChange: (credentialId: string) => void
}
const WebsiteCrawl = ({
nodeId,
nodeData,
isInPipeline = false,
onCredentialChange,
}: WebsiteCrawlProps) => {
const { t } = useTranslation()
const docLink = useDocLink()
const [totalNum, setTotalNum] = useState(0)
const [crawledNum, setCrawledNum] = useState(0)
const [crawlErrorMessage, setCrawlErrorMessage] = useState('')
const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)
const setShowAccountSettingModal = useModalContextSelector(s => s.setShowAccountSettingModal)
const {
crawlResult,
step,
checkedCrawlResult,
previewIndex,
currentCredentialId,
} = useDataSourceStoreWithSelector(useShallow(state => ({
crawlResult: state.crawlResult,
step: state.step,
checkedCrawlResult: state.websitePages,
previewIndex: state.previewIndex,
currentCredentialId: state.currentCredentialId,
})))
const { data: dataSourceAuth } = useGetDataSourceAuth({
pluginId: nodeData.plugin_id,
provider: nodeData.provider_name,
})
const dataSourceStore = useDataSourceStore()
const usePreProcessingParams = useRef(!isInPipeline ? usePublishedPipelinePreProcessingParams : useDraftPipelinePreProcessingParams)
const { data: paramsConfig, isFetching: isFetchingParams } = usePreProcessingParams.current({
pipeline_id: pipelineId!,
node_id: nodeId,
}, !!pipelineId && !!nodeId)
const isInit = step === CrawlStep.init
const isCrawlFinished = step === CrawlStep.finished
const isRunning = step === CrawlStep.running
const showError = isCrawlFinished && crawlErrorMessage
const datasourceNodeRunURL = !isInPipeline
? `/rag/pipelines/${pipelineId}/workflows/published/datasource/nodes/${nodeId}/run`
: `/rag/pipelines/${pipelineId}/workflows/draft/datasource/nodes/${nodeId}/run`
const handleCheckedCrawlResultChange = useCallback((checkedCrawlResult: CrawlResultItem[]) => {
const { setWebsitePages } = dataSourceStore.getState()
setWebsitePages(checkedCrawlResult)
}, [dataSourceStore])
const handlePreview = useCallback((website: CrawlResultItem, index: number) => {
const { setCurrentWebsite, setPreviewIndex } = dataSourceStore.getState()
setCurrentWebsite(website)
setPreviewIndex(index)
}, [dataSourceStore])
const handleRun = useCallback(async (value: Record<string, any>) => {
const { setStep, setCrawlResult, currentCredentialId } = dataSourceStore.getState()
setStep(CrawlStep.running)
ssePost(
datasourceNodeRunURL,
{
body: {
inputs: value,
datasource_type: DatasourceType.websiteCrawl,
credential_id: currentCredentialId,
response_mode: 'streaming',
},
},
{
onDataSourceNodeProcessing: (data: DataSourceNodeProcessingResponse) => {
setTotalNum(data.total ?? 0)
setCrawledNum(data.completed ?? 0)
},
onDataSourceNodeCompleted: (data: DataSourceNodeCompletedResponse) => {
const { data: crawlData, time_consuming } = data
const crawlResultData = {
data: crawlData as CrawlResultItem[],
time_consuming: time_consuming ?? 0,
}
setCrawlResult(crawlResultData)
handleCheckedCrawlResultChange(isInPipeline ? [crawlData[0]] : crawlData) // default select the crawl result
setCrawlErrorMessage('')
setStep(CrawlStep.finished)
},
onDataSourceNodeError: (error: DataSourceNodeErrorResponse) => {
setCrawlErrorMessage(error.error || t(`${I18N_PREFIX}.unknownError`))
setStep(CrawlStep.finished)
},
},
)
}, [dataSourceStore, datasourceNodeRunURL, handleCheckedCrawlResultChange, isInPipeline, t])
const handleSubmit = useCallback((value: Record<string, any>) => {
handleRun(value)
}, [handleRun])
const handleSetting = useCallback(() => {
setShowAccountSettingModal({
payload: ACCOUNT_SETTING_TAB.DATA_SOURCE,
})
}, [setShowAccountSettingModal])
const handleCredentialChange = useCallback((credentialId: string) => {
setCrawledNum(0)
setTotalNum(0)
setCrawlErrorMessage('')
onCredentialChange(credentialId)
}, [dataSourceStore, onCredentialChange])
return (
<div className='flex flex-col'>
<Header
docTitle='Docs'
docLink={docLink('/guides/knowledge-base/knowledge-pipeline/authorize-data-source')}
onClickConfiguration={handleSetting}
pluginName={nodeData.datasource_label}
currentCredentialId={currentCredentialId}
onCredentialChange={handleCredentialChange}
credentials={dataSourceAuth?.result || []}
/>
<div className='mt-2 rounded-xl border border-components-panel-border bg-background-default-subtle'>
<Options
variables={paramsConfig?.variables || []}
step={step}
runDisabled={!currentCredentialId || isFetchingParams}
onSubmit={handleSubmit}
/>
</div>
{!isInit && (
<div className='relative flex flex-col'>
{isRunning && (
<Crawling
crawledNum={crawledNum}
totalNum={totalNum}
/>
)}
{showError && (
<ErrorMessage
className='mt-2'
title={t(`${I18N_PREFIX}.exceptionErrorTitle`)}
errorMsg={crawlErrorMessage}
/>
)}
{isCrawlFinished && !showError && (
<CrawledResult
className='mt-2'
list={crawlResult?.data || []}
checkedList={checkedCrawlResult}
onSelectedChange={handleCheckedCrawlResultChange}
usedTime={Number.parseFloat(crawlResult?.time_consuming as string) || 0}
previewIndex={previewIndex}
onPreview={handlePreview}
showPreview={!isInPipeline}
isMultipleChoice={!isInPipeline} // only support single choice in test run
/>
)}
</div>
)}
</div>
)
}
export default React.memo(WebsiteCrawl)