add help info (#142)

This commit is contained in:
KevinHuSh 2024-03-22 15:35:06 +08:00 committed by GitHub
parent 73c2f4d418
commit 1edbd36baf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 131 additions and 84 deletions

View File

@ -15,7 +15,10 @@ const SimilaritySlider = ({ isTooltipShown = false }: IProps) => {
<Form.Item<FieldType>
label="Similarity threshold"
name={'similarity_threshold'}
tooltip={isTooltipShown && 'coming soon'}
tooltip={isTooltipShown && `We use hybrid similarity score to evaluate distance between two lines of text.
It\'s weighted keywords similarity and vector cosine similarity.
If the similarity between query and chunk is less than this threshold, the chunk will be filtered out.`
}
initialValue={0.2}
>
<Slider max={1} step={0.01} />
@ -24,7 +27,10 @@ const SimilaritySlider = ({ isTooltipShown = false }: IProps) => {
label="Vector similarity weight"
name={'vector_similarity_weight'}
initialValue={0.3}
tooltip={isTooltipShown && 'coming soon'}
tooltip={isTooltipShown && `We use hybrid similarity score to evaluate distance between two lines of text.
It\'s weighted keywords similarity and vector cosine similarity.
The sum of both weights is 1.0.
`}
>
<Slider max={1} step={0.01} />
</Form.Item>

View File

@ -33,16 +33,16 @@ const CategoryPanel = ({ chunkMethod }: { chunkMethod: string }) => {
{imageList.length > 0 ? (
<>
<Title level={5} className={styles.topTitle}>
{item.title} Category
"{item.title}" Chunking Method Description
</Title>
<p
dangerouslySetInnerHTML={{
__html: item.description,
}}
></p>
<Title level={5}>{item.title} Image Examples</Title>
<Title level={5}>"{item.title}" Examples</Title>
<Text>
We've prepared detailed visual guides to make understanding easier
These visual guides are here to make understanding easier
for you.
</Text>
<Row gutter={[10, 10]} className={styles.imageRow}>

View File

@ -83,7 +83,7 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => {
<Form.Item
name="permission"
label="Permissions"
tooltip="coming soon"
tooltip="If the permission is 'Team', all the team members can manipulate the knowledgebase."
rules={[{ required: true }]}
>
<Radio.Group>
@ -93,22 +93,22 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => {
</Form.Item>
<Form.Item
name="embd_id"
label="Embedding Model"
label="Embedding model"
rules={[{ required: true }]}
tooltip="xx"
tooltip="The embedding model used to embed chunks. It's unchangeable once the knowledgebase has chunks. You need to delete all the chunks if you want to change it."
>
<Select
placeholder="Please select a country"
placeholder="Please select an embedding model"
options={embeddingModelOptions}
></Select>
</Form.Item>
<Form.Item
name="parser_id"
label="Chunk method"
tooltip="xx"
tooltip="The instruction is at right."
rules={[{ required: true }]}
>
<Select placeholder="Please select a country">
<Select placeholder="Please select a chunk method">
{parserList.map((x) => (
<Option value={x.value} key={x.value}>
{x.label}
@ -122,7 +122,7 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => {
if (parserId === 'naive') {
return (
<Form.Item label="Max token number" tooltip="xxx">
<Form.Item label="Token number" tooltip="It determines the token number of a chunk approximately.">
<Flex gap={20} align="center">
<Flex flex={1}>
<Form.Item

View File

@ -7,78 +7,117 @@ export const ImageMap = {
book: getImageName('book', 4),
laws: getImageName('law', 4),
manual: getImageName('manual', 4),
media: getImageName('media', 2),
picture: getImageName('picture', 2),
naive: getImageName('naive', 2),
paper: getImageName('paper', 2),
presentation: getImageName('presentation', 2),
qa: getImageName('qa', 2),
resume: getImageName('resume', 2),
table: getImageName('table', 2),
one: getImageName('one', 2),
};
export const TextMap = {
book: {
title: '',
description: `Supported file formats are docx, excel, pdf, txt.
description: `<p>Supported file formats are <b>DOCX</b>, <b>PDF</b>, <b>TXT</b>.</p><p>
Since a book is long and not all the parts are useful, if it's a PDF,
please setup the page ranges for every book in order eliminate negative effects and save computing time for analyzing.`,
please set up the <i>page ranges</i> for every book in order to eliminate negative effects and save computing time for analyzing.</p>`,
},
laws: {
title: '',
description: `Supported file formats are docx, pdf, txt.`,
description: `<p>Supported file formats are <b>DOCX</b>, <b>PDF</b>, <b>TXT</b>.</p><p>
Legal documents have a very rigorous writing format. We use text feature to detect split point.
</p><p>
The chunk granularity is consistent with 'ARTICLE', and all the upper level text will be included in the chunk.
</p>`,
},
manual: { title: '', description: `Only pdf is supported.` },
media: { title: '', description: '' },
manual: { title: '', description: `<p>Only <b>PDF</b> is supported.</p><p>
We assume manual has hierarchical section structure. We use the lowest section titles as pivots to slice documents.
So, the figures and tables in the same section will not be sliced apart, and chunk size might be large.
</p>` },
naive: {
title: '',
description: `Supported file formats are docx, pdf, txt.
This method apply the naive ways to chunk files.
Successive text will be sliced into pieces using 'delimiter'.
Next, these successive pieces are merge into chunks whose token number is no more than 'Max token number'.`,
description: `<p>Supported file formats are <b>DOCX, EXCEL, PPT, IMAGE, PDF, TXT</b>.</p>
<p>This method applies the naive ways to chunk files: </p>
<p>
<li>Successive text will be sliced into pieces using vision detection model.</li>
<li>Next, these successive pieces are merged into chunks whose token number is no more than 'Token number'.</li></p>`,
},
paper: {
title: '',
description: `Only pdf is supported.
The special part is that, the abstract of the paper will be sliced as an entire chunk, and will not be sliced partly.`,
description: `<p>Only <b>PDF</b> file is supported.</p><p>
If our model works well, the paper will be sliced by its sections, like <i>abstract, 1.1, 1.2</i>, etc. </p><p>
The benefit of doing this is that LLM can better summarize the content of relevant sections in the paper,
resulting in more comprehensive answers that help readers better understand the paper.
The downside is that it increases the context of the LLM conversation and adds computational cost,
so during the conversation, you can consider reducing the <b>topN</b> setting.</p>`,
},
presentation: {
title: '',
description: `The supported file formats are pdf, pptx.
Every page will be treated as a chunk. And the thumbnail of every page will be stored.
PPT file will be parsed by using this method automatically, setting-up for every PPT file is not necessary.`,
description: `<p>The supported file formats are <b>PDF</b>, <b>PPTX</b>.</p><p>
Every page will be treated as a chunk. And the thumbnail of every page will be stored.</p><p>
<i>All the PPT files you uploaded will be chunked by using this method automatically, setting-up for every PPT file is not necessary.</i></p>`,
},
qa: {
title: '',
description: `Excel and csv(txt) format files are supported.
If the file is in excel format, there should be 2 column question and answer without header.
description: `<p><b>EXCEL</b> and <b>CSV/TXT</b> files are supported.</p><p>
If the file is in excel format, there should be 2 columns question and answer without header.
And question column is ahead of answer column.
And it's O.K if it has multiple sheets as long as the columns are rightly composed.
And it's O.K if it has multiple sheets as long as the columns are rightly composed.</p><p>
If it's in csv format, it should be UTF-8 encoded. Use TAB as delimiter to separate question and answer.
If it's in csv format, it should be UTF-8 encoded. Use TAB as delimiter to separate question and answer.</p><p>
All the deformed lines will be ignored.
Every pair of Q&A will be treated as a chunk.`,
<i>All the deformed lines will be ignored.
Every pair of Q&A will be treated as a chunk.</i></p>`,
},
resume: {
title: '',
description: `The supported file formats are pdf, docx and txt.`,
description: `<p>The supported file formats are <b>DOCX</b>, <b>PDF</b>, <b>TXT</b>.
</p><p>
The résumé comes in a variety of formats, just like a person's personality, but we often have to organize them into structured data that makes it easy to search.
</p><p>
Instead of chunking the résumé, we parse the résumé into structured data. As an HR, you can dump all the résumés you have,
then you can list all the candidates that match the qualifications just by talking with <i>'RagFlow'</i>.
</p>
`,
},
table: {
title: '',
description: `Excel and csv(txt) format files are supported.
For csv or txt file, the delimiter between columns is TAB.
The first line must be column headers.
Column headers must be meaningful terms inorder to make our NLP model understanding.
It's good to enumerate some synonyms using slash '/' to separate, and even better to
enumerate values using brackets like 'gender/sex(male, female)'.
Here are some examples for headers:
1. supplier/vendor\tcolor(yellow, red, brown)\tgender/sex(male, female)\tsize(M,L,XL,XXL)
2. /\t电话//\t最高学历MPAMBAEMBA
Every row in table will be treated as a chunk.
visual:
Image files are supported. Video is comming soon.
If the picture has text in it, OCR is applied to extract the text as a description of it.
If the text extracted by OCR is not enough, visual LLM is used to get the descriptions.`,
description: `<p><b>EXCEL</b> and <b>CSV/TXT</b> format files are supported.</p><p>
Here're some tips:
<ul>
<li>For csv or txt file, the delimiter between columns is <em><b>TAB</b></em>.</li>
<li>The first line must be column headers.</li>
<li>Column headers must be meaningful terms so that our LLM can understand them.
It's good to enumerate some synonyms using slash <i>'/'</i> to separate, and even better to
enumerate values using brackets like <i>'gender/sex(male, female)'</i>.<p>
Here are some examples for headers:<ol>
<li>supplier/vendor<b>'TAB'</b>color(yellow, red, brown)<b>'TAB'</b>gender/sex(male, female)<b>'TAB'</b>size(M,L,XL,XXL)</li>
<li>/<b>'TAB'</b>//<b>'TAB'</b>MPAMBAEMBA</li>
</ol>
</p>
</li>
<li>Every row in table will be treated as a chunk.</li>
</ul>`,
},
picture: {
title: '',
description: `
<p>Image files are supported. Video is coming soon.</p><p>
If the picture has text in it, OCR is applied to extract the text as its text description.
</p><p>
If the text extracted by OCR is not enough, visual LLM is used to get the descriptions.
</p>`,
},
one: {
title: '',
description: `
<p>Supported file formats are <b>DOCX, EXCEL, PDF, TXT</b>.
</p><p>
For a document, it will be treated as an entire chunk, no split at all.
</p><p>
If you don't trust any chunk method and the selected LLM's context length covers the document length, you can try this method.
</p>`,
},
};

View File

@ -53,9 +53,10 @@ const TestingControl = ({ form, handleTesting }: IProps) => {
>
<SimilaritySlider isTooltipShown></SimilaritySlider>
<Form.Item<FieldType>
label="Top k"
label="Top K"
name={'top_k'}
tooltip="coming soon"
tooltip="To limit the computation cost, vector cosine similarity with the query is not computed for every retrieved chunk.
The bigger the 'Top K' is, the higher the recall rate is, the slower the retrieval speed is."
>
<Slider marks={{ 0: 0, 2048: 2048 }} max={2048} />
</Form.Item>

View File

@ -55,6 +55,7 @@ const AssistantSetting = ({ show }: ISegmentedContentProps) => {
label="Language"
initialValue={'Chinese'}
tooltip="coming soon"
style={{display:'none'}}
>
<Select
options={[
@ -66,22 +67,23 @@ const AssistantSetting = ({ show }: ISegmentedContentProps) => {
<Form.Item
name={['prompt_config', 'empty_response']}
label="Empty response"
tooltip="coming soon"
tooltip="If nothing is retrieved with user's question in the knowledgebase, it will use this as an answer.
If you want the LLM to come up with its own opinion when nothing is retrieved, leave this blank."
>
<Input placeholder="" />
</Form.Item>
<Form.Item
name={['prompt_config', 'prologue']}
label="Set an opener"
tooltip="coming soon"
tooltip="How do you want to welcome your clients?"
initialValue={"Hi! I'm your assistant, what can I do for you?"}
>
<Input.TextArea autoSize={{ minRows: 5 }} />
</Form.Item>
<Form.Item
label="Select one context"
label="Knowledgebases"
name="kb_ids"
tooltip="coming soon"
tooltip="Select knowledgebases associated."
rules={[
{
required: true,

View File

@ -46,16 +46,16 @@ const ModelSetting = ({ show, form }: ISegmentedContentProps) => {
<Form.Item
label="Model"
name="llm_id"
tooltip="coming soon"
tooltip="Large language chat model"
rules={[{ required: true, message: 'Please select!' }]}
>
<Select options={modelOptions} showSearch />
</Form.Item>
<Divider></Divider>
<Form.Item
label="Parameters"
label="Freedom"
name="parameters"
tooltip="coming soon"
tooltip="'Precise' means the LLM will be conservative and answer your question cautiously. 'Improvise' means that you want the LLM to talk more and freely. 'Balance' is between cautiously and freely."
initialValue={ModelVariableType.Precise}
// rules={[{ required: true, message: 'Please input!' }]}
>
@ -64,7 +64,7 @@ const ModelSetting = ({ show, form }: ISegmentedContentProps) => {
onChange={handleParametersChange}
/>
</Form.Item>
<Form.Item label="Temperature" tooltip={'xx'}>
<Form.Item label="Temperature" tooltip={'This parameter controls the randomness of predictions by the model. A lower temperature makes the model more confident in its responses, while a higher temperature makes it more creative and diverse.'}>
<Flex gap={20} align="center">
<Form.Item
name={'temperatureEnabled'}
@ -96,7 +96,7 @@ const ModelSetting = ({ show, form }: ISegmentedContentProps) => {
</Form.Item>
</Flex>
</Form.Item>
<Form.Item label="Top P" tooltip={'xx'}>
<Form.Item label="Top P" tooltip={'Also known as “nucleus sampling,” this parameter sets a threshold to select a smaller set of words to sample from. It focuses on the most likely words, cutting off the less probable ones.'}>
<Flex gap={20} align="center">
<Form.Item name={'topPEnabled'} valuePropName="checked" noStyle>
<Switch size="small" />
@ -124,7 +124,7 @@ const ModelSetting = ({ show, form }: ISegmentedContentProps) => {
</Form.Item>
</Flex>
</Form.Item>
<Form.Item label="Presence Penalty" tooltip={'xx'}>
<Form.Item label="Presence Penalty" tooltip={'This discourages the model from repeating the same information by penalizing words that have already appeared in the conversation.'}>
<Flex gap={20} align="center">
<Form.Item
name={'presencePenaltyEnabled'}
@ -160,7 +160,7 @@ const ModelSetting = ({ show, form }: ISegmentedContentProps) => {
</Form.Item>
</Flex>
</Form.Item>
<Form.Item label="Frequency Penalty" tooltip={'xx'}>
<Form.Item label="Frequency Penalty" tooltip={'Similar to the presence penalty, this reduces the model’s tendency to repeat the same words frequently.'}>
<Flex gap={20} align="center">
<Form.Item
name={'frequencyPenaltyEnabled'}
@ -196,7 +196,7 @@ const ModelSetting = ({ show, form }: ISegmentedContentProps) => {
</Form.Item>
</Flex>
</Form.Item>
<Form.Item label="Max Tokens" tooltip={'xx'}>
<Form.Item label="Max Tokens" tooltip={'This sets the maximum length of the model’s output, measured in the number of tokens (words or pieces of words).'}>
<Flex gap={20} align="center">
<Form.Item name={'maxTokensEnabled'} valuePropName="checked" noStyle>
<Switch size="small" />

View File

@ -154,7 +154,7 @@ const PromptEngine = (
<Form.Item
label="System"
rules={[{ required: true, message: 'Please input!' }]}
tooltip="coming soon"
tooltip="Instructions you need LLM to follow when LLM answers questions, like character design, answer length and answer language etc."
name={['prompt_config', 'system']}
initialValue={`你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。
@ -166,10 +166,10 @@ const PromptEngine = (
<Divider></Divider>
<SimilaritySlider isTooltipShown></SimilaritySlider>
<Form.Item<FieldType>
label="Top n"
label="Top N"
name={'top_n'}
initialValue={8}
tooltip={'xxx'}
tooltip={`Not all the chunks whose similarity score is above the 'similarity threshold' will be fed to LLMs. LLM can only see these 'Top N' chunks.`}
>
<Slider max={30} />
</Form.Item>
@ -178,7 +178,10 @@ const PromptEngine = (
<Col span={7} className={styles.variableAlign}>
<label className={styles.variableLabel}>
Variables
<Tooltip title="coming soon">
<Tooltip title="If you use dialog APIs, the variables might help you chat with your clients with different strategies.
The variables are used to fill-in the 'System' part in prompt in order to give LLM a hint.
The 'knowledge' is a very special variable which will be filled-in with the retrieved chunks.
All the variables in 'System' should be curly bracketed.">
<QuestionCircleOutlined className={styles.variableIcon} />
</Tooltip>
</label>

View File

@ -66,7 +66,7 @@ const ApiKeyModal = ({
<Form.Item<FieldType>
label="Api-Key"
name="api_key"
tooltip="coming soon"
tooltip="The API key can be obtained by registering with the corresponding LLM supplier."
rules={[{ required: true, message: 'Please input api key!' }]}
>
<Input />

View File

@ -43,25 +43,27 @@ const SystemModelSettingModal = ({
confirmLoading={loading}
>
<Form form={form} onValuesChange={onFormLayoutChange} layout={'vertical'}>
<Form.Item
label="Sequence2txt model"
name="asr_id"
tooltip="coming soon"
>
<Select options={allOptions[LlmModelType.Speech2text]} />
<Form.Item label="Chat model" name="llm_id" tooltip="The default chat LLM all the newly created knowledgebase will use.">
<Select options={allOptions[LlmModelType.Chat]} />
</Form.Item>
<Form.Item label="Embedding model" name="embd_id" tooltip="coming soon">
<Form.Item label="Embedding model" name="embd_id" tooltip="The default embedding model all the newly created knowledgebase will use.">
<Select options={allOptions[LlmModelType.Embedding]} />
</Form.Item>
<Form.Item
label="Img2txt model"
name="img2txt_id"
tooltip="coming soon"
tooltip="The default multi-modal model all the newly created knowledgebase will use. It can describe a picture or video."
>
<Select options={allOptions[LlmModelType.Image2text]} />
</Form.Item>
<Form.Item label="Chat model" name="llm_id" tooltip="coming soon">
<Select options={allOptions[LlmModelType.Chat]} />
<Form.Item
label="Sequence2txt model"
name="asr_id"
tooltip="The default ASR model all the newly created knowledgebase will use. Use this model to translate voices to corresponding text."
>
<Select options={allOptions[LlmModelType.Speech2text]} />
</Form.Item>
</Form>
</Modal>

View File

@ -110,9 +110,6 @@ const UserSettingProfile = () => {
<div>
<Space>
Your photo
<Tooltip title="coming soon">
<QuestionCircleOutlined />
</Tooltip>
</Space>
<div>This will be displayed on your profile.</div>
</div>
@ -140,7 +137,6 @@ const UserSettingProfile = () => {
<Form.Item<FieldType>
label="Color schema"
name="color_schema"
tooltip="coming soon"
rules={[
{ required: true, message: 'Please select your color schema!' },
]}
@ -154,7 +150,6 @@ const UserSettingProfile = () => {
<Form.Item<FieldType>
label="Language"
name="language"
tooltip="coming soon"
rules={[{ required: true, message: 'Please input your language!' }]}
>
<Select placeholder="select your language">
@ -166,7 +161,6 @@ const UserSettingProfile = () => {
<Form.Item<FieldType>
label="Timezone"
name="timezone"
tooltip="coming soon"
rules={[{ required: true, message: 'Please input your timezone!' }]}
>
<Select placeholder="select your timezone" showSearch>