add help info (#142)

2025-07-09 17:31:51 +08:00 · 2024-03-22 15:35:06 +08:00 · 2024-03-22 15:35:06 +08:00 · 1edbd36baf
commit 1edbd36baf
parent 73c2f4d418
11 changed files with 131 additions and 84 deletions
--- a/web/src/components/similarity-slider/index.tsx
+++ b/web/src/components/similarity-slider/index.tsx
@ -15,7 +15,10 @@ const SimilaritySlider = ({ isTooltipShown = false }: IProps) => {
      <Form.Item<FieldType>
        label="Similarity threshold"
        name={'similarity_threshold'}
-        tooltip={isTooltipShown && 'coming soon'}
+        tooltip={isTooltipShown && `We use a hybrid similarity score to evaluate the distance between two lines of text. 
+        It\'s a weighted combination of keyword similarity and vector cosine similarity. 
+        If the similarity between the query and a chunk is less than this threshold, the chunk will be filtered out.`
+    }
        initialValue={0.2}
      >
        <Slider max={1} step={0.01} />
@ -24,7 +27,10 @@ const SimilaritySlider = ({ isTooltipShown = false }: IProps) => {
        label="Vector similarity weight"
        name={'vector_similarity_weight'}
        initialValue={0.3}
-        tooltip={isTooltipShown && 'coming soon'}
+        tooltip={isTooltipShown && `We use a hybrid similarity score to evaluate the distance between two lines of text. 
+        It\'s a weighted combination of keyword similarity and vector cosine similarity.
+        The sum of both weights is 1.0.
+        `}
      >
        <Slider max={1} step={0.01} />
      </Form.Item>
--- a/web/src/pages/add-knowledge/components/knowledge-setting/category-panel.tsx
+++ b/web/src/pages/add-knowledge/components/knowledge-setting/category-panel.tsx
@ -33,16 +33,16 @@ const CategoryPanel = ({ chunkMethod }: { chunkMethod: string }) => {
      {imageList.length > 0 ? (
        <>
          <Title level={5} className={styles.topTitle}>
-            {item.title} Category
+            "{item.title}" Chunking Method Description
          </Title>
          <p
            dangerouslySetInnerHTML={{
              __html: item.description,
            }}
          ></p>
-          <Title level={5}>{item.title} Image Examples</Title>
+          <Title level={5}>"{item.title}" Examples</Title>
          <Text>
-            We've prepared detailed visual guides to make understanding easier
+            These visual guides are provided to make understanding easier
            for you.
          </Text>
          <Row gutter={[10, 10]} className={styles.imageRow}>
--- a/web/src/pages/add-knowledge/components/knowledge-setting/configuration.tsx
+++ b/web/src/pages/add-knowledge/components/knowledge-setting/configuration.tsx
@ -83,7 +83,7 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => {
      <Form.Item
        name="permission"
        label="Permissions"
-        tooltip="coming soon"
+        tooltip="If the permission is 'Team', all the team members can manipulate the knowledgebase."
        rules={[{ required: true }]}
      >
        <Radio.Group>
@ -93,22 +93,22 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => {
      </Form.Item>
      <Form.Item
        name="embd_id"
-        label="Embedding Model"
+        label="Embedding model"
        rules={[{ required: true }]}
-        tooltip="xx"
+        tooltip="The embedding model used to embed chunks. It's unchangeable once the knowledgebase has chunks. You need to delete all the chunks if you want to change it."
      >
        <Select
-          placeholder="Please select a country"
+          placeholder="Please select an embedding model"
          options={embeddingModelOptions}
        ></Select>
      </Form.Item>
      <Form.Item
        name="parser_id"
        label="Chunk method"
-        tooltip="xx"
+        tooltip="The instructions are on the right."
        rules={[{ required: true }]}
      >
-        <Select placeholder="Please select a country">
+        <Select placeholder="Please select a chunk method">
          {parserList.map((x) => (
            <Option value={x.value} key={x.value}>
              {x.label}
@ -122,7 +122,7 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => {

          if (parserId === 'naive') {
            return (
-              <Form.Item label="Max token number" tooltip="xxx">
+              <Form.Item label="Token number" tooltip="It approximately determines the token number of a chunk.">
                <Flex gap={20} align="center">
                  <Flex flex={1}>
                    <Form.Item
--- a/web/src/pages/add-knowledge/components/knowledge-setting/utils.ts
+++ b/web/src/pages/add-knowledge/components/knowledge-setting/utils.ts
@ -7,78 +7,117 @@ export const ImageMap = {
  book: getImageName('book', 4),
  laws: getImageName('law', 4),
  manual: getImageName('manual', 4),
-  media: getImageName('media', 2),
+  picture: getImageName('picture', 2),
  naive: getImageName('naive', 2),
  paper: getImageName('paper', 2),
  presentation: getImageName('presentation', 2),
  qa: getImageName('qa', 2),
  resume: getImageName('resume', 2),
  table: getImageName('table', 2),
+  one: getImageName('one', 2),
 };

 export const TextMap = {
  book: {
    title: '',
-    description: `Supported file formats are docx, excel, pdf, txt.
+    description: `<p>Supported file formats are <b>DOCX</b>, <b>PDF</b>, <b>TXT</b>.</p><p>
  Since a book is long and not all the parts are useful, if it's a PDF,
-  please setup the page ranges for every book in order eliminate negative effects and save computing time for analyzing.`,
+  please set up the <i>page ranges</i> for every book in order to eliminate negative effects and save computing time for analyzing.</p>`,
  },
  laws: {
    title: '',
-    description: `Supported file formats are docx, pdf, txt.`,
+    description: `<p>Supported file formats are <b>DOCX</b>, <b>PDF</b>, <b>TXT</b>.</p><p>
+    Legal documents have a very rigorous writing format. We use text features to detect split points. 
+    </p><p>
+    The chunk granularity is consistent with 'ARTICLE', and all the upper level text will be included in the chunk.
+    </p>`,
  },
-  manual: { title: '', description: `Only pdf is supported.` },
-  media: { title: '', description: '' },
+  manual: { title: '', description: `<p>Only <b>PDF</b> is supported.</p><p>
+  We assume the manual has a hierarchical section structure. We use the lowest section titles as pivots to slice documents.
+  So, the figures and tables in the same section will not be sliced apart, and chunk size might be large.
+  </p>` },
  naive: {
    title: '',
-    description: `Supported file formats are docx, pdf, txt.
-  This method apply the naive ways to chunk files.
-  Successive text will be sliced into pieces using 'delimiter'.
-  Next, these successive pieces are merge into chunks whose token number is no more than 'Max token number'.`,
+    description: `<p>Supported file formats are <b>DOCX, EXCEL, PPT, IMAGE, PDF, TXT</b>.</p>
+  <p>This method applies the naive ways to chunk files: </p>
+  <ul>
+  <li>Successive text will be sliced into pieces using a vision detection model.</li>
+  <li>Next, these successive pieces are merged into chunks whose token number is no more than 'Token number'.</li></ul>`,
  },
  paper: {
    title: '',
-    description: `Only pdf is supported.
-  The special part is that, the abstract of the paper will be sliced as an entire chunk, and will not be sliced partly.`,
+    description: `<p>Only <b>PDF</b> file is supported.</p><p>
+    If our model works well, the paper will be sliced by its sections, like <i>abstract, 1.1, 1.2</i>, etc. </p><p>
+    The benefit of doing this is that LLM can better summarize the content of relevant sections in the paper, 
+    resulting in more comprehensive answers that help readers better understand the paper. 
+    The downside is that it increases the context of the LLM conversation and adds computational cost, 
+    so during the conversation, you can consider reducing the ‘<b>topN</b>’ setting.</p>`,
  },
  presentation: {
    title: '',
-    description: `The supported file formats are pdf, pptx.
-  Every page will be treated as a chunk. And the thumbnail of every page will be stored.
-  PPT file will be parsed by using this method automatically, setting-up for every PPT file is not necessary.`,
+    description: `<p>The supported file formats are <b>PDF</b>, <b>PPTX</b>.</p><p>
+  Every page will be treated as a chunk. And the thumbnail of every page will be stored.</p><p>
+  <i>All the PPT files you uploaded will be chunked by using this method automatically, setting-up for every PPT file is not necessary.</i></p>`,
  },
  qa: {
    title: '',
-    description: `Excel and csv(txt) format files are supported.
-  If the file is in excel format, there should be 2 column question and answer without header.
+    description: `<p><b>EXCEL</b> and <b>CSV/TXT</b> files are supported.</p><p>
+  If the file is in excel format, there should be 2 columns, question and answer, without a header.
  And question column is ahead of answer column.
-  And it's O.K if it has multiple sheets as long as the columns are rightly composed.
+  And it's O.K if it has multiple sheets as long as the columns are rightly composed.</p><p>

-  If it's in csv format, it should be UTF-8 encoded. Use TAB as delimiter to separate question and answer.
+  If it's in csv format, it should be UTF-8 encoded. Use TAB as delimiter to separate question and answer.</p><p>

-  All the deformed lines will be ignored.
-  Every pair of Q&A will be treated as a chunk.`,
+  <i>All the deformed lines will be ignored.
+  Every pair of Q&A will be treated as a chunk.</i></p>`,
  },
  resume: {
    title: '',
-    description: `The supported file formats are pdf, docx and txt.`,
+    description: `<p>The supported file formats are <b>DOCX</b>, <b>PDF</b>, <b>TXT</b>.
+    </p><p>
+    The résumé comes in a variety of formats, just like a person’s personality, but we often have to organize them into structured data that makes it easy to search.
+    </p><p>
+    Instead of chunking the résumé, we parse the résumé into structured data. As an HR, you can dump all the résumés you have, 
+    then you can list all the candidates that match the qualifications just by talking with <i>'RagFlow'</i>.
+    </p>
+    `,
  },
  table: {
    title: '',
-    description: `Excel and csv(txt) format files are supported.
-  For csv or txt file, the delimiter between columns is TAB.
-  The first line must be column headers.
-  Column headers must be meaningful terms inorder to make our NLP model understanding.
-  It's good to enumerate some synonyms using slash '/' to separate, and even better to
-  enumerate values using brackets like 'gender/sex(male, female)'.
-  Here are some examples for headers:
-      1. supplier/vendor\tcolor(yellow, red, brown)\tgender/sex(male, female)\tsize(M,L,XL,XXL)
-      2. 姓名/名字\t电话/手机/微信\t最高学历（高中，职高，硕士，本科，博士，初中，中技，中专，专科，专升本，MPA，MBA，EMBA）
-  Every row in table will be treated as a chunk.
-
-visual:
-  Image files are supported. Video is comming soon.
-  If the picture has text in it, OCR is applied to extract the text as a description of it.
-  If the text extracted by OCR is not enough, visual LLM is used to get the descriptions.`,
+    description: `<p><b>EXCEL</b> and <b>CSV/TXT</b> format files are supported.</p><p>
+    Here're some tips:
+    <ul>
+  <li>For csv or txt file, the delimiter between columns is <em><b>TAB</b></em>.</li>
+  <li>The first line must be column headers.</li>
+  <li>Column headers must be meaningful terms so that our LLM can understand them.
+  It's good to enumerate some synonyms using slash <i>'/'</i> to separate, and even better to
+  enumerate values using brackets like <i>'gender/sex(male, female)'</i>.<p>
+  Here are some examples for headers:<ol>
+      <li>supplier/vendor<b>'TAB'</b>color(yellow, red, brown)<b>'TAB'</b>gender/sex(male, female)<b>'TAB'</b>size(M,L,XL,XXL)</li>
+      <li>姓名/名字<b>'TAB'</b>电话/手机/微信<b>'TAB'</b>最高学历（高中，职高，硕士，本科，博士，初中，中技，中专，专科，专升本，MPA，MBA，EMBA）</li>
+      </ol>
+      </p>
+  </li>
+  <li>Every row in table will be treated as a chunk.</li>
+  </ul>`,
+},
+picture: {
+  title: '',
+  description: `
+  <p>Image files are supported. Video is coming soon.</p><p>
+  If the picture has text in it, OCR is applied to extract the text as its text description.
+  </p><p>
+  If the text extracted by OCR is not enough, visual LLM is used to get the descriptions.
+  </p>`,
+  },
+one: {
+  title: '',
+  description: `
+  <p>Supported file formats are <b>DOCX, EXCEL, PDF, TXT</b>.
+  </p><p>
+  For a document, it will be treated as an entire chunk, no split at all.
+  </p><p>
+  If you don't trust any chunk method and the selected LLM's context length covers the document length, you can try this method.
+  </p>`,
  },
 };
--- a/web/src/pages/add-knowledge/components/knowledge-testing/testing-control/index.tsx
+++ b/web/src/pages/add-knowledge/components/knowledge-testing/testing-control/index.tsx
@ -53,9 +53,10 @@ const TestingControl = ({ form, handleTesting }: IProps) => {
        >
          <SimilaritySlider isTooltipShown></SimilaritySlider>
          <Form.Item<FieldType>
-            label="Top k"
+            label="Top K"
            name={'top_k'}
-            tooltip="coming soon"
+            tooltip="Because of the computation cost, vector cosine similarity with the query is not computed for every retrieved chunk. 
+            The bigger the 'Top K' is, the higher the recall rate is, but the slower the retrieval speed is."
          >
            <Slider marks={{ 0: 0, 2048: 2048 }} max={2048} />
          </Form.Item>
--- a/web/src/pages/chat/chat-configuration-modal/assistant-setting.tsx
+++ b/web/src/pages/chat/chat-configuration-modal/assistant-setting.tsx
@ -55,6 +55,7 @@ const AssistantSetting = ({ show }: ISegmentedContentProps) => {
        label="Language"
        initialValue={'Chinese'}
        tooltip="coming soon"
+        style={{display:'none'}}
      >
        <Select
          options={[
@ -66,22 +67,23 @@ const AssistantSetting = ({ show }: ISegmentedContentProps) => {
      <Form.Item
        name={['prompt_config', 'empty_response']}
        label="Empty response"
-        tooltip="coming soon"
+        tooltip="If nothing is retrieved from the knowledgebase for the user's question, this will be used as the answer.
+        If you want the LLM to come up with its own opinion when nothing is retrieved, leave this blank."
      >
        <Input placeholder="" />
      </Form.Item>
      <Form.Item
        name={['prompt_config', 'prologue']}
        label="Set an opener"
-        tooltip="coming soon"
+        tooltip="How do you want to welcome your clients?"
        initialValue={"Hi! I'm your assistant, what can I do for you?"}
      >
        <Input.TextArea autoSize={{ minRows: 5 }} />
      </Form.Item>
      <Form.Item
-        label="Select one context"
+        label="Knowledgebases"
        name="kb_ids"
-        tooltip="coming soon"
+        tooltip="Select the associated knowledgebases."
        rules={[
          {
            required: true,
--- a/web/src/pages/chat/chat-configuration-modal/model-setting.tsx
+++ b/web/src/pages/chat/chat-configuration-modal/model-setting.tsx
@ -46,16 +46,16 @@ const ModelSetting = ({ show, form }: ISegmentedContentProps) => {
      <Form.Item
        label="Model"
        name="llm_id"
-        tooltip="coming soon"
+        tooltip="Large language chat model"
        rules={[{ required: true, message: 'Please select!' }]}
      >
        <Select options={modelOptions} showSearch />
      </Form.Item>
      <Divider></Divider>
      <Form.Item
-        label="Parameters"
+        label="Freedom"
        name="parameters"
-        tooltip="coming soon"
+        tooltip="'Precise' means the LLM will be conservative and answer your question cautiously. 'Improvise' means you want the LLM to talk much and freely. 'Balance' is between cautiously and freely."
        initialValue={ModelVariableType.Precise}
        // rules={[{ required: true, message: 'Please input!' }]}
      >
@ -64,7 +64,7 @@ const ModelSetting = ({ show, form }: ISegmentedContentProps) => {
          onChange={handleParametersChange}
        />
      </Form.Item>
-      <Form.Item label="Temperature" tooltip={'xx'}>
+      <Form.Item label="Temperature" tooltip={'This parameter controls the randomness of predictions by the model. A lower temperature makes the model more confident in its responses, while a higher temperature makes it more creative and diverse.'}>
        <Flex gap={20} align="center">
          <Form.Item
            name={'temperatureEnabled'}
@ -96,7 +96,7 @@ const ModelSetting = ({ show, form }: ISegmentedContentProps) => {
          </Form.Item>
        </Flex>
      </Form.Item>
-      <Form.Item label="Top P" tooltip={'xx'}>
+      <Form.Item label="Top P" tooltip={'Also known as “nucleus sampling,” this parameter sets a threshold to select a smaller set of words to sample from. It focuses on the most likely words, cutting off the less probable ones.'}>
        <Flex gap={20} align="center">
          <Form.Item name={'topPEnabled'} valuePropName="checked" noStyle>
            <Switch size="small" />
@ -124,7 +124,7 @@ const ModelSetting = ({ show, form }: ISegmentedContentProps) => {
          </Form.Item>
        </Flex>
      </Form.Item>
-      <Form.Item label="Presence Penalty" tooltip={'xx'}>
+      <Form.Item label="Presence Penalty" tooltip={'This discourages the model from repeating the same information by penalizing words that have already appeared in the conversation.'}>
        <Flex gap={20} align="center">
          <Form.Item
            name={'presencePenaltyEnabled'}
@ -160,7 +160,7 @@ const ModelSetting = ({ show, form }: ISegmentedContentProps) => {
          </Form.Item>
        </Flex>
      </Form.Item>
-      <Form.Item label="Frequency Penalty" tooltip={'xx'}>
+      <Form.Item label="Frequency Penalty" tooltip={'Similar to the presence penalty, this reduces the model’s tendency to repeat the same words frequently.'}>
        <Flex gap={20} align="center">
          <Form.Item
            name={'frequencyPenaltyEnabled'}
@ -196,7 +196,7 @@ const ModelSetting = ({ show, form }: ISegmentedContentProps) => {
          </Form.Item>
        </Flex>
      </Form.Item>
-      <Form.Item label="Max Tokens" tooltip={'xx'}>
+      <Form.Item label="Max Tokens" tooltip={'This sets the maximum length of the model’s output, measured in the number of tokens (words or pieces of words).'}>
        <Flex gap={20} align="center">
          <Form.Item name={'maxTokensEnabled'} valuePropName="checked" noStyle>
            <Switch size="small" />
--- a/web/src/pages/chat/chat-configuration-modal/prompt-engine.tsx
+++ b/web/src/pages/chat/chat-configuration-modal/prompt-engine.tsx
@ -154,7 +154,7 @@ const PromptEngine = (
      <Form.Item
        label="System"
        rules={[{ required: true, message: 'Please input!' }]}
-        tooltip="coming soon"
+        tooltip="Instructions you need the LLM to follow when it answers questions, like character design, answer length and answer language, etc."
        name={['prompt_config', 'system']}
        initialValue={`你是一个智能助手，请总结知识库的内容来回答问题，请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时，你的回答必须包括“知识库中未找到您要的答案！”这句话。回答需要考虑聊天历史。
        以下是知识库：
@ -166,10 +166,10 @@ const PromptEngine = (
      <Divider></Divider>
      <SimilaritySlider isTooltipShown></SimilaritySlider>
      <Form.Item<FieldType>
-        label="Top n"
+        label="Top N"
        name={'top_n'}
        initialValue={8}
-        tooltip={'xxx'}
+        tooltip={`Not all the chunks whose similarity score is above the 'similarity threshold' will be fed to LLMs. LLM can only see these 'Top N' chunks.`}
      >
        <Slider max={30} />
      </Form.Item>
@ -178,7 +178,10 @@ const PromptEngine = (
          <Col span={7} className={styles.variableAlign}>
            <label className={styles.variableLabel}>
              Variables
-              <Tooltip title="coming soon">
+              <Tooltip title="If you use dialog APIs, the variables might help you chat with your clients with different strategies. 
+              The variables are used to fill-in the 'System' part in prompt in order to give LLM a hint.
+              The 'knowledge' is a very special variable which will be filled-in with the retrieved chunks.
+              All the variables in 'System' should be curly bracketed.">
                <QuestionCircleOutlined className={styles.variableIcon} />
              </Tooltip>
            </label>
--- a/web/src/pages/user-setting/setting-model/api-key-modal/index.tsx
+++ b/web/src/pages/user-setting/setting-model/api-key-modal/index.tsx
@ -66,7 +66,7 @@ const ApiKeyModal = ({
        <Form.Item<FieldType>
          label="Api-Key"
          name="api_key"
-          tooltip="coming soon"
+          tooltip="The API key can be obtained by registering with the corresponding LLM supplier."
          rules={[{ required: true, message: 'Please input api key!' }]}
        >
          <Input />
--- a/web/src/pages/user-setting/setting-model/system-model-setting-modal/index.tsx
+++ b/web/src/pages/user-setting/setting-model/system-model-setting-modal/index.tsx
@ -43,25 +43,27 @@ const SystemModelSettingModal = ({
      confirmLoading={loading}
    >
      <Form form={form} onValuesChange={onFormLayoutChange} layout={'vertical'}>
-        <Form.Item
-          label="Sequence2txt model"
-          name="asr_id"
-          tooltip="coming soon"
-        >
-          <Select options={allOptions[LlmModelType.Speech2text]} />
+        
+      <Form.Item label="Chat model" name="llm_id" tooltip="The default chat LLM all the newly created knowledgebases will use.">
+          <Select options={allOptions[LlmModelType.Chat]} />
        </Form.Item>
-        <Form.Item label="Embedding model" name="embd_id" tooltip="coming soon">
+        <Form.Item label="Embedding model" name="embd_id" tooltip="The default embedding model all the newly created knowledgebases will use.">
          <Select options={allOptions[LlmModelType.Embedding]} />
        </Form.Item>
        <Form.Item
          label="Img2txt model"
          name="img2txt_id"
-          tooltip="coming soon"
+          tooltip="The default multi-modal model all the newly created knowledgebases will use. It can describe a picture or video."
        >
          <Select options={allOptions[LlmModelType.Image2text]} />
        </Form.Item>
-        <Form.Item label="Chat model" name="llm_id" tooltip="coming soon">
-          <Select options={allOptions[LlmModelType.Chat]} />
+        
+        <Form.Item
+          label="Sequence2txt model"
+          name="asr_id"
+          tooltip="The default ASR model all the newly created knowledgebases will use. Use this model to translate voices to corresponding text."
+        >
+          <Select options={allOptions[LlmModelType.Speech2text]} />
        </Form.Item>
      </Form>
    </Modal>
--- a/web/src/pages/user-setting/setting-profile/index.tsx
+++ b/web/src/pages/user-setting/setting-profile/index.tsx
@ -110,9 +110,6 @@ const UserSettingProfile = () => {
              <div>
                <Space>
                  Your photo
-                  <Tooltip title="coming soon">
-                    <QuestionCircleOutlined />
-                  </Tooltip>
                </Space>
                <div>This will be displayed on your profile.</div>
              </div>
@ -140,7 +137,6 @@ const UserSettingProfile = () => {
          <Form.Item<FieldType>
            label="Color schema"
            name="color_schema"
-            tooltip="coming soon"
            rules={[
              { required: true, message: 'Please select your color schema!' },
            ]}
@ -154,7 +150,6 @@ const UserSettingProfile = () => {
          <Form.Item<FieldType>
            label="Language"
            name="language"
-            tooltip="coming soon"
            rules={[{ required: true, message: 'Please input your language!' }]}
          >
            <Select placeholder="select your language">
@ -166,7 +161,6 @@ const UserSettingProfile = () => {
          <Form.Item<FieldType>
            label="Timezone"
            name="timezone"
-            tooltip="coming soon"
            rules={[{ required: true, message: 'Please input your timezone!' }]}
          >
            <Select placeholder="select your timezone" showSearch>