Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from common.utils.common import bulk_create_in_batches
from knowledge.models import Document, KnowledgeType, Paragraph, File, FileSourceType, Problem, ProblemParagraphMapping
from knowledge.serializers.common import ProblemParagraphObject, ProblemParagraphManage
from knowledge.serializers.document import DocumentSerializers


class ParagraphInstanceSerializer(serializers.Serializer):
Expand Down Expand Up @@ -187,9 +188,19 @@ def save(self, document_list):

return document_model_list, knowledge_id, workspace_id

@staticmethod
def post_embedding(document_model_list, knowledge_id, workspace_id):
    """Refresh every saved document through ``DocumentSerializers.Operate``.

    For each entry in ``document_model_list`` an ``Operate`` serializer is
    built from the knowledge id, the document's ``id`` and the workspace id,
    and its ``refresh()`` method is called.

    :param document_model_list: iterable of saved document objects; only
        their ``id`` attribute is read here.
    :param knowledge_id: value passed through as ``knowledge_id``.
    :param workspace_id: value passed through as ``workspace_id``.
    """
    for saved_document in document_model_list:
        operate_payload = {
            'knowledge_id': knowledge_id,
            'document_id': saved_document.id,
            'workspace_id': workspace_id,
        }
        # NOTE(review): presumably refresh() re-triggers embedding for the
        # document (per this method's name) -- confirm against
        # DocumentSerializers.Operate.
        DocumentSerializers.Operate(data=operate_payload).refresh()

def execute(self, documents, **kwargs) -> NodeResult:

document_model_list, knowledge_id, workspace_id = self.save(documents)
self.post_embedding(document_model_list, knowledge_id, workspace_id)

write_content_list = [{
"name": document.get("name"),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The code appears to be functional but could benefit from certain enhancements and optimizations. Here are some suggestions:

  1. Code Formatting: Ensure consistent indentation and spacing for better readability:

        ...
            }).refresh()
  2. Variable Naming: Keep using meaningful variable names to improve code clarity; the existing write_content_list and document_model_list are good examples to follow.

  3. Comments: Add comments where necessary to explain the purpose of each function or operation.

  4. Documentation Strings: Include docstrings for all classes and methods.

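  5. Error Handling: Consider adding error handling for database operations and other edge cases. For example, post_embedding calls refresh() for each document without any guard, so an exception raised for one document stops the loop before the remaining documents are refreshed.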

Here is a revised version with these improvements:

@@ -20,6 +20,7 @@
 from common.utils.common import bulk_create_in_batches
 from knowledge.models import Document, KnowledgeType, Paragraph, File, FileSourceType, Problem, ProblemParagraphMapping
 from knowledge.serializers.common import ProblemParagraphObject, ProblemParagraphManage
+from knowledge.serializers.document import DocumentSerializers
 
 
 class ParagraphInstanceSerializer(serializers.Serializer):
@@ -201,9 +201,19 @@ def save(self, documents):
         """
         Save the list of documents into the database.

         :param documents: List of dictionaries containing document data.
         :return: Tuple containing lists of saved models, knowledge ID, and workspace ID.
         """
         if not isinstance(documents, list) or documents == []:
             raise ValueError("Documents must be a non-empty list.")

         # Proceed with saving the documents...
@@ -228,7 +248,19 @@ def execute(self, documents, **kwargs) -> NodeResult:
     """

     # Execute the logic to process the documents

     document_model_list, knowledge_id, workspace_id = self.save(documents)

-    # Call a static method to perform additional operations like embedding generation
+    """
+    Post-processing step to generate embeddings, update metadata, etc., after documents have been created.
+    This can include indexing in external systems, setting up permissions, etc.
+    """
+    self.post_embedding(document_model_list, knowledge_id, workspace_id)

     write_content_list = [
         {
             "name": document.get("name"),

These changes enhance the code's readability, maintainability, and robustness while maintaining its functionality.

Expand Down
6 changes: 4 additions & 2 deletions apps/common/sql/list_embedding_text.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ SELECT
problem.knowledge_id AS knowledge_id,
0 AS source_type,
problem."content" AS "text",
paragraph.is_active AS is_active
paragraph.is_active AS is_active,
paragraph.chunks AS chunks
FROM
problem problem
LEFT JOIN problem_paragraph_mapping problem_paragraph_mapping ON problem_paragraph_mapping.problem_id=problem."id"
Expand All @@ -20,7 +21,8 @@ SELECT
paragraph.knowledge_id AS knowledge_id,
1 AS source_type,
concat_ws(E'\n',paragraph.title,paragraph."content") AS "text",
paragraph.is_active AS is_active
paragraph.is_active AS is_active,
paragraph.chunks AS chunks
FROM
paragraph paragraph

Expand Down
3 changes: 3 additions & 0 deletions apps/locales/en_US/LC_MESSAGES/django.po
Original file line number Diff line number Diff line change
Expand Up @@ -8781,4 +8781,7 @@ msgid "SAML2 Log in"
msgstr ""

msgid "SAML2 SSO"
msgstr ""

msgid "Workflow"
msgstr ""
3 changes: 3 additions & 0 deletions apps/locales/zh_CN/LC_MESSAGES/django.po
Original file line number Diff line number Diff line change
Expand Up @@ -8908,3 +8908,6 @@ msgstr "SAML2 登录"

msgid "SAML2 SSO"
msgstr "SAML2 单点登录"

msgid "Workflow"
msgstr "工作流"
5 changes: 4 additions & 1 deletion apps/locales/zh_Hant/LC_MESSAGES/django.po
Original file line number Diff line number Diff line change
Expand Up @@ -8907,4 +8907,7 @@ msgid "SAML2 Log in"
msgstr "SAML2 登入"

msgid "SAML2 SSO"
msgstr "SAML2 單點登入"
msgstr "SAML2 單點登入"

msgid "Workflow"
msgstr "工作流"
Loading