AmadeusITGroup · dpomian · Aug 19, 2025 · Aug 3, 2025 · Aug 5, 2025
diff --git a/src/docs2vecs/subcommands/indexer/skills/ada002_embedding_skill.py b/src/docs2vecs/subcommands/indexer/skills/ada002_embedding_skill.py
@@ -11,14 +11,23 @@ class AzureAda002EmbeddingSkill(IndexerSkill):
     def __init__(self, config: dict, global_config: Config):
         super().__init__(config, global_config)
 
-    def az_ada002_embeddings(self, content: str):
+    def az_ada002_embeddings(self, content: str, chunk_id=None):
+        """Return the embedding of ``content``, or None on failure; ``chunk_id`` is only logged."""
+        size = len(content)
+        self.logger.debug(f"Requesting embedding for chunk_id={chunk_id}, content_length={size}")
         embed_model = AzureOpenAIEmbedding(
             deployment_name=self._config["deployment_name"],
             api_key=self._config["api_key"],
             azure_endpoint=self._config["endpoint"],
             api_version=self._config["api_version"],
         )
-        return embed_model.get_query_embedding(content)
+        try:
+            embedding = embed_model.get_query_embedding(content)
+        except Exception as e:  # best effort: report the failure and signal it with None
+            self.logger.exception(f"Embedding failed for chunk_id={chunk_id}: {e}")
+            return None
+        self.logger.debug(f"Received embedding for chunk_id={chunk_id}")
+        return embedding
 
     def run(self, input: Optional[List[Document]] = None) -> Optional[List[Document]]:
         self.logger.info(
@@ -36,10 +45,11 @@ def run(self, input: Optional[List[Document]] = None) -> Optional[List[Document]
             self.logger.debug(f"Processing document: {doc.filename}")
             for chunk in doc.chunks:
                 self.logger.debug(f"Creating embedding for chunk: {chunk.chunk_id}")
-                chunk.embedding = (
-                    ""
-                    if not chunk.content
-                    else self.az_ada002_embeddings(chunk.content)
-                )
-
-        return input
+                if not chunk.content:
+                    chunk.embedding = ""
+                else:
+                    chunk.embedding = self.az_ada002_embeddings(
+                        chunk.content, chunk_id=chunk.chunk_id
+                    )
+
+        return input
diff --git a/src/docs2vecs/subcommands/indexer/skills/default_file_reader.py b/src/docs2vecs/subcommands/indexer/skills/default_file_reader.py
@@ -24,6 +24,7 @@ class DefaultFileReader(FileLoaderSkill):
     - .doc, .docx: Word documents using UnstructuredWordDocumentLoader
     - .ppt, .pptx: PowerPoint files using UnstructuredPowerPointLoader
     - .xls, .xlsx: Excel files using UnstructuredExcelLoader
+    - .yml, .yaml: YAML files using PyYAML (multiple documents supported)
     """
 
     def __init__(self, skill_config: dict, global_config: Config) -> None:
@@ -38,7 +39,32 @@ def __init__(self, skill_config: dict, global_config: Config) -> None:
             ".pptx": self._load_powerpoint,
             ".xls": self._load_excel,
             ".xlsx": self._load_excel,
+            ".yml": self._load_yaml,
+            ".yaml": self._load_yaml,
         }
+
+    def _load_yaml(self, file_path: Path) -> List[Document]:
+        """Load a YAML file, returning one Document per non-empty ``---``-separated document."""
+        try:
+            import yaml
+        except ImportError as err:
+            raise ImportError("yaml module is required to read YAML files.") from err
+
+        source = str(file_path)
+        # Binary mode lets PyYAML detect the encoding itself (BOM, else UTF-8) rather
+        # than decoding with the platform's locale default.
+        with file_path.open("rb") as fp:
+            # allow_unicode keeps non-ASCII text readable instead of \u-escaped.
+            # NOTE(review): all documents share the file path as filename; confirm OK.
+            return [
+                Document(
+                    filename=source,
+                    source_url=source,
+                    text=yaml.safe_dump(content, allow_unicode=True),
+                )
+                for content in yaml.safe_load_all(fp)
+                if content is not None
+            ]
 
     def run(self, documents: Optional[List[Document]]) -> List[Document]:
         """Process input documents by reading their content based on file extension.
@@ -118,4 +144,4 @@ def _load_excel(self, file_path: Path) -> List[Document]:
         """Load Excel files using UnstructuredExcelLoader."""
         loader = UnstructuredExcelLoader(str(file_path))
         docs = loader.load()
-        return [Document(filename=str(file_path), source_url=str(file_path), text=doc.page_content) for doc in docs]
+        return [Document(filename=str(file_path), source_url=str(file_path), text=doc.page_content) for doc in docs]