@@ -64,6 +64,55 @@ def load_data(self, spreadsheet_ids: List[str]) -> List[Document]:
6464 )
6565 return results
6666
def load_sheet_as_documents(
    self, spreadsheet_id: str, sheet_name: str, text_column_name: str = "text"
) -> List[Document]:
    """Load one sheet of a Google spreadsheet as one Document per data row.

    The first row returned for the sheet is treated as the header. For every
    following row, the cell under ``text_column_name`` becomes the Document
    text, and the remaining cells are collected into a dict keyed by their
    header names and passed to the Document as ``meta``.

    Args:
        spreadsheet_id (str): The ID of the spreadsheet.
        sheet_name (str): The name of the sheet to be processed; it is
            passed as the ``range`` of the values request.
        text_column_name (str): The header of the column used for the
            Document text (default is "text").

    Returns:
        List[Document]: One Document per row after the header, in the order
            the rows were returned.

    Raises:
        ValueError: If the header row has no column named
            ``text_column_name``. This includes the case where the request
            returns no rows at all.
    """
    import googleapiclient.discovery as discovery

    # Build the Sheets client and fetch every value of the requested sheet.
    credentials = self._get_credentials()
    sheets_service = discovery.build("sheets", "v4", credentials=credentials)
    sheet_data = (
        sheets_service.spreadsheets()
        .values()
        .get(spreadsheetId=spreadsheet_id, range=sheet_name)
        .execute()
    )

    # Split header from data without mutating the response payload.
    rows = sheet_data.get("values", [])
    header = rows[0] if rows else []
    data_rows = rows[1:]

    # Locate the text column. The built-in "x is not in list" error carries
    # no useful information for the caller, so it is suppressed.
    try:
        text_col_index = header.index(text_column_name)
    except ValueError:
        raise ValueError(
            f'The sheet must contain a column named "{text_column_name}".'
        ) from None

    documents = []
    for row in data_rows:
        # A row can be shorter than the header (presumably because the API
        # drops trailing empty cells - confirm against the values.get docs);
        # a missing text cell is treated as empty text.
        text_value = row[text_col_index] if text_col_index < len(row) else ""
        # zip() stops at the shorter of header/row, so cells missing from a
        # short row simply do not appear in the metadata.
        meta = {
            key: value
            for key, value in zip(header, row)
            if key != text_column_name
        }
        # NOTE(review): Document is built with a "meta" keyword; confirm the
        # Document class imported by this module accepts that argument name.
        documents.append(Document(text=text_value, meta=meta))

    return documents
115+
67116 def _load_sheet (self , spreadsheet_id : str ) -> str :
68117 """Load a sheet from Google Sheets.
69118
0 commit comments