Skip to content
This repository was archived by the owner on Mar 1, 2024. It is now read-only.

Commit b9d5689

Browse files
authored
Extend GoogleSheetsReader to Accept Custom Text Column Names (#620)
* Update base.py * Update base.py fix linting
1 parent 7405add commit b9d5689

File tree

1 file changed

+49
-0
lines changed

1 file changed

+49
-0
lines changed

llama_hub/google_sheets/base.py

Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,55 @@ def load_data(self, spreadsheet_ids: List[str]) -> List[Document]:
6464
)
6565
return results
6666

67+
def load_sheet_as_documents(
    self, spreadsheet_id: str, sheet_name: str, text_column_name: str = "text"
) -> List[Document]:
    """Load one sheet of a Google spreadsheet as one Document per data row.

    The first row returned for the sheet is treated as the header. For every
    following row, the cell under ``text_column_name`` becomes the document
    text and the remaining header/cell pairs become the document metadata.

    Args:
        spreadsheet_id (str): The ID of the spreadsheet.
        sheet_name (str): The name of the sheet to be processed. It is passed
            unchanged as the ``range`` of the values request.
        text_column_name (str): The header of the column to be used for the
            "text" field (default is "text").

    Returns:
        List[Document]: One Document per data row, built with ``text`` and
            ``meta`` keyword arguments. Rows that are shorter than the header
            are padded with empty strings, so every document carries the
            same metadata keys.

    Raises:
        ValueError: If the header row has no column named
            ``text_column_name``. This includes a sheet for which no values
            are returned at all.
    """
    import googleapiclient.discovery as discovery

    # Fetch every value of the requested sheet in a single call.
    credentials = self._get_credentials()
    sheets_service = discovery.build("sheets", "v4", credentials=credentials)
    sheet_data = (
        sheets_service.spreadsheets()
        .values()
        .get(spreadsheetId=spreadsheet_id, range=sheet_name)
        .execute()
    )

    # Split the header from the data rows without mutating the response list.
    rows = sheet_data.get("values", [])
    header = rows[0] if rows else []
    data_rows = rows[1:]

    # Locate the text column. ``from None`` drops the uninformative
    # "x is not in list" context so callers only see the actionable message.
    try:
        text_col_index = header.index(text_column_name)
    except ValueError:
        raise ValueError(
            f'The sheet must contain a column named "{text_column_name}".'
        ) from None

    documents = []
    for row in data_rows:
        # Rows may come back shorter than the header (e.g. trailing empty
        # cells left out of the response). Pad them with "" so the text
        # lookup is always in range and the metadata keeps one key per header
        # column instead of silently losing the trailing ones.
        cells = list(row) + [""] * (len(header) - len(row))
        text_value = cells[text_col_index]

        # Everything except the text column is kept as metadata. Cells beyond
        # the header width have no name and are ignored by zip().
        meta = {
            key: value
            for key, value in zip(header, cells)
            if key != text_column_name
        }
        # NOTE(review): confirm that the Document class used by this file
        # accepts a ``meta`` keyword; if its metadata field has a different
        # name, the row metadata must be passed under that name instead.
        documents.append(Document(text=text_value, meta=meta))

    return documents
115+
67116
def _load_sheet(self, spreadsheet_id: str) -> str:
68117
"""Load a sheet from Google Sheets.
69118

0 commit comments

Comments
 (0)