import { omit } from 'lodash'
import { ICommonObject, IDocument, INode, INodeData, INodeParams, INodeOptionsValue } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import {
    convertMultiOptionsToStringArray,
    getCredentialData,
    getCredentialParam,
    handleEscapeCharacters,
    INodeOutputsValue,
    refreshOAuth2Token
} from '../../../src'

/**
 * Document-loader node that reads one or more Google Sheets spreadsheets over the
 * Sheets v4 REST API and turns every loaded sheet into one markdown-table document.
 *
 * Authentication uses the `googleSheetsOAuth2` credential; the access token is
 * refreshed before each listing / loading run.
 */
class GoogleSheets_DocumentLoaders implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    credential: INodeParams
    inputs: INodeParams[]
    outputs: INodeOutputsValue[]

    constructor() {
        this.label = 'Google Sheets'
        this.name = 'googleSheets'
        this.version = 1.0
        this.type = 'Document'
        this.icon = 'google-sheets.svg'
        this.category = 'Document Loaders'
        this.description = `Load data from Google Sheets as documents`
        this.baseClasses = [this.type]
        this.credential = {
            label: 'Connect Credential',
            name: 'credential',
            type: 'credential',
            description: 'Google Sheets OAuth2 Credential',
            credentialNames: ['googleSheetsOAuth2']
        }
        this.inputs = [
            {
                label: 'Select Spreadsheet',
                name: 'spreadsheetIds',
                type: 'asyncMultiOptions',
                loadMethod: 'listSpreadsheets',
                description: 'Select spreadsheet from your Google Drive',
                refresh: true
            },
            {
                label: 'Sheet Names',
                name: 'sheetNames',
                type: 'string',
                description: 'Comma-separated list of sheet names to load. If empty, loads all sheets.',
                placeholder: 'Sheet1, Sheet2',
                optional: true
            },
            {
                label: 'Range',
                name: 'range',
                type: 'string',
                description: 'Range to load (e.g., A1:E10). If empty, loads entire sheet.',
                placeholder: 'A1:E10',
                optional: true
            },
            {
                label: 'Include Headers',
                name: 'includeHeaders',
                type: 'boolean',
                description: 'Whether to include the first row as headers',
                default: true
            },
            {
                label: 'Value Render Option',
                name: 'valueRenderOption',
                type: 'options',
                description: 'How values should be represented in the output',
                options: [
                    {
                        label: 'Formatted Value',
                        name: 'FORMATTED_VALUE'
                    },
                    {
                        label: 'Unformatted Value',
                        name: 'UNFORMATTED_VALUE'
                    },
                    {
                        label: 'Formula',
                        name: 'FORMULA'
                    }
                ],
                default: 'FORMATTED_VALUE',
                optional: true
            },
            {
                label: 'Text Splitter',
                name: 'textSplitter',
                type: 'TextSplitter',
                optional: true
            },
            {
                label: 'Additional Metadata',
                name: 'metadata',
                type: 'json',
                description: 'Additional metadata to be added to the extracted documents',
                optional: true,
                additionalParams: true
            },
            {
                label: 'Omit Metadata Keys',
                name: 'omitMetadataKeys',
                type: 'string',
                rows: 4,
                description:
                    'Each document loader comes with a default set of metadata keys that are extracted from the document. You can use this field to omit some of the default metadata keys. The value should be a list of keys, seperated by comma. Use * to omit all metadata keys execept the ones you specify in the Additional Metadata field',
                placeholder: 'key1, key2, key3.nestedKey1',
                optional: true,
                additionalParams: true
            }
        ]
        this.outputs = [
            {
                label: 'Document',
                name: 'document',
                description: 'Array of document objects containing metadata and pageContent',
                baseClasses: [...this.baseClasses, 'json']
            },
            {
                label: 'Text',
                name: 'text',
                description: 'Concatenated string from pageContent of documents',
                baseClasses: ['string', 'json']
            }
        ]
    }

    //@ts-ignore
    loadMethods = {
        /**
         * Lists the spreadsheets the connected credential can see in Google Drive, for
         * the "Select Spreadsheet" input. Only the first page (up to 100 files, most
         * recently modified first) is requested.
         *
         * This is best-effort: a missing token, a non-OK response or any thrown error
         * yields the options gathered so far (usually an empty list) instead of throwing.
         *
         * @param nodeData node configuration; `nodeData.credential` identifies the credential
         * @param options  passed through to the credential helpers
         * @returns one option per spreadsheet (`name` = file id, `label` = file name)
         */
        async listSpreadsheets(nodeData: INodeData, options: ICommonObject): Promise<INodeOptionsValue[]> {
            const returnData: INodeOptionsValue[] = []

            try {
                let credentialData = await getCredentialData(nodeData.credential ?? '', options)
                credentialData = await refreshOAuth2Token(nodeData.credential ?? '', credentialData, options)
                const accessToken = getCredentialParam('access_token', credentialData, nodeData)

                if (!accessToken) {
                    return returnData
                }

                // Query for Google Sheets files specifically
                const query = "mimeType='application/vnd.google-apps.spreadsheet' and trashed = false"

                const url = new URL('https://www.googleapis.com/drive/v3/files')
                url.searchParams.append('q', query)
                url.searchParams.append('pageSize', '100')
                url.searchParams.append('fields', 'files(id, name, modifiedTime, webViewLink)')
                url.searchParams.append('orderBy', 'modifiedTime desc')

                const response = await fetch(url.toString(), {
                    headers: {
                        Authorization: `Bearer ${accessToken}`,
                        'Content-Type': 'application/json'
                    }
                })

                if (!response.ok) {
                    console.error(`Failed to list spreadsheets: ${response.statusText}`)
                    return returnData
                }

                const data = await response.json()
                // Guard against a response body that carries no `files` array.
                const files: any[] = Array.isArray(data?.files) ? data.files : []

                for (const file of files) {
                    returnData.push({
                        name: file.id,
                        label: file.name,
                        description: `Modified: ${new Date(file.modifiedTime).toLocaleDateString()}`
                    })
                }
            } catch (error) {
                console.error('Error listing Google Sheets:', error)
            }

            return returnData
        }
    }

    /**
     * Loads the selected spreadsheets and returns them as documents or as one text blob.
     *
     * Each requested sheet (or every sheet when `sheetNames` is empty) becomes one
     * markdown-table document. A spreadsheet that fails to load is logged and skipped
     * so the remaining spreadsheets are still processed. The optional text splitter is
     * applied afterwards, then the additional / omitted metadata settings.
     *
     * @param nodeData node configuration (inputs, outputs, credential)
     * @param _        unused
     * @param options  passed through to the credential helpers
     * @returns the document array when the selected output is `document`, otherwise
     *          the concatenated page contents passed through `handleEscapeCharacters`
     * @throws Error when no spreadsheet is selected, when the credential yields no
     *         access token, or when splitting / metadata processing fails
     */
    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
        const _spreadsheetIds = nodeData.inputs?.spreadsheetIds as string
        const sheetNames = nodeData.inputs?.sheetNames as string
        const rawRange = nodeData.inputs?.range as string
        // Honour the declared default (`true`) when the input was never set.
        const includeHeaders = (nodeData.inputs?.includeHeaders as boolean | undefined) ?? true
        const valueRenderOption = (nodeData.inputs?.valueRenderOption as string) || 'FORMATTED_VALUE'
        const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
        const metadata = nodeData.inputs?.metadata
        const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string
        const output = nodeData.outputs?.output as string

        const range = typeof rawRange === 'string' ? rawRange.trim() : ''
        const requestedSheets = this.splitCommaList(sheetNames)
        const omitMetadataKeys = this.splitCommaList(_omitMetadataKeys)
        const omitAllDefaultMetadata = typeof _omitMetadataKeys === 'string' && _omitMetadataKeys.trim() === '*'

        if (!_spreadsheetIds) {
            throw new Error('At least one spreadsheet is required')
        }

        const spreadsheetIds = convertMultiOptionsToStringArray(_spreadsheetIds)

        let credentialData = await getCredentialData(nodeData.credential ?? '', options)
        credentialData = await refreshOAuth2Token(nodeData.credential ?? '', credentialData, options)
        const accessToken = getCredentialParam('access_token', credentialData, nodeData)

        if (!accessToken) {
            throw new Error('No access token found in credential')
        }

        let docs: IDocument[] = []

        try {
            // Spreadsheets and sheets are fetched one at a time, in order.
            for (const spreadsheetId of spreadsheetIds) {
                try {
                    const spreadsheetMetadata = await this.getSpreadsheetMetadata(spreadsheetId, accessToken)

                    // Explicit sheet names win; otherwise load every sheet listed in the metadata.
                    const sheetsToLoad: string[] =
                        requestedSheets.length > 0
                            ? requestedSheets
                            : spreadsheetMetadata.sheets?.map((sheet: any) => sheet.properties.title) || []

                    for (const sheetName of sheetsToLoad) {
                        const sheetRange = this.toA1Range(sheetName, range)
                        const sheetData = await this.getSheetData(spreadsheetId, sheetRange, valueRenderOption, accessToken)

                        if (sheetData.values && sheetData.values.length > 0) {
                            docs.push(this.convertSheetToDocument(sheetData, sheetName, spreadsheetId, spreadsheetMetadata, includeHeaders))
                        }
                    }
                } catch (error) {
                    // Continue processing other spreadsheets even if one fails
                    console.warn(`Failed to process spreadsheet ${spreadsheetId}: ${this.describeError(error)}`)
                }
            }

            if (textSplitter && docs.length > 0) {
                docs = await textSplitter.splitDocuments(docs)
            }

            // No additional metadata behaves exactly like an empty additional-metadata object.
            const parsedMetadata = metadata ? (typeof metadata === 'object' ? metadata : JSON.parse(metadata)) : {}
            docs = docs.map((doc) => ({
                ...doc,
                metadata: omitAllDefaultMetadata
                    ? { ...parsedMetadata }
                    : omit({ ...doc.metadata, ...parsedMetadata }, omitMetadataKeys)
            }))
        } catch (error) {
            throw new Error(`Failed to load Google Sheets data: ${this.describeError(error)}`)
        }

        if (output === 'document') {
            return docs
        }

        const finaltext = docs.map((doc) => `${doc.pageContent}\n`).join('')
        return handleEscapeCharacters(finaltext, false)
    }

    /**
     * Fetches the spreadsheet resource (title, sheet list, ...) from the Sheets API.
     *
     * @throws Error carrying status, status text and response body on a non-OK response
     */
    private async getSpreadsheetMetadata(spreadsheetId: string, accessToken: string): Promise<any> {
        const url = `https://sheets.googleapis.com/v4/spreadsheets/${encodeURIComponent(spreadsheetId)}`

        const response = await fetch(url, {
            headers: {
                Authorization: `Bearer ${accessToken}`,
                'Content-Type': 'application/json'
            }
        })

        if (!response.ok) {
            const errorText = await response.text()
            throw new Error(`Failed to get spreadsheet metadata: ${response.status} ${response.statusText} - ${errorText}`)
        }

        return response.json()
    }

    /**
     * Fetches the cell values of one A1 range, row-major, with dates rendered as strings.
     *
     * @param range             A1-notation range (sheet name already quoted where needed)
     * @param valueRenderOption forwarded as the `valueRenderOption` query parameter
     * @throws Error carrying status, status text and response body on a non-OK response
     */
    private async getSheetData(spreadsheetId: string, range: string, valueRenderOption: string, accessToken: string): Promise<any> {
        const url = `https://sheets.googleapis.com/v4/spreadsheets/${encodeURIComponent(spreadsheetId)}/values/${encodeURIComponent(range)}`
        const params = new URLSearchParams({
            valueRenderOption,
            dateTimeRenderOption: 'FORMATTED_STRING',
            majorDimension: 'ROWS'
        })

        const response = await fetch(`${url}?${params.toString()}`, {
            headers: {
                Authorization: `Bearer ${accessToken}`,
                'Content-Type': 'application/json'
            }
        })

        if (!response.ok) {
            const errorText = await response.text()
            throw new Error(`Failed to get sheet data: ${response.status} ${response.statusText} - ${errorText}`)
        }

        return response.json()
    }

    /**
     * Renders one sheet's values as a markdown table and wraps it in a document.
     *
     * With `includeHeaders` the first row supplies the column titles; otherwise
     * spreadsheet-style letters (A, B, ..., Z, AA, ...) are generated for the widest
     * row. Rows shorter than the header are padded with empty cells.
     *
     * @param sheetData           values response (`values`, `range`)
     * @param sheetName           title of the sheet the values came from
     * @param spreadsheetId       id of the owning spreadsheet
     * @param spreadsheetMetadata spreadsheet resource; only `properties.title` is read
     * @param includeHeaders      whether the first row holds the column titles
     * @returns document whose `pageContent` is the table ('' when there is nothing to render)
     */
    private convertSheetToDocument(
        sheetData: any,
        sheetName: string,
        spreadsheetId: string,
        spreadsheetMetadata: any,
        includeHeaders: boolean
    ): IDocument {
        const values: unknown[][] = Array.isArray(sheetData?.values) ? sheetData.values : []
        const spreadsheetTitle = spreadsheetMetadata?.properties?.title

        // Shared by the empty and non-empty results so both expose the same identifying keys.
        const baseMetadata = {
            source: `Google Sheets: ${spreadsheetTitle || 'Unknown'} - ${sheetName}`,
            spreadsheetId,
            sheetName,
            spreadsheetTitle,
            spreadsheetUrl: `https://docs.google.com/spreadsheets/d/${spreadsheetId}`,
            range: sheetData?.range
        }

        if (values.length === 0) {
            return {
                pageContent: '',
                metadata: { ...baseMetadata, rowCount: 0, columnCount: 0 }
            }
        }

        let headers: unknown[]
        let dataRows: unknown[][]

        if (includeHeaders) {
            headers = values[0] ?? []
            dataRows = values.slice(1)
        } else {
            // reduce instead of Math.max(...rows): spreading a very tall sheet can exhaust the call stack.
            const maxColumns = values.reduce((widest, row) => Math.max(widest, row?.length ?? 0), 0)
            headers = Array.from({ length: maxColumns }, (_, i) => this.columnLabel(i))
            dataRows = values
        }

        const lines: string[] = []

        if (headers.length > 0) {
            lines.push(this.toMarkdownRow(headers))
            lines.push(this.toMarkdownRow(headers.map(() => '---')))

            for (const row of dataRows) {
                const paddedRow = [...(row ?? [])]
                // Pad row to match header length
                while (paddedRow.length < headers.length) {
                    paddedRow.push('')
                }
                lines.push(this.toMarkdownRow(paddedRow))
            }
        }

        return {
            pageContent: lines.map((line) => `${line}\n`).join(''),
            metadata: {
                ...baseMetadata,
                rowCount: values.length,
                columnCount: headers.length,
                headers: includeHeaders ? headers : undefined,
                totalDataRows: dataRows.length
            }
        }
    }

    /** Splits a comma-separated input into trimmed, non-empty entries ([] for empty / non-string input). */
    private splitCommaList(value: unknown): string[] {
        if (typeof value !== 'string') {
            return []
        }
        return value
            .split(',')
            .map((entry) => entry.trim())
            .filter((entry) => entry.length > 0)
    }

    /**
     * Builds an A1-notation reference for a sheet, optionally restricted to `range`.
     * The sheet name is wrapped in single quotes (embedded quotes doubled) so names with
     * spaces, punctuation, or that look like a cell reference resolve to the sheet.
     * A name that is already wrapped in single quotes is used unchanged.
     */
    private toA1Range(sheetName: string, range?: string): string {
        const alreadyQuoted = sheetName.length >= 2 && sheetName.startsWith("'") && sheetName.endsWith("'")
        const quotedName = alreadyQuoted ? sheetName : `'${sheetName.replace(/'/g, "''")}'`
        return range ? `${quotedName}!${range}` : quotedName
    }

    /** Converts a zero-based column index to spreadsheet letters: 0 -> A, 25 -> Z, 26 -> AA. */
    private columnLabel(index: number): string {
        let label = ''
        // Bijective base-26: work on the 1-based position and peel off one letter per step.
        for (let position = index + 1; position > 0; position = Math.floor((position - 1) / 26)) {
            label = String.fromCharCode(65 + ((position - 1) % 26)) + label
        }
        return label
    }

    /** Renders one cell as table-safe text: null/undefined -> '', line breaks -> space, `|` -> `\|`. */
    private formatCell(cell: unknown): string {
        if (cell === null || cell === undefined) {
            return ''
        }
        return String(cell)
            .replace(/\r\n|\r|\n/g, ' ')
            .replace(/\|/g, '\\|')
    }

    /** Joins cells into a single markdown table row (without the trailing newline). */
    private toMarkdownRow(cells: unknown[]): string {
        return `| ${cells.map((cell) => this.formatCell(cell)).join(' | ')} |`
    }

    /** Extracts a printable message from a caught value of unknown type. */
    private describeError(error: unknown): string {
        return error instanceof Error ? error.message : String(error)
    }
}

// Expose the node class under the `nodeClass` key of the CommonJS export.
module.exports = { nodeClass: GoogleSheets_DocumentLoaders }