Switched to specifying Airtable fields to include rather than exclude - this helps reduce the amount of data fetched by the DocumentLoader when there are massive numbers of fields in an Airtable table.

This commit is contained in:
Darien Kindlund 2024-01-25 11:00:58 -05:00
parent 6f7b7408e1
commit 71f456af90
1 changed file with 39 additions and 28 deletions

View File

@ -65,13 +65,13 @@ class Airtable_DocumentLoaders implements INode {
optional: true optional: true
}, },
{ {
label: 'Exclude Field Names', label: 'Fields',
name: 'excludeFieldNames', name: 'fields',
type: 'string', type: 'string',
placeholder: 'Name, Assignee', placeholder: 'Name, Assignee, fld1u0qUz0SoOQ9Gg, fldew39v6LBN5CjUl',
optional: true, optional: true,
additionalParams: true, additionalParams: true,
description: 'Comma-separated list of field names to exclude' description: 'Comma-separated list of field names or IDs to include. Use field IDs if field names contain commas.'
}, },
{ {
label: 'Return All', label: 'Return All',
@ -102,7 +102,8 @@ class Airtable_DocumentLoaders implements INode {
const baseId = nodeData.inputs?.baseId as string const baseId = nodeData.inputs?.baseId as string
const tableId = nodeData.inputs?.tableId as string const tableId = nodeData.inputs?.tableId as string
const viewId = nodeData.inputs?.viewId as string const viewId = nodeData.inputs?.viewId as string
const excludeFieldNames = nodeData.inputs?.excludeFieldNames as string const fieldsInput = nodeData.inputs?.fields as string
const fields = fieldsInput ? fieldsInput.split(',').map((field) => field.trim()) : []
const returnAll = nodeData.inputs?.returnAll as boolean const returnAll = nodeData.inputs?.returnAll as boolean
const limit = nodeData.inputs?.limit as string const limit = nodeData.inputs?.limit as string
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
@ -115,7 +116,7 @@ class Airtable_DocumentLoaders implements INode {
baseId, baseId,
tableId, tableId,
viewId, viewId,
excludeFieldNames: excludeFieldNames ? excludeFieldNames.split(',').map((id) => id.trim()) : [], fields,
returnAll, returnAll,
accessToken, accessToken,
limit: limit ? parseInt(limit, 10) : 100 limit: limit ? parseInt(limit, 10) : 100
@ -156,7 +157,7 @@ interface AirtableLoaderParams {
tableId: string tableId: string
accessToken: string accessToken: string
viewId?: string viewId?: string
excludeFieldNames?: string[] fields?: string[]
limit?: number limit?: number
returnAll?: boolean returnAll?: boolean
} }
@ -179,7 +180,7 @@ class AirtableLoader extends BaseDocumentLoader {
public readonly viewId?: string public readonly viewId?: string
public readonly excludeFieldNames: string[] public readonly fields: string[]
public readonly accessToken: string public readonly accessToken: string
@ -187,12 +188,12 @@ class AirtableLoader extends BaseDocumentLoader {
public readonly returnAll: boolean public readonly returnAll: boolean
constructor({ baseId, tableId, viewId, excludeFieldNames = [], accessToken, limit = 100, returnAll = false }: AirtableLoaderParams) { constructor({ baseId, tableId, viewId, fields = [], accessToken, limit = 100, returnAll = false }: AirtableLoaderParams) {
super() super()
this.baseId = baseId this.baseId = baseId
this.tableId = tableId this.tableId = tableId
this.viewId = viewId this.viewId = viewId
this.excludeFieldNames = excludeFieldNames this.fields = fields
this.accessToken = accessToken this.accessToken = accessToken
this.limit = limit this.limit = limit
this.returnAll = returnAll this.returnAll = returnAll
@ -205,14 +206,14 @@ class AirtableLoader extends BaseDocumentLoader {
return this.loadLimit() return this.loadLimit()
} }
protected async fetchAirtableData(url: string, params: ICommonObject): Promise<AirtableLoaderResponse> { protected async fetchAirtableData(url: string, data: any): Promise<AirtableLoaderResponse> {
try { try {
const headers = { const headers = {
Authorization: `Bearer ${this.accessToken}`, Authorization: `Bearer ${this.accessToken}`,
'Content-Type': 'application/json', 'Content-Type': 'application/json',
Accept: 'application/json' Accept: 'application/json'
} }
const response = await axios.get(url, { params, headers }) const response = await axios.get(url, data, { headers })
return response.data return response.data
} catch (error) { } catch (error) {
throw new Error(`Failed to fetch ${url} from Airtable: ${error}`) throw new Error(`Failed to fetch ${url} from Airtable: ${error}`)
@ -222,12 +223,6 @@ class AirtableLoader extends BaseDocumentLoader {
private createDocumentFromPage(page: AirtableLoaderPage): Document { private createDocumentFromPage(page: AirtableLoaderPage): Document {
// Generate the URL // Generate the URL
const pageUrl = `https://api.airtable.com/v0/${this.baseId}/${this.tableId}/${page.id}` const pageUrl = `https://api.airtable.com/v0/${this.baseId}/${this.tableId}/${page.id}`
const fields = { ...page.fields }
// Exclude any specified fields
this.excludeFieldNames.forEach((id) => {
delete fields[id]
})
// Return a langchain document // Return a langchain document
return new Document({ return new Document({
@ -239,24 +234,40 @@ class AirtableLoader extends BaseDocumentLoader {
} }
/**
 * Loads one bounded batch of records and converts each record to a Document.
 *
 * The request carries `maxRecords` (from `this.limit`) and `view`
 * (from `this.viewId`). A `fields` entry is added only when at least one
 * field name/ID was configured, so an empty list leaves the request unrestricted.
 *
 * @returns One Document per record in the response; an empty array when the
 *          response contains no records.
 */
private async loadLimit(): Promise<Document[]> {
    // Declared as an open record so the optional `fields` key can be attached
    // below without a type error on the object literal.
    const data: Record<string, unknown> = {
        maxRecords: this.limit,
        view: this.viewId
    }

    if (this.fields.length > 0) {
        data.fields = this.fields
    }

    const response = await this.fetchAirtableData(`https://api.airtable.com/v0/${this.baseId}/${this.tableId}`, data)

    // Mapping an empty record list already yields [], so no separate guard is needed.
    return response.records.map((page) => this.createDocumentFromPage(page))
}
/**
 * Loads every record by requesting pages of 100 until the response no longer
 * carries an `offset`, then converts each collected record to a Document.
 *
 * The request carries `pageSize` and `view` (from `this.viewId`). A `fields`
 * entry is added only when at least one field name/ID was configured.
 *
 * @returns One Document per record gathered across all pages.
 */
private async loadAll(): Promise<Document[]> {
    // Declared as an open record so `fields` and `offset` can be attached later.
    const data: Record<string, unknown> = {
        pageSize: 100,
        view: this.viewId
    }

    if (this.fields.length > 0) {
        data.fields = this.fields
    }

    let response: AirtableLoaderResponse
    const returnPages: AirtableLoaderPage[] = []

    do {
        response = await this.fetchAirtableData(`https://api.airtable.com/v0/${this.baseId}/${this.tableId}`, data)
        returnPages.push(...response.records)
        // Forward the cursor on the same request object that is actually sent;
        // writing it anywhere else would re-request the first page forever.
        data.offset = response.offset
    } while (response.offset !== undefined)

    return returnPages.map((page) => this.createDocumentFromPage(page))
}
} }