fix: Upgrade Hugging Face Inference API to support Inference Providers (#5454)

- Upgrade @huggingface/inference from v2.6.1 to v4.13.2
- Update ChatHuggingFace to use InferenceClient with chatCompletion API
- Update HuggingFaceInference (LLM) to use v4 HfInference with Inference Providers
- Update HuggingFaceInferenceEmbedding to use v4 HfInference
- Add endpoint handling logic that ignores a custom endpoint when the model name includes a provider suffix (`:`) or the endpoint is a router / chat-completions URL
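- Improve error handling (clearer messages for endpoint/provider conflicts and authentication failures) and add API key validation (missing or blank keys are rejected; keys not starting with `hf_` log a warning)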
- Update UI descriptions to guide users on proper configuration

Fixes #5161

Co-authored-by: Henry <hzj94@hotmail.com>
This commit is contained in:
Siddharth Chauhan 2025-11-25 17:13:36 +05:30 committed by GitHub
parent 097404f24a
commit 0cc7b3036e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 192 additions and 82 deletions

View File

@ -1569,16 +1569,20 @@ class Agent_Agentflow implements INode {
for await (const chunk of await llmNodeInstance.stream(messages, { signal: abortController?.signal })) {
if (sseStreamer) {
let content = ''
if (Array.isArray(chunk.content) && chunk.content.length > 0) {
if (typeof chunk === 'string') {
content = chunk
} else if (Array.isArray(chunk.content) && chunk.content.length > 0) {
const contents = chunk.content as MessageContentText[]
content = contents.map((item) => item.text).join('')
} else {
} else if (chunk.content) {
content = chunk.content.toString()
}
sseStreamer.streamTokenEvent(chatId, content)
}
response = response.concat(chunk)
const messageChunk = typeof chunk === 'string' ? new AIMessageChunk(chunk) : chunk
response = response.concat(messageChunk)
}
} catch (error) {
console.error('Error during streaming:', error)

View File

@ -241,8 +241,11 @@ class HumanInput_Agentflow implements INode {
if (isStreamable) {
const sseStreamer: IServerSideEventStreamer = options.sseStreamer as IServerSideEventStreamer
for await (const chunk of await llmNodeInstance.stream(messages)) {
sseStreamer.streamTokenEvent(chatId, chunk.content.toString())
response = response.concat(chunk)
const content = typeof chunk === 'string' ? chunk : chunk.content.toString()
sseStreamer.streamTokenEvent(chatId, content)
const messageChunk = typeof chunk === 'string' ? new AIMessageChunk(chunk) : chunk
response = response.concat(messageChunk)
}
humanInputDescription = response.content as string
} else {

View File

@ -824,16 +824,20 @@ class LLM_Agentflow implements INode {
for await (const chunk of await llmNodeInstance.stream(messages, { signal: abortController?.signal })) {
if (sseStreamer) {
let content = ''
if (Array.isArray(chunk.content) && chunk.content.length > 0) {
if (typeof chunk === 'string') {
content = chunk
} else if (Array.isArray(chunk.content) && chunk.content.length > 0) {
const contents = chunk.content as MessageContentText[]
content = contents.map((item) => item.text).join('')
} else {
} else if (chunk.content) {
content = chunk.content.toString()
}
sseStreamer.streamTokenEvent(chatId, content)
}
response = response.concat(chunk)
const messageChunk = typeof chunk === 'string' ? new AIMessageChunk(chunk) : chunk
response = response.concat(messageChunk)
}
} catch (error) {
console.error('Error during streaming:', error)

View File

@ -41,15 +41,17 @@ class ChatHuggingFace_ChatModels implements INode {
label: 'Model',
name: 'model',
type: 'string',
description: 'If using own inference endpoint, leave this blank',
placeholder: 'gpt2'
description:
'Model name (e.g., deepseek-ai/DeepSeek-V3.2-Exp:novita). If model includes provider (:) or using router endpoint, leave Endpoint blank.',
placeholder: 'deepseek-ai/DeepSeek-V3.2-Exp:novita'
},
{
label: 'Endpoint',
name: 'endpoint',
type: 'string',
placeholder: 'https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2',
description: 'Using your own inference endpoint',
description:
'Custom inference endpoint (optional). Not needed for models with providers (:) or router endpoints. Leave blank to use Inference Providers.',
optional: true
},
{
@ -124,6 +126,15 @@ class ChatHuggingFace_ChatModels implements INode {
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const huggingFaceApiKey = getCredentialParam('huggingFaceApiKey', credentialData, nodeData)
if (!huggingFaceApiKey) {
console.error('[ChatHuggingFace] API key validation failed: No API key found')
throw new Error('HuggingFace API key is required. Please configure it in the credential settings.')
}
if (!huggingFaceApiKey.startsWith('hf_')) {
console.warn('[ChatHuggingFace] API key format warning: Key does not start with "hf_"')
}
const obj: Partial<HFInput> = {
model,
apiKey: huggingFaceApiKey

View File

@ -56,9 +56,9 @@ export class HuggingFaceInference extends LLM implements HFInput {
this.apiKey = fields?.apiKey ?? getEnvironmentVariable('HUGGINGFACEHUB_API_KEY')
this.endpointUrl = fields?.endpointUrl
this.includeCredentials = fields?.includeCredentials
if (!this.apiKey) {
if (!this.apiKey || this.apiKey.trim() === '') {
throw new Error(
'Please set an API key for HuggingFace Hub in the environment variable HUGGINGFACEHUB_API_KEY or in the apiKey field of the HuggingFaceInference constructor.'
'Please set an API key for HuggingFace Hub. Either configure it in the credential settings in the UI, or set the environment variable HUGGINGFACEHUB_API_KEY.'
)
}
}
@ -68,19 +68,21 @@ export class HuggingFaceInference extends LLM implements HFInput {
}
invocationParams(options?: this['ParsedCallOptions']) {
return {
model: this.model,
parameters: {
// make it behave similar to openai, returning only the generated text
return_full_text: false,
temperature: this.temperature,
max_new_tokens: this.maxTokens,
stop: options?.stop ?? this.stopSequences,
top_p: this.topP,
top_k: this.topK,
repetition_penalty: this.frequencyPenalty
}
// Return parameters compatible with chatCompletion API (OpenAI-compatible format)
const params: any = {
temperature: this.temperature,
max_tokens: this.maxTokens,
stop: options?.stop ?? this.stopSequences,
top_p: this.topP
}
// Include optional parameters if they are defined
if (this.topK !== undefined) {
params.top_k = this.topK
}
if (this.frequencyPenalty !== undefined) {
params.frequency_penalty = this.frequencyPenalty
}
return params
}
async *_streamResponseChunks(
@ -88,51 +90,109 @@ export class HuggingFaceInference extends LLM implements HFInput {
options: this['ParsedCallOptions'],
runManager?: CallbackManagerForLLMRun
): AsyncGenerator<GenerationChunk> {
const hfi = await this._prepareHFInference()
const stream = await this.caller.call(async () =>
hfi.textGenerationStream({
...this.invocationParams(options),
inputs: prompt
})
)
for await (const chunk of stream) {
const token = chunk.token.text
yield new GenerationChunk({ text: token, generationInfo: chunk })
await runManager?.handleLLMNewToken(token ?? '')
// stream is done
if (chunk.generated_text)
yield new GenerationChunk({
text: '',
generationInfo: { finished: true }
try {
const client = await this._prepareHFInference()
const stream = await this.caller.call(async () =>
client.chatCompletionStream({
model: this.model,
messages: [{ role: 'user', content: prompt }],
...this.invocationParams(options)
})
)
for await (const chunk of stream) {
const token = chunk.choices[0]?.delta?.content || ''
if (token) {
yield new GenerationChunk({ text: token, generationInfo: chunk })
await runManager?.handleLLMNewToken(token)
}
// stream is done when finish_reason is set
if (chunk.choices[0]?.finish_reason) {
yield new GenerationChunk({
text: '',
generationInfo: { finished: true }
})
break
}
}
} catch (error: any) {
console.error('[ChatHuggingFace] Error in _streamResponseChunks:', error)
// Provide more helpful error messages
if (error?.message?.includes('endpointUrl') || error?.message?.includes('third-party provider')) {
throw new Error(
`Cannot use custom endpoint with model "${this.model}" that includes a provider. Please leave the Endpoint field blank in the UI. Original error: ${error.message}`
)
}
throw error
}
}
/** @ignore */
async _call(prompt: string, options: this['ParsedCallOptions']): Promise<string> {
const hfi = await this._prepareHFInference()
const args = { ...this.invocationParams(options), inputs: prompt }
const res = await this.caller.callWithOptions({ signal: options.signal }, hfi.textGeneration.bind(hfi), args)
return res.generated_text
try {
const client = await this._prepareHFInference()
// Use chatCompletion for chat models (v4 supports conversational models via Inference Providers)
const args = {
model: this.model,
messages: [{ role: 'user', content: prompt }],
...this.invocationParams(options)
}
const res = await this.caller.callWithOptions({ signal: options.signal }, client.chatCompletion.bind(client), args)
const content = res.choices[0]?.message?.content || ''
if (!content) {
console.error('[ChatHuggingFace] No content in response:', JSON.stringify(res))
throw new Error(`No content received from HuggingFace API. Response: ${JSON.stringify(res)}`)
}
return content
} catch (error: any) {
console.error('[ChatHuggingFace] Error in _call:', error.message)
// Provide more helpful error messages
if (error?.message?.includes('endpointUrl') || error?.message?.includes('third-party provider')) {
throw new Error(
`Cannot use custom endpoint with model "${this.model}" that includes a provider. Please leave the Endpoint field blank in the UI. Original error: ${error.message}`
)
}
if (error?.message?.includes('Invalid username or password') || error?.message?.includes('authentication')) {
throw new Error(
`HuggingFace API authentication failed. Please verify your API key is correct and starts with "hf_". Original error: ${error.message}`
)
}
throw error
}
}
/** @ignore */
private async _prepareHFInference() {
const { HfInference } = await HuggingFaceInference.imports()
const hfi = new HfInference(this.apiKey, {
includeCredentials: this.includeCredentials
})
return this.endpointUrl ? hfi.endpoint(this.endpointUrl) : hfi
if (!this.apiKey || this.apiKey.trim() === '') {
console.error('[ChatHuggingFace] API key validation failed: Empty or undefined')
throw new Error('HuggingFace API key is required. Please configure it in the credential settings.')
}
const { InferenceClient } = await HuggingFaceInference.imports()
// Use InferenceClient for chat models (works better with Inference Providers)
const client = new InferenceClient(this.apiKey)
// Don't override endpoint if model uses a provider (contains ':') or if endpoint is router-based
// When using Inference Providers, endpoint should be left blank - InferenceClient handles routing automatically
if (
this.endpointUrl &&
!this.model.includes(':') &&
!this.endpointUrl.includes('/v1/chat/completions') &&
!this.endpointUrl.includes('router.huggingface.co')
) {
return client.endpoint(this.endpointUrl)
}
// Return client without endpoint override - InferenceClient will use Inference Providers automatically
return client
}
/** @ignore */
static async imports(): Promise<{
HfInference: typeof import('@huggingface/inference').HfInference
InferenceClient: typeof import('@huggingface/inference').InferenceClient
}> {
try {
const { HfInference } = await import('@huggingface/inference')
return { HfInference }
const { InferenceClient } = await import('@huggingface/inference')
return { InferenceClient }
} catch (e) {
throw new Error('Please install huggingface as a dependency with, e.g. `pnpm install @huggingface/inference`')
}

View File

@ -23,24 +23,22 @@ export class HuggingFaceInferenceEmbeddings extends Embeddings implements Huggin
this.model = fields?.model ?? 'sentence-transformers/distilbert-base-nli-mean-tokens'
this.apiKey = fields?.apiKey ?? getEnvironmentVariable('HUGGINGFACEHUB_API_KEY')
this.endpoint = fields?.endpoint ?? ''
this.client = new HfInference(this.apiKey)
if (this.endpoint) this.client.endpoint(this.endpoint)
const hf = new HfInference(this.apiKey)
// v4 uses Inference Providers by default; only override if custom endpoint provided
this.client = this.endpoint ? hf.endpoint(this.endpoint) : hf
}
async _embed(texts: string[]): Promise<number[][]> {
// replace newlines, which can negatively affect performance.
const clean = texts.map((text) => text.replace(/\n/g, ' '))
const hf = new HfInference(this.apiKey)
const obj: any = {
inputs: clean
}
if (this.endpoint) {
hf.endpoint(this.endpoint)
} else {
if (!this.endpoint) {
obj.model = this.model
}
const res = await this.caller.callWithOptions({}, hf.featureExtraction.bind(hf), obj)
const res = await this.caller.callWithOptions({}, this.client.featureExtraction.bind(this.client), obj)
return res as number[][]
}

View File

@ -78,6 +78,8 @@ export class HuggingFaceInference extends LLM implements HFInput {
async _call(prompt: string, options: this['ParsedCallOptions']): Promise<string> {
const { HfInference } = await HuggingFaceInference.imports()
const hf = new HfInference(this.apiKey)
// v4 uses Inference Providers by default; only override if custom endpoint provided
const hfClient = this.endpoint ? hf.endpoint(this.endpoint) : hf
const obj: any = {
parameters: {
// make it behave similar to openai, returning only the generated text
@ -90,12 +92,10 @@ export class HuggingFaceInference extends LLM implements HFInput {
},
inputs: prompt
}
if (this.endpoint) {
hf.endpoint(this.endpoint)
} else {
if (!this.endpoint) {
obj.model = this.model
}
const res = await this.caller.callWithOptions({ signal: options.signal }, hf.textGeneration.bind(hf), obj)
const res = await this.caller.callWithOptions({ signal: options.signal }, hfClient.textGeneration.bind(hfClient), obj)
return res.generated_text
}

View File

@ -43,7 +43,7 @@
"@google-cloud/storage": "^7.15.2",
"@google/generative-ai": "^0.24.0",
"@grpc/grpc-js": "^1.10.10",
"@huggingface/inference": "^2.6.1",
"@huggingface/inference": "^4.13.2",
"@langchain/anthropic": "0.3.33",
"@langchain/aws": "^0.1.11",
"@langchain/baidu-qianfan": "^0.1.0",

File diff suppressed because one or more lines are too long