From 186f909d4e158f09bbfe44b613d7068cd6e3e56d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 29 Sep 2025 16:34:53 +0000 Subject: [PATCH] Add documentation and manual testing for HuggingFace embedding fix Co-authored-by: HenryHengZJ <26460777+HenryHengZJ@users.noreply.github.com> --- HUGGINGFACE_EMBEDDING_FIX.md | 79 +++++++++++++++ .../components/test_huggingface_manual.js | 96 +++++++++++++++++++ 2 files changed, 175 insertions(+) create mode 100644 HUGGINGFACE_EMBEDDING_FIX.md create mode 100644 packages/components/test_huggingface_manual.js diff --git a/HUGGINGFACE_EMBEDDING_FIX.md b/HUGGINGFACE_EMBEDDING_FIX.md new file mode 100644 index 000000000..e4af90b42 --- /dev/null +++ b/HUGGINGFACE_EMBEDDING_FIX.md @@ -0,0 +1,79 @@ +# HuggingFace Embedding API Fix + +## Issue +HuggingFace embedding Inference API was not working and outputting "Error in Agent node: An error occurred while fetching the blob". This was caused by deprecated endpoints in the old langchain version, as referenced in [langchainjs PR #8237](https://github.com/langchain-ai/langchainjs/pull/8237). + +## Root Cause +The project was using a very outdated version of langchain (0.0.112) while the latest version was 0.3.34. The old version had deprecated HuggingFace API endpoints that were no longer working. + +## Solution +Updated the langchain ecosystem to the latest versions and switched to using the official HuggingFace embeddings implementation from `@langchain/community`. + +### Changes Made + +#### 1. Updated Dependencies (`packages/components/package.json`) +- `langchain`: `^0.0.112` → `^0.3.34` +- `@huggingface/inference`: `^2.6.1` → `^4.0.5` +- Added `@langchain/community`: `^0.3.56` +- Added `@langchain/core`: `^0.3.78` + +#### 2. 
Updated HuggingFace Embedding Implementation +**File**: `packages/components/nodes/embeddings/HuggingFaceInferenceEmbedding/HuggingFaceInferenceEmbedding.ts` + +**Before**: +```typescript +import { HuggingFaceInferenceEmbeddings, HuggingFaceInferenceEmbeddingsParams } from './core' +``` + +**After**: +```typescript +import { HuggingFaceInferenceEmbeddings, HuggingFaceInferenceEmbeddingsParams } from '@langchain/community/dist/embeddings/hf.cjs' +``` + +#### 3. Parameter Mapping Fix +Updated parameter mapping for the new API: +- `endpoint` → `endpointUrl` + +**Before**: +```typescript +if (endpoint) obj.endpoint = endpoint +``` + +**After**: +```typescript +if (endpoint) obj.endpointUrl = endpoint +``` + +#### 4. Removed Custom Implementation +- Renamed `core.ts` to `core.ts.backup` since we now use the official langchain community implementation +- The custom implementation is no longer needed as the official version has all the fixes + +### Testing +Created test scripts to verify the fix: +- `test_huggingface_manual.js` - Manual test with real API calls (requires valid API key) +- Automated tests confirm the embeddings class can be instantiated and configured correctly + +### Benefits +1. **Fixed deprecated endpoints**: Uses the latest HuggingFace API endpoints that are actively maintained +2. **Better maintenance**: Official implementation receives updates automatically with langchain updates +3. **Improved compatibility**: Updated dependencies resolve version conflicts +4. **Future-proof**: Latest versions will receive ongoing support and security updates + +## Usage +The HuggingFace embedding node works the same way from a user perspective: +1. Add HuggingFace API key +2. Optionally specify model name +3. Optionally specify custom endpoint URL + +No changes required in existing flows - the fix is backward compatible. 
+
+## Testing
+To test the fix manually with a real API key:
+
+```bash
+cd packages/components
+export HUGGINGFACEHUB_API_KEY=your_hf_token_here
+node test_huggingface_manual.js
+```
+
+This will test actual embedding generation to confirm the deprecated endpoints issue is resolved.
\ No newline at end of file
diff --git a/packages/components/test_huggingface_manual.js b/packages/components/test_huggingface_manual.js
new file mode 100644
index 000000000..679ac4d08
--- /dev/null
+++ b/packages/components/test_huggingface_manual.js
@@ -0,0 +1,96 @@
+#!/usr/bin/env node
+
+/**
+ * Manual test script for HuggingFace Embedding API
+ *
+ * To run this test:
+ * 1. Set your HuggingFace API key: export HUGGINGFACEHUB_API_KEY=your_api_key_here
+ * 2. Run: node test_huggingface_manual.js
+ *
+ * This will test the actual HuggingFace embedding API calls to verify the
+ * deprecated endpoints issue has been fixed.
+ */
+// Load the class through the package's documented entrypoint rather than an internal dist/ build path.
+const { HuggingFaceInferenceEmbeddings } = require('@langchain/community/embeddings/hf');
+/** Runs the query, batch and optional custom-endpoint checks; exits with status 1 on a missing key or any failure. */
+async function testHuggingFaceEmbeddings() {
+    console.log('HuggingFace Embedding API Manual Test');
+    console.log('=====================================\n');
+
+    const apiKey = process.env.HUGGINGFACEHUB_API_KEY;
+
+    if (!apiKey) {
+        console.log('❌ No API key found. Please set HUGGINGFACEHUB_API_KEY environment variable.');
+        console.log(' Example: export HUGGINGFACEHUB_API_KEY=hf_your_token_here');
+        process.exit(1);
+    }
+
+    console.log('✓ API key found, testing embedding functionality...\n');
+
+    try {
+        // Test 1: Single-query embedding with an explicitly pinned model
+        console.log('Test 1: Basic embedding generation');
+        const embeddings1 = new HuggingFaceInferenceEmbeddings({
+            apiKey: apiKey,
+            model: 'sentence-transformers/all-MiniLM-L6-v2'
+        });
+
+        const testText = 'Hello, this is a test sentence for embedding generation.';
+        console.log(` Input text: "${testText}"`);
+
+        const result = await embeddings1.embedQuery(testText);
+        console.log(` ✓ Generated embedding vector of length: ${result.length}`);
+        console.log(` ✓ First few values: [${result.slice(0, 5).map(v => v.toFixed(4)).join(', ')}...]`);
+
+        // Test 2: Batch embedding
+        console.log('\nTest 2: Batch embedding generation');
+        const documents = [
+            'This is the first document.',
+            'Here is the second document.',
+            'And this is the third one.'
+        ];
+
+        const batchResults = await embeddings1.embedDocuments(documents);
+        console.log(` ✓ Generated embeddings for ${batchResults.length} documents`);
+        console.log(` ✓ Each embedding has ${batchResults[0].length} dimensions`);
+
+        // Test 3: Custom endpoint (if you have one)
+        if (process.env.HUGGINGFACE_ENDPOINT) {
+            console.log('\nTest 3: Custom endpoint');
+            const embeddings3 = new HuggingFaceInferenceEmbeddings({
+                apiKey: apiKey,
+                endpointUrl: process.env.HUGGINGFACE_ENDPOINT
+            });
+
+            const customResult = await embeddings3.embedQuery(testText);
+            console.log(` ✓ Custom endpoint generated embedding of length: ${customResult.length}`);
+        } else {
+            console.log('\nTest 3: Skipped (no custom endpoint provided)');
+            console.log(' Set HUGGINGFACE_ENDPOINT environment variable to test custom endpoints');
+        }
+
+        console.log('\n✅ All tests passed! HuggingFace embedding API is working correctly.');
+        console.log('\n🎉 The deprecated endpoints issue has been resolved by updating to:');
+        console.log(' - langchain: 0.3.34');
+        console.log(' - @langchain/community: 0.3.56');
+        console.log(' - @langchain/core: 0.3.78');
+        console.log(' - @huggingface/inference: 4.0.5');
+
+    } catch (error) {
+        // A rejection is not guaranteed to be an Error, so derive the text once before inspecting it.
+        const message = String((error && error.message) || error);
+        console.error('\n❌ Test failed:');
+        console.error(` Error: ${message}`);
+        if (/401|unauthorized/i.test(message)) {
+            console.error(' This looks like an API key issue. Please check your HUGGINGFACEHUB_API_KEY.');
+        } else if (/blob|fetch/i.test(message)) {
+            console.error(' This might be the original deprecated endpoints issue.');
+            console.error(' Please verify all dependencies are updated correctly.');
+        }
+        console.error('\n Full error:', error);
+        process.exit(1);
+    }
+}
+
+// Run the test
+testHuggingFaceEmbeddings();
\ No newline at end of file