From 1e18c289cb06b8cc4401f53d99b31f689b059b13 Mon Sep 17 00:00:00 2001 From: chungyau97 Date: Sun, 16 Apr 2023 00:15:12 +0700 Subject: [PATCH] add CSV loader --- .../nodes/documentloaders/Csv/Csv.png | Bin 0 -> 8498 bytes .../nodes/documentloaders/Csv/Csv.ts | 59 ++++++++++++++++++ packages/components/package.json | 1 + 3 files changed, 60 insertions(+) create mode 100644 packages/components/nodes/documentloaders/Csv/Csv.png create mode 100644 packages/components/nodes/documentloaders/Csv/Csv.ts diff --git a/packages/components/nodes/documentloaders/Csv/Csv.png b/packages/components/nodes/documentloaders/Csv/Csv.png new file mode 100644 index 0000000000000000000000000000000000000000..41b84e16a04dd8b6b6eb1606f4fa9f5317bffc96 GIT binary patch literal 8498 zcmeHsWmJ?=+w~bb1Zf26lt!eHR7y%gLRz}JL&iWtL|Oqsx?$*!A*DgOLApV@d1rXm zdVjq?zMtQ>zIWD|b)9qOy7s>IzRx|g?lU1TRps$;C~*J)c!~Az|Vh$*&{!W&lr?(HDo5c5%>h*+d>lhtw4aI|pqeCuKkJUu$7iZeD&tVG*pjq_nKOqOz*GrnauWp|J_x+|t_C-toJ$tGlPSuYX|h z&(QG5=-Bwg)PYk$Q}khV|hU8fOlF2*Cz@#@wOC)kgY^@7nfmZ=c=j<+YIKz@+$EG?!(6uTC5a zX@lofUIwECpX%@Exjf7GqF`>0;V$Ey97m5Ey=|t}*z4ft4OnR;oln=q)wq)M$?b^7 z=hY@{7MB3k#uK^KCStzZsv$=q-rEvug)2!>t)A+u?{nYEe`OnW|G&Ez?F47@j1uCZ86`;a29UziY zNe^a%^f4b{#t5K7-z&HSe``4rz?dJzyt)q_-~nnF9tgvLp2VR8IaH}>98{1FNr@!{a4TTN4tO5| zN)*tG0UyUkCYz67BC~#P0c2Lg3V{9rgOv<-$(l@nml7mkAhMy!7{CH`q3({roFClP zz|fH4vKxXd-H}C>zLfb-4L-8wGYTk!r1S&09pFQ7B9e%;@51#81WBVUdlxnpkj?X> zf**JQ4;4WpWdNX)z)TPY3C~)D3=u*I7Iq+C&;bSB8x=WiJ*LK+UKNu#iqSvSA>DL@y>XSfCAiGn7j_i&G1>r!1J1de!)SU|%?#Cd=bghjLNWo!VXtFd=U#sA}AdR(o^NTDScSdOWU1-8!<|w4sevBvf7)^5IBc zMc%j}edAT>VZbED^HHTKr}&vGqcZZx$8jfyjK}|eg$DbmT$Ps5=UY2yBHrY@q{1Sr#fBt+ZKY@wyR_KC20oLuAjs}Pb<@ebC_}}cwkTew3 z>E8!CQUnbJ)PK1l>!A;P^b8D8ipl7YD#vPj0^C?+*6q#7qUXNmNM$z}2$*+X+b`}_ zr+TcY2b!{vhS*LVG#aW&91e{z*YPdai5(BejUKX!4>!49sf;R-6u#srdbL&L9~9eFI>f0+GH>=X8>0YJ70?LiF1QzRrX!A%jOtabwXN?pm62HHo zJ)B*ZoW~BdjJxQn_K2O#R$s4$^caLICd~?0iol#Jo$Ll=e}9;|@Zf#6H2c{Kx3x>= z{nsUj&1P-3o||NC$_dGm&cvW1v8^LttA)OkS~CU*nt#?>3u5GJhwKN{J&)W?h2y#S zTrv|S0CU&CB!B0*g86&7vLQR)UR|AmNinDVuJ>~|y9-oJg9a&nnG}USEs5oc7|oa@ z8Il`st7cz3{pe%tG9`h}0+P-}5js0u6Be)HZw zTTS58fYz&tHNURsRPKc3x4}E)Pj|3ExI}`{!BbMX#HlMHqHFu3CF389>YMQ>CWogH z?p*hg*&jd(7cvo6~<6*@XzT&{qwo!Qvm<#alpzRM>cldt#mv-1dHwwvE00R zRxFk-k(?(9a9?xk{V`l{(+JZKa6CIc5MCQB4DW%iv~^5J?gA)gSS1w(DJsO^E< z-XCfrK@wa1Zbp1zP1#}~v75cC>G;`8SUv-DT|Swy@<&&9A(5}M3LPot39FztOg;KG zRJy@|hhf@i4rb+^u% zdC3@{vM*r29V?)7P%~hgH84x@@g-X`IFzvbo5)hBe^k&9{Ipfx+!XqA`#!SUtc3Op zi;rjiEX*6%x2qwxz6Za*GP2T?kQVt(%7;6piuq@&keH1T$MRKI1}gAn&*^Cz6<4@6 zk{$i~1#ZpWEA00v8Wfquunr%iFRZ=5Ii$5iE2u#6Wd1PII?Wf})U1d`LHoc!K@Xg0 zzKRdv5OJE74y^P2I4VN%JBZqYpe59#uBQThK}s|897a5GXwQDtch4o4RU|gNYCCA3 zqhxfEq825>CwAyc&mYCdu&8Yw^4;`AHA`EZwX)k`0#5|&jIPf6NT#f9Z71Mx=%!AY ze`s;uNg)^ect{j!FZS8lBtPE`eZf4tKW}a;*PMbkj2hTbU?#g($x2uYOmGmX3z5-y$1wGHX6c7s)^ej`$EnXyet|TK*D3> zQZs$+!?PPq+0#Or`NR3O+i1hBMhv%=A0RX1`b!XhNXh9htzUl~b7tup*(%TJHn8E4 z%QA^P3I4ql_-3y8we9Wo;cwLIbu?Fkvt%Y|yrsgpH38EVN#d(ucXH-fI}M-)PZb%@Z0Hb4je5ClSGPF{<-<+T(<~6gIIKq>HjpH`|{cP&ynxcXLeGy2hI* zIG|j_W4C$r{%@6699Pw%%%=IIE(&k!=Z&CY#UhMQ{|@s=6z5ID%F5}9=3tr)EI8rK z>#&x?%r)tH6sem%&E;QmyhWbH)E`aTP9(;Np#64r6`tJ#AHC?R8=|f>h?YKVO+EJd zGN>Osl8PQ4}ZEvZHLP+Zyt~WW78T z`0pRVI$`1-tvXX>wANT~t>kt;4pMXJr*HITYsbIwazA}#B5s-<4tQB1+%IE4-N=mN z#X{h@(s;|159#b}&Q$4d?V7&*V^Q zL6up1MNzXpQ+jf!yhEmUq5KS}7LP&M4nCHW3@+&VT@CVew5!D)^XYXVAnRX2tv3$_ z-qailn)g$np3QtC_(xR!E&4NXqD(KCpb7-_J;LvKV!WHaJ71cLzYh-)+Z43L0@++0 zIG4(AZ#Ibgv5p>V&dtB|vMhAqFr01TLx{trGBtR;`b&V*u|2;QGs&VJwZvg|@%zSm zgewBAS|ie#;HQPb%TrF-T?*Twu~om9q1sdvc?gA0cDZxVBHpb~N-E8zq(7bZ*9@i# z`RoWxr|?1;2`Zc_tf0~$tKAP%EbP<-*s9T;jjr(#xPAeB_s~JE>KF^>dE05~5|2dAd%L$riuOZ< zP?~$s>SQ25#nUbFEgty`Z>h%?Zlw1v{?Qx0yP@EMSkYB#JmsHuwiGj9t?nJZWU7YI zdyu#}bkhmk%6CJIIjzTIWYS~9Vf^*pA8ZxcvG2_hv$c&+8qVcG6ii=B$$iEGs?vh{ zkD8UZaX*Ds{WboYD~bD!CX*S=^KI8ZWCu|=)z!NBa8x~g{~*5IKxqlI_Iy15Y-T{q zyKRS03it*7#nIW=z$p>e2~P=((|rDTVi_lu7jN&rq@(F0MBD zpbSPk+a3qv10#+qG#&dSEsWB3TM`mn;YTdVhDYhifR`n3EQNr*qR1A7!I#=)^^(41 z2YPt%Nohu3-Ur1ge?mF^C5qJLi#irKJO9ideufT9E4WQ#G0deP3-I3j)eRy6Q!J*i z#adO2(4lsXI@BG;v;6(eU;OAPwG+x~gD6tITMBe3@aLGx?FO@dA*(FCMATlW);Pcv zEz2?(@jUjGiNIK(?DpS5@`Md^$51Rd-1n6>X!8``DtU=26|e5`jiX}Z+E~Ttt^^fTyHUn{zpK{R7I4KnM-ji82&vNCC zlUVz0_b>c}lt)66%hl`B7mh=u(D?@zx+_G$tu*_S(EB>OLK}%URwXq=gIy&rLpjrOX)AsrnKwtoPChz9ow4B;ML@F(ln(nkN$e{M|dYciDqF?O(!m z7=2K#vYr602OeaRGzW!SKjz;)vMX@mG+bRuqjseszp969+>;9UoOF}%H9Q=)@`wce zTd_g#rTT9pFQ}DVCrKgVE+%^?u#xd?dc~Ch_a^z{Vj=OOz2-1uxD_7!@}AgRHeEug zuJj%4AQRP+KHRTH)w&T@sVf~B0$^JB^BiMeOc|%dj0JB;$yZ7C@>s8%0og<}mEnh= z->bO6zh5Tf7v{7rE%$%}9sx|xubJw;^nKgqJs(u5FjAABg?1_Lej2;H5&Lr%l}(j}70O%KiU!oV8uedIm&P4*i8#2qu`{_mmbjNI@eGkL~j zd;VrsqaVhiuNqBjQTNGytE&xZ_3)U=rDN6c&@t+o*^<>ckf9D_IC1b4P3&{IIYph8Ezlu%{@|!u?)2j5cAfy;wPn>81zPjeHoqLhICo~OGM7P5 zOxSS{UDX=}YFh75w^eaN(D{hIqGCtrkMWX`FSaEnk8F>Q9y7@$1|RwQ_Rg^-9OvPx z1Nnr{!VIi3Kfn-D?Mh-C zUAv?dVnV&Ne&ZXJ%Yst) zA$kw(cl=L{-d9Mj1!Me@*E$Qc1jhEX{;C7ExX|jR**T{1SrprV-Aqx|b)KncPw=Bd zfTBb}8+=;vt?O|g$9E`nibHI=Q*ktS`~yMAlcQcq(~4V*6V6bN(iu4tH`yKrYZ?@2 z^0J=4v=Vq=+u&Jd`D%psmXaDz6#R*-N0V}S+c^17G!Cuq%fR$*N5h9eHqk$&DFH@{ zY``i5udYNgPjOLw4c3SGV5ixmt_&?Rvu7jMIDk8DmkX%x98HiXWi#Ab9)5ptkl!i< zWeKYyuX8;T#JR^ZPqYmyjhjGx@h|>^;f|bX2z>jOQ>3Vh@1Nw{eY{QCN7#G5K|s0< z>#4_(s_c`UO0TG{PAc%8 zYj<8ZIQy^pk=tEapDLWF6RLVR+`ygJ z>mtb^Daoqhn%jc6hvMJMr7rOuehAbL_mR6%OOg+Ir|{IQ%RQ6WuNvFURT-F`Fxkan zZC2x;DjnQ{NZH`^d`;^fx1TAQ&rw|ZeBZ0spiIw(w0M~1I|ctlJNGyHsgi+U>_aX8 zf9F%DmTHuLYV#Um+gOD|-#rY9d=$T;0<2s!GIe56Yi{&&r(g7Mx*Ss1Mpw7XSV2-Y z@*u@1_6HJLx#PM0X#srQ@pTo+=ui=j!9WR|1!2R?;XGBxuKxbK*Oi_Sr+*u*8qiqX z(l2>#wKLXdNwb8$kwK77yBsnoXkFIRyl^v&6C?WDHZRJ9JZDtLk>a{F{LSX}rn#LR z@0jY0BWcGPtdDbOq7du`RJP6H7}Um(3OB!(%+<(ofdEmD{1+%!_)Kdg+G|eLiD!GB z3r-E=kQDyd+i%fV-ZMr0MtYvEh~2PLY!|);Nh!b_VQzDb_p9ggEaA?!3@h0w;qj&g zICFceFWzYukuV5OmEci}y3oT=V^&Kl&wyrhHh;eQ8$TAN6+J5M)_ToWfJp^34 zd`Zi&s~Z^?3e`7oo$$x_JbiTjXoV0w^7X2YDmZ)N>=2c3HSeY@gV9*J;ii!cfv!it zU>{msds0&vb#)pwoZ#p<(DI`3DH@>OgBkmsttwr|XhqyG95QDvVCy;1rq`o^KMv>V zZh_pnF}vy!xA~quFPd5O-U+Qe2EkgRQAvqOyq`4Y^)Ai_xw!79;?3(FFo6K8 zoq(1M=A6c+KPk+|%x{`ZE4Hc3VvWU`ejc!c=kup*49wDzT*Oym3Y%Jk75|F8(95(o z-c{a8dp`haWlFUj)qR7cJZO_jV)o|qHV$Fh0VkH~shQhPCgQRoXV2noeME09?l)Eb zI=ej#!tTQnE;URWXHm~6oAEUg=-vWX=jAtM%Sk7WXy=Z!rJ;4bW>b0Z+_yxH%F=<8m%ccr^|_g~cZmBg-r(HzJQ0TX(Jv)kKi=o7m@-lz4;nz ztTz!akowQFAIpiJoEa*%bB@{Lcr@Qi}kf0XhI60P8C*oU`Ty1`(f~7Fp!8cN zs(d6GNWDP+@BXb)(U_P7OkYFkx_#-1s0% zi{7c=mPN)5T33EH;K)2>vxy>-bD`m~;IH?tRkL)so8`yJ2Y u(oR+NV%^$;V!ERC6o&GwPsubR^KVgWi2vFX5{O|V#VX3GJ};6n{_tOAR;!Kx literal 0 HcmV?d00001 diff --git a/packages/components/nodes/documentloaders/Csv/Csv.ts b/packages/components/nodes/documentloaders/Csv/Csv.ts new file mode 100644 index 000000000..db1a3bac6 --- /dev/null +++ b/packages/components/nodes/documentloaders/Csv/Csv.ts @@ -0,0 +1,59 @@ +import { INode, INodeData, INodeParams } from '../../../src/Interface' +import { TextSplitter } from 'langchain/text_splitter' +import { CSVLoader } from "langchain/document_loaders/fs/csv"; + +class Csv_DocumentLoaders implements INode { + label: string + name: string + description: string + type: string + icon: string + category: string + baseClasses: string[] + inputs: INodeParams[] + + constructor() { + this.label = 'Csv File' + this.name = 'csvFile' + this.type = 'Document' + this.icon = 'Csv.png' + this.category = 'Document Loaders' + this.description = `Load data from CSV files` + this.baseClasses = [this.type] + this.inputs = [ + { + label: 'Csv File', + name: 'csvFile', + type: 'file', + fileType: '.csv' + }, + { + label: 'Text Splitter', + name: 'textSplitter', + type: 'TextSplitter', + optional: true + } + ] + } + + async init(nodeData: INodeData): Promise { + const textSplitter = nodeData.inputs?.textSplitter as TextSplitter + const csvFileBase64 = nodeData.inputs?.csvFile as string + const splitDataURI = csvFileBase64.split(',') + splitDataURI.pop() + const bf = Buffer.from(splitDataURI.pop() || '', 'base64') + + const blob = new Blob([bf]) + const loader = new CSVLoader(blob) + + if (textSplitter) { + const docs = await loader.loadAndSplit(textSplitter) + return docs + } else { + const docs = await loader.load() + return docs + } + } +} + +module.exports = { nodeClass: Csv_DocumentLoaders } diff --git a/packages/components/package.json b/packages/components/package.json index d2d680376..d75cda9d5 100644 --- a/packages/components/package.json +++ b/packages/components/package.json @@ -21,6 +21,7 @@ "@pinecone-database/pinecone": "^0.0.12", "axios": "^0.27.2", "chromadb": "^1.3.1", + "d3-dsv": "2", "dotenv": "^16.0.0", "express": "^4.17.3", "form-data": "^4.0.0",