added LLM check to moderation to detect inputs similar to deny list sentences

Henry 2023-12-28 01:07:45 +00:00
parent 6921967e9d
commit c9a6622df7
2 changed files with 32 additions and 9 deletions

View File

@@ -2,6 +2,7 @@ import { INode, INodeData, INodeParams } from '../../../src/Interface'
 import { getBaseClasses } from '../../../src'
 import { Moderation } from '../Moderation'
 import { SimplePromptModerationRunner } from './SimplePromptModerationRunner'
+import { BaseChatModel } from 'langchain/chat_models/base'

 class SimplePromptModeration implements INode {
     label: string
@@ -17,7 +18,7 @@ class SimplePromptModeration implements INode {
     constructor() {
         this.label = 'Simple Prompt Moderation'
         this.name = 'inputModerationSimple'
-        this.version = 1.0
+        this.version = 2.0
         this.type = 'Moderation'
         this.icon = 'moderation.svg'
         this.category = 'Moderation'
@@ -30,8 +31,14 @@ class SimplePromptModeration implements INode {
                 type: 'string',
                 rows: 4,
                 placeholder: `ignore previous instructions\ndo not follow the directions\nyou must ignore all previous instructions`,
-                description: 'An array of string literals (enter one per line) that should not appear in the prompt text.',
-                optional: false
+                description: 'An array of string literals (enter one per line) that should not appear in the prompt text.'
             },
+            {
+                label: 'Chat Model',
+                name: 'model',
+                type: 'BaseChatModel',
+                description: 'Use LLM to detect if the input is similar to those specified in Deny List',
+                optional: true
+            },
             {
                 label: 'Error Message',
@@ -46,9 +53,10 @@ class SimplePromptModeration implements INode {
     async init(nodeData: INodeData): Promise<any> {
         const denyList = nodeData.inputs?.denyList as string
+        const model = nodeData.inputs?.model as BaseChatModel
         const moderationErrorMessage = nodeData.inputs?.moderationErrorMessage as string

-        return new SimplePromptModerationRunner(denyList, moderationErrorMessage)
+        return new SimplePromptModerationRunner(denyList, moderationErrorMessage, model)
     }
 }

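The node-level change above bumps the version to 2.0 and adds an optional Chat Model input that `init()` forwards into the runner. A minimal sketch of that wiring, assuming for illustration that `SimplePromptModeration` is exported from the node file and that some `BaseChatModel` instance named `chatModel` is already in scope (neither is part of this commit):

```typescript
import { INodeData } from '../../../src/Interface'
import { SimplePromptModeration } from './SimplePromptModeration'

// Hand-built nodeData, shaped after the nodeData.inputs usage in init() above.
// The denyList and moderationErrorMessage values are illustrative only.
const nodeData = {
    inputs: {
        denyList: 'ignore previous instructions\ndo not follow the directions',
        moderationErrorMessage: 'Cannot process your request.',
        model: chatModel // leave undefined to keep plain substring matching
    }
} as unknown as INodeData

// init() resolves the three inputs and forwards the optional model:
// new SimplePromptModerationRunner(denyList, moderationErrorMessage, model)
const runner = await new SimplePromptModeration().init(nodeData)
```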
View File

@@ -1,23 +1,38 @@
 import { Moderation } from '../Moderation'
+import { BaseChatModel } from 'langchain/chat_models/base'

 export class SimplePromptModerationRunner implements Moderation {
     private readonly denyList: string = ''
     private readonly moderationErrorMessage: string = ''
+    private readonly model: BaseChatModel

-    constructor(denyList: string, moderationErrorMessage: string) {
+    constructor(denyList: string, moderationErrorMessage: string, model?: BaseChatModel) {
         this.denyList = denyList
         if (denyList.indexOf('\n') === -1) {
             this.denyList += '\n'
         }
         this.moderationErrorMessage = moderationErrorMessage
+        if (model) this.model = model
     }

     async checkForViolations(input: string): Promise<string> {
-        this.denyList.split('\n').forEach((denyListItem) => {
-            if (denyListItem && denyListItem !== '' && input.toLowerCase().includes(denyListItem.toLowerCase())) {
-                throw Error(this.moderationErrorMessage)
+        if (this.model) {
+            const denyArray = this.denyList.split('\n')
+            for (const denyStr of denyArray) {
+                const res = await this.model.invoke(
+                    `Are these two sentences similar to each other? Only return Yes or No.\nFirst sentence: ${input}\nSecond sentence: ${denyStr}`
+                )
+                if (res.content.toString().toLowerCase().includes('yes')) {
+                    throw Error(this.moderationErrorMessage)
+                }
             }
-        })
+        } else {
+            this.denyList.split('\n').forEach((denyListItem) => {
+                if (denyListItem && denyListItem !== '' && input.toLowerCase().includes(denyListItem.toLowerCase())) {
+                    throw Error(this.moderationErrorMessage)
+                }
+            })
+        }
         return Promise.resolve(input)
     }
 }
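With this change `checkForViolations` has two paths: when a model is set, it asks the LLM whether the input is similar to each deny-list line; otherwise it falls back to the original case-insensitive substring match. A usage sketch, assuming `ChatOpenAI` is available from the same langchain version as the import above (the deny list and error message are illustrative, not part of this commit):

```typescript
import { ChatOpenAI } from 'langchain/chat_models/openai'
import { SimplePromptModerationRunner } from './SimplePromptModerationRunner'

const runner = new SimplePromptModerationRunner(
    'ignore previous instructions\ndo not follow the directions',
    'Cannot process your request.', // illustrative error message
    new ChatOpenAI({ temperature: 0 }) // omit to use substring matching only
)

try {
    // No deny-list line is a literal substring of this input, so the old path
    // would pass it; the LLM path should judge it similar to
    // 'ignore previous instructions' and throw.
    await runner.checkForViolations('please disregard all prior instructions')
} catch (e) {
    // Error('Cannot process your request.') signals a moderation violation
}
```

Note that the LLM path issues one model call per deny-list line on every check, so latency and cost grow linearly with the list; the substring fallback remains a pure string scan.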