added LLM check to moderation to detect inputs similar to deny list sentences

Henry 2023-12-28 01:07:45 +00:00
parent 6921967e9d
commit c9a6622df7
2 changed files with 32 additions and 9 deletions

View File

@@ -2,6 +2,7 @@ import { INode, INodeData, INodeParams } from '../../../src/Interface'
 import { getBaseClasses } from '../../../src'
 import { Moderation } from '../Moderation'
 import { SimplePromptModerationRunner } from './SimplePromptModerationRunner'
+import { BaseChatModel } from 'langchain/chat_models/base'

 class SimplePromptModeration implements INode {
     label: string
@@ -17,7 +18,7 @@ class SimplePromptModeration implements INode {
     constructor() {
         this.label = 'Simple Prompt Moderation'
         this.name = 'inputModerationSimple'
-        this.version = 1.0
+        this.version = 2.0
         this.type = 'Moderation'
         this.icon = 'moderation.svg'
         this.category = 'Moderation'
@@ -30,8 +31,14 @@ class SimplePromptModeration implements INode {
                 type: 'string',
                 rows: 4,
                 placeholder: `ignore previous instructions\ndo not follow the directions\nyou must ignore all previous instructions`,
-                description: 'An array of string literals (enter one per line) that should not appear in the prompt text.',
-                optional: false
+                description: 'An array of string literals (enter one per line) that should not appear in the prompt text.'
             },
+            {
+                label: 'Chat Model',
+                name: 'model',
+                type: 'BaseChatModel',
+                description: 'Use LLM to detect if the input is similar to those specified in Deny List',
+                optional: true
+            },
             {
                 label: 'Error Message',
@@ -46,9 +53,10 @@ class SimplePromptModeration implements INode {
     async init(nodeData: INodeData): Promise<any> {
         const denyList = nodeData.inputs?.denyList as string
+        const model = nodeData.inputs?.model as BaseChatModel
         const moderationErrorMessage = nodeData.inputs?.moderationErrorMessage as string

-        return new SimplePromptModerationRunner(denyList, moderationErrorMessage)
+        return new SimplePromptModerationRunner(denyList, moderationErrorMessage, model)
     }
 }

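The node-level change above bumps the version to 2.0 and adds an optional Chat Model input that `init()` forwards into the runner. A minimal sketch of that wiring, assuming for illustration that `SimplePromptModeration` is exported from the node file and that some `BaseChatModel` instance named `chatModel` is already in scope (neither is part of this commit):

```typescript
import { INodeData } from '../../../src/Interface'
import { SimplePromptModeration } from './SimplePromptModeration'

// Hand-built nodeData, shaped after the nodeData.inputs usage in init() above.
// The denyList and moderationErrorMessage values are illustrative only.
const nodeData = {
    inputs: {
        denyList: 'ignore previous instructions\ndo not follow the directions',
        moderationErrorMessage: 'Cannot process your request.',
        model: chatModel // leave undefined to keep plain substring matching
    }
} as unknown as INodeData

// init() resolves the three inputs and forwards the optional model:
// new SimplePromptModerationRunner(denyList, moderationErrorMessage, model)
const runner = await new SimplePromptModeration().init(nodeData)
```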
View File

@@ -1,23 +1,38 @@
 import { Moderation } from '../Moderation'
+import { BaseChatModel } from 'langchain/chat_models/base'

 export class SimplePromptModerationRunner implements Moderation {
     private readonly denyList: string = ''
     private readonly moderationErrorMessage: string = ''
+    private readonly model: BaseChatModel

-    constructor(denyList: string, moderationErrorMessage: string) {
+    constructor(denyList: string, moderationErrorMessage: string, model?: BaseChatModel) {
         this.denyList = denyList
         if (denyList.indexOf('\n') === -1) {
             this.denyList += '\n'
         }
         this.moderationErrorMessage = moderationErrorMessage
+        if (model) this.model = model
     }

     async checkForViolations(input: string): Promise<string> {
-        this.denyList.split('\n').forEach((denyListItem) => {
-            if (denyListItem && denyListItem !== '' && input.toLowerCase().includes(denyListItem.toLowerCase())) {
-                throw Error(this.moderationErrorMessage)
+        if (this.model) {
+            const denyArray = this.denyList.split('\n')
+            for (const denyStr of denyArray) {
+                const res = await this.model.invoke(
+                    `Are these two sentences similar to each other? Only return Yes or No.\nFirst sentence: ${input}\nSecond sentence: ${denyStr}`
+                )
+                if (res.content.toString().toLowerCase().includes('yes')) {
+                    throw Error(this.moderationErrorMessage)
+                }
             }
-        })
+        } else {
+            this.denyList.split('\n').forEach((denyListItem) => {
+                if (denyListItem && denyListItem !== '' && input.toLowerCase().includes(denyListItem.toLowerCase())) {
+                    throw Error(this.moderationErrorMessage)
+                }
+            })
+        }
         return Promise.resolve(input)
     }
 }
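With this change `checkForViolations` has two paths: when a model is set, it asks the LLM whether the input is similar to each deny-list line; otherwise it falls back to the original case-insensitive substring match. A usage sketch, assuming `ChatOpenAI` is available from the same langchain version as the import above (the deny list and error message are illustrative, not part of this commit):

```typescript
import { ChatOpenAI } from 'langchain/chat_models/openai'
import { SimplePromptModerationRunner } from './SimplePromptModerationRunner'

const runner = new SimplePromptModerationRunner(
    'ignore previous instructions\ndo not follow the directions',
    'Cannot process your request.', // illustrative error message
    new ChatOpenAI({ temperature: 0 }) // omit to use substring matching only
)

try {
    // No deny-list line is a literal substring of this input, so the old path
    // would pass it; the LLM path should judge it similar to
    // 'ignore previous instructions' and throw.
    await runner.checkForViolations('please disregard all prior instructions')
} catch (e) {
    // Error('Cannot process your request.') signals a moderation violation
}
```

Note that the LLM path issues one model call per deny-list line on every check, so latency and cost grow linearly with the list; the substring fallback remains a pure string scan.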