ResponsibleAI - Input Moderation - Simplifying the Options for OpenAI Moderation

parent ad8281e553
commit 619fb4f5c1

This commit removes the 'Moderation Checks' selector (OpenAI Default / Use Custom Threshold Values / Combine Both) and the eleven per-category threshold inputs from the OpenAIModeration node. The runner now performs a single check against OpenAI's default flagging and keeps only the configurable error message.
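Both the old and the new runner logic key off the first element of the results returned by langchain's OpenAIModerationChain. As a reading aid, here is a rough sketch of that shape, reconstructed only from the fields the diff below references; it is an illustration, not OpenAI's full moderation response typing:

// Assumed partial shape of results[0], inferred from the fields used in this diff.
interface ModerationResultSketch {
    flagged: boolean
    category_scores: {
        sexual: number
        'sexual/minors': number
        hate: number
        'hate/threatening': number
        harassment: number
        'harassment/threatening': number
        'self-harm': number
        'self-harm/intent': number
        'self-harm/instructions': number
        violence: number
        'violence/graphic': number
    }
}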
@@ -24,26 +24,6 @@ class OpenAIModeration implements INode {
         this.description = 'Check whether content complies with OpenAI usage policies.'
         this.baseClasses = [this.type, ...getBaseClasses(Moderation)]
         this.inputs = [
-            {
-                label: 'Moderation Checks',
-                name: 'moderationConfig',
-                type: 'options',
-                default: 'useDefault',
-                options: [
-                    {
-                        label: 'OpenAI Default',
-                        name: 'useDefault'
-                    },
-                    {
-                        label: 'Use Custom Threshold Values',
-                        name: 'useCustom'
-                    },
-                    {
-                        label: 'Combine OpenAI Default with Custom Threshold Values',
-                        name: 'combineBoth'
-                    }
-                ]
-            },
             {
                 label: 'Error Message',
                 name: 'moderationErrorMessage',
@@ -51,102 +31,14 @@ class OpenAIModeration implements INode {
                 rows: 2,
                 default: "Cannot Process! Input violates OpenAI's content moderation policies.",
                 optional: true
-            },
-            {
-                label: 'Threshold Score - Sexual',
-                name: 'catSexualThreshold',
-                type: 'number',
-                default: 0.01,
-                additionalParams: true
-            },
-            {
-                label: 'Threshold Score - Sexual/Minors',
-                name: 'catSexualMinorsThreshold',
-                type: 'number',
-                default: 0.01,
-                additionalParams: true
-            },
-            {
-                label: 'Threshold Score - Hate',
-                name: 'catHateThreshold',
-                type: 'number',
-                default: 0.01,
-                additionalParams: true
-            },
-            {
-                label: 'Threshold Score - Hate/Threatening',
-                name: 'catHateThreateningThreshold',
-                type: 'number',
-                default: 0.01,
-                additionalParams: true
-            },
-            {
-                label: 'Threshold Score - Harassment',
-                name: 'catHarassmentThreshold',
-                type: 'number',
-                default: 0.01,
-                additionalParams: true
-            },
-            {
-                label: 'Threshold Score - Harassment/Threatening',
-                name: 'catHarassmentThreateningThreshold',
-                type: 'number',
-                default: 0.01,
-                additionalParams: true
-            },
-            {
-                label: 'Threshold Score - Self Harm',
-                name: 'catSelfHarmThreshold',
-                type: 'number',
-                default: 0.01,
-                additionalParams: true
-            },
-            {
-                label: 'Threshold Score - Self-Harm/Intent',
-                name: 'catSelfHarmIntentThreshold',
-                type: 'number',
-                default: 0.01,
-                additionalParams: true
-            },
-            {
-                label: 'Threshold Score - Self-Harm/Instructions',
-                name: 'catSelfHarmInstructionsThreshold',
-                type: 'number',
-                default: 0.01,
-                additionalParams: true
-            },
-            {
-                label: 'Threshold Score - Violence',
-                name: 'catViolenceThreshold',
-                type: 'number',
-                default: 0.01,
-                additionalParams: true
-            },
-            {
-                label: 'Threshold Score - Violence/Graphic',
-                name: 'catViolenceGraphicThreshold',
-                type: 'number',
-                default: 0.01,
-                additionalParams: true
             }
         ]
     }

     async init(nodeData: INodeData): Promise<any> {
         const runner = new OpenAIModerationRunner()
-        this.inputs.forEach((value) => {
-            if (value.additionalParams === true) {
-                // remove the prefix 'cat'
-                let categoryName = value.name.substring(3)
-                // remove the suffix 'Threshold'
-                categoryName = categoryName.substring(0, categoryName.length - 9)
-                categoryName = categoryName.substring(0, 1).toLowerCase() + categoryName.substring(1)
-                let categoryThreshold = nodeData.inputs ? nodeData.inputs[value.name] : value.default
-                runner.setParameter(categoryName, parseFloat(categoryThreshold))
-            } else {
-                runner.setParameter(value.name, nodeData.inputs ? nodeData.inputs[value.name] : value.default)
-            }
-        })
         const moderationErrorMessage = nodeData.inputs?.moderationErrorMessage as string
         if (moderationErrorMessage) runner.setErrorMessage(moderationErrorMessage)
         return runner
     }
 }
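After this change, init() no longer walks the inputs to derive category thresholds; it only wires the optional error message into the runner. A minimal sketch of calling it follows (hypothetical: in Flowise the framework constructs the node and supplies a full INodeData, trimmed here to the one field init() still reads):

// Hypothetical driver, for illustration only.
async function buildRunner() {
    const node = new OpenAIModeration()
    // The only input init() consumes after this commit:
    const nodeData = { inputs: { moderationErrorMessage: 'Blocked by content moderation.' } }
    const runner = (await node.init(nodeData as any)) as OpenAIModerationRunner
    return runner // carries the custom message; no setParameter calls remain
}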
@@ -3,19 +3,7 @@ import { BaseLanguageModel } from 'langchain/base_language'
 import { OpenAIModerationChain } from 'langchain/chains'

 export class OpenAIModerationRunner implements Moderation {
-    private moderationConfig: string = 'useDefault'
     private moderationErrorMessage: string = "Text was found that violates OpenAI's content policy."
-    private sexual: number = 0.01
-    private sexualMinors: number = 0.01
-    private hate: number = 0.01
-    private hateThreatening: number = 0.01
-    private harassment: number = 0.01
-    private harassmentThreatening: number = 0.01
-    private selfHarm: number = 0.01
-    private selfHarmIntent: number = 0.01
-    private selfHarmInstructions: number = 0.01
-    private violence: number = 0.01
-    private violenceGraphic: number = 0.01

     async checkForViolations(llm: BaseLanguageModel, input: string): Promise<string> {
         const openAIApiKey = (llm as any).openAIApiKey
@@ -31,32 +19,13 @@ export class OpenAIModerationRunner implements Moderation {
         const { output: moderationOutput, results } = await moderation.call({
             input: input
         })
-        if (this.moderationConfig != 'useCustom' && results[0].flagged) {
+        if (results[0].flagged) {
             throw Error(this.moderationErrorMessage)
         }
-        if (this.moderationConfig != 'useDefault') {
-            const categoryScores = results[0].category_scores
-            if (
-                categoryScores['harassment'] > this.harassment ||
-                categoryScores['harassment/threatening'] > this.harassmentThreatening ||
-                categoryScores['self-harm'] > this.selfHarm ||
-                categoryScores['self-harm/intent'] > this.selfHarmIntent ||
-                categoryScores['self-harm/instructions'] > this.selfHarmInstructions ||
-                categoryScores['sexual'] > this.sexual ||
-                categoryScores['sexual/minors'] > this.sexualMinors ||
-                categoryScores['hate'] > this.hate ||
-                categoryScores['hate/threatening'] > this.hateThreatening ||
-                categoryScores['violence'] > this.violence ||
-                categoryScores['violence/graphic'] > this.violenceGraphic
-            ) {
-                throw Error(this.moderationErrorMessage)
-            }
-        }
         return moderationOutput
     }

-    setParameter(category: string, value: number) {
-        // @ts-ignore
-        this[category] = value
-    }
     setErrorMessage(message: string) {
         this.moderationErrorMessage = message
     }
 }
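For context, a minimal sketch of exercising the simplified runner directly. The ChatOpenAI import matches the langchain version referenced above; the surrounding script and sample strings are hypothetical:

import { ChatOpenAI } from 'langchain/chat_models/openai'

// Hypothetical usage; assumes OPENAI_API_KEY is set in the environment.
// checkForViolations reads the key off the model, as the code above shows.
async function moderateExample() {
    const llm = new ChatOpenAI({ openAIApiKey: process.env.OPENAI_API_KEY })
    const runner = new OpenAIModerationRunner()
    runner.setErrorMessage('Blocked: input violates the content policy.')
    try {
        // Returns the moderation chain's output when nothing is flagged...
        const safe = await runner.checkForViolations(llm, 'Hello there!')
        console.log(safe)
    } catch (e) {
        // ...and throws the configured message when results[0].flagged is true.
        console.error((e as Error).message)
    }
}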