ResponsibleAI - Input Moderation - Simplifying the Options for OpenAI Moderation.

This commit is contained in:
vinodkiran 2023-11-22 19:19:59 +05:30
parent ad8281e553
commit 619fb4f5c1
2 changed files with 5 additions and 144 deletions

View File

@@ -24,26 +24,6 @@ class OpenAIModeration implements INode {
this.description = 'Check whether content complies with OpenAI usage policies.' this.description = 'Check whether content complies with OpenAI usage policies.'
this.baseClasses = [this.type, ...getBaseClasses(Moderation)] this.baseClasses = [this.type, ...getBaseClasses(Moderation)]
this.inputs = [ this.inputs = [
{
label: 'Moderation Checks',
name: 'moderationConfig',
type: 'options',
default: 'useDefault',
options: [
{
label: 'OpenAI Default',
name: 'useDefault'
},
{
label: 'Use Custom Threshold Values',
name: 'useCustom'
},
{
label: 'Combine OpenAI Default with Custom Threshold Values',
name: 'combineBoth'
}
]
},
{ {
label: 'Error Message', label: 'Error Message',
name: 'moderationErrorMessage', name: 'moderationErrorMessage',
@@ -51,102 +31,14 @@ class OpenAIModeration implements INode {
rows: 2, rows: 2,
default: "Cannot Process! Input violates OpenAI's content moderation policies.", default: "Cannot Process! Input violates OpenAI's content moderation policies.",
optional: true optional: true
},
{
label: 'Threshold Score - Sexual',
name: 'catSexualThreshold',
type: 'number',
default: 0.01,
additionalParams: true
},
{
label: 'Threshold Score - Sexual/Minors',
name: 'catSexualMinorsThreshold',
type: 'number',
default: 0.01,
additionalParams: true
},
{
label: 'Threshold Score - Hate',
name: 'catHateThreshold',
type: 'number',
default: 0.01,
additionalParams: true
},
{
label: 'Threshold Score - Hate/Threatening',
name: 'catHateThreateningThreshold',
type: 'number',
default: 0.01,
additionalParams: true
},
{
label: 'Threshold Score - Harassment',
name: 'catHarassmentThreshold',
type: 'number',
default: 0.01,
additionalParams: true
},
{
label: 'Threshold Score - Harassment/Threatening',
name: 'catHarassmentThreateningThreshold',
type: 'number',
default: 0.01,
additionalParams: true
},
{
label: 'Threshold Score - Self Harm',
name: 'catSelfHarmThreshold',
type: 'number',
default: 0.01,
additionalParams: true
},
{
label: 'Threshold Score - Self-Harm/Intent',
name: 'catSelfHarmIntentThreshold',
type: 'number',
default: 0.01,
additionalParams: true
},
{
label: 'Threshold Score - Self-Harm/Instructions',
name: 'catSelfHarmInstructionsThreshold',
type: 'number',
default: 0.01,
additionalParams: true
},
{
label: 'Threshold Score - Violence',
name: 'catViolenceThreshold',
type: 'number',
default: 0.01,
additionalParams: true
},
{
label: 'Threshold Score - Violence/Graphic',
name: 'catViolenceGraphicThreshold',
type: 'number',
default: 0.01,
additionalParams: true
} }
] ]
} }
async init(nodeData: INodeData): Promise<any> { async init(nodeData: INodeData): Promise<any> {
const runner = new OpenAIModerationRunner() const runner = new OpenAIModerationRunner()
this.inputs.forEach((value) => { const moderationErrorMessage = nodeData.inputs?.moderationErrorMessage as string
if (value.additionalParams === true) { if (moderationErrorMessage) runner.setErrorMessage(moderationErrorMessage)
// remove thePrefix - 'cat'
let categoryName = value.name.substring(3)
// remove theSuffix - 'Threshold'
categoryName = categoryName.substring(0, categoryName.length - 9)
categoryName = categoryName.substring(0, 1).toLowerCase() + categoryName.substring(1)
let categoryThreshold = nodeData.inputs ? nodeData.inputs[value.name] : value.default
runner.setParameter(categoryName, parseFloat(categoryThreshold))
} else {
runner.setParameter(value.name, nodeData.inputs ? nodeData.inputs[value.name] : value.default)
}
})
return runner return runner
} }
} }

View File

@@ -3,19 +3,7 @@ import { BaseLanguageModel } from 'langchain/base_language'
import { OpenAIModerationChain } from 'langchain/chains' import { OpenAIModerationChain } from 'langchain/chains'
export class OpenAIModerationRunner implements Moderation { export class OpenAIModerationRunner implements Moderation {
private moderationConfig: string = 'useDefault'
private moderationErrorMessage: string = "Text was found that violates OpenAI's content policy." private moderationErrorMessage: string = "Text was found that violates OpenAI's content policy."
private sexual: number = 0.01
private sexualMinors: number = 0.01
private hate: number = 0.01
private hateThreatening: number = 0.01
private harassment: number = 0.01
private harassmentThreatening: number = 0.01
private selfHarm: number = 0.01
private selfHarmIntent: number = 0.01
private selfHarmInstructions: number = 0.01
private violence: number = 0.01
private violenceGraphic: number = 0.01
async checkForViolations(llm: BaseLanguageModel, input: string): Promise<string> { async checkForViolations(llm: BaseLanguageModel, input: string): Promise<string> {
const openAIApiKey = (llm as any).openAIApiKey const openAIApiKey = (llm as any).openAIApiKey
@@ -31,32 +19,13 @@ export class OpenAIModerationRunner implements Moderation {
const { output: moderationOutput, results } = await moderation.call({ const { output: moderationOutput, results } = await moderation.call({
input: input input: input
}) })
if (this.moderationConfig != 'useCustom' && results[0].flagged) { if (results[0].flagged) {
throw Error(this.moderationErrorMessage) throw Error(this.moderationErrorMessage)
} }
if (this.moderationConfig != 'useDefault') {
const categoryScores = results[0].category_scores
if (
categoryScores['harassment'] > this.harassment ||
categoryScores['harassment/threatening'] > this.harassmentThreatening ||
categoryScores['self-harm'] > this.selfHarm ||
categoryScores['self-harm/intent'] > this.selfHarmIntent ||
categoryScores['self-harm/instructions'] > this.selfHarmInstructions ||
categoryScores['sexual'] > this.sexual ||
categoryScores['sexual/minors'] > this.sexualMinors ||
categoryScores['hate'] > this.hate ||
categoryScores['hate/threatening'] > this.hateThreatening ||
categoryScores['violence'] > this.violence ||
categoryScores['violence/graphic'] > this.violenceGraphic
) {
throw Error(this.moderationErrorMessage)
}
}
return moderationOutput return moderationOutput
} }
setParameter(category: string, value: number) { setErrorMessage(message: string) {
// @ts-ignore this.moderationErrorMessage = message
this[category] = value
} }
} }