ResponsibleAI - Input Moderation - Simplifying the Options for OpenAI Moderation.
parent ad8281e553
commit 619fb4f5c1
@@ -24,26 +24,6 @@ class OpenAIModeration implements INode {
         this.description = 'Check whether content complies with OpenAI usage policies.'
         this.baseClasses = [this.type, ...getBaseClasses(Moderation)]
         this.inputs = [
-            {
-                label: 'Moderation Checks',
-                name: 'moderationConfig',
-                type: 'options',
-                default: 'useDefault',
-                options: [
-                    {
-                        label: 'OpenAI Default',
-                        name: 'useDefault'
-                    },
-                    {
-                        label: 'Use Custom Threshold Values',
-                        name: 'useCustom'
-                    },
-                    {
-                        label: 'Combine OpenAI Default with Custom Threshold Values',
-                        name: 'combineBoth'
-                    }
-                ]
-            },
             {
                 label: 'Error Message',
                 name: 'moderationErrorMessage',
@@ -51,102 +31,14 @@ class OpenAIModeration implements INode {
                 rows: 2,
                 default: "Cannot Process! Input violates OpenAI's content moderation policies.",
                 optional: true
-            },
-            {
-                label: 'Threshold Score - Sexual',
-                name: 'catSexualThreshold',
-                type: 'number',
-                default: 0.01,
-                additionalParams: true
-            },
-            {
-                label: 'Threshold Score - Sexual/Minors',
-                name: 'catSexualMinorsThreshold',
-                type: 'number',
-                default: 0.01,
-                additionalParams: true
-            },
-            {
-                label: 'Threshold Score - Hate',
-                name: 'catHateThreshold',
-                type: 'number',
-                default: 0.01,
-                additionalParams: true
-            },
-            {
-                label: 'Threshold Score - Hate/Threatening',
-                name: 'catHateThreateningThreshold',
-                type: 'number',
-                default: 0.01,
-                additionalParams: true
-            },
-            {
-                label: 'Threshold Score - Harassment',
-                name: 'catHarassmentThreshold',
-                type: 'number',
-                default: 0.01,
-                additionalParams: true
-            },
-            {
-                label: 'Threshold Score - Harassment/Threatening',
-                name: 'catHarassmentThreateningThreshold',
-                type: 'number',
-                default: 0.01,
-                additionalParams: true
-            },
-            {
-                label: 'Threshold Score - Self Harm',
-                name: 'catSelfHarmThreshold',
-                type: 'number',
-                default: 0.01,
-                additionalParams: true
-            },
-            {
-                label: 'Threshold Score - Self-Harm/Intent',
-                name: 'catSelfHarmIntentThreshold',
-                type: 'number',
-                default: 0.01,
-                additionalParams: true
-            },
-            {
-                label: 'Threshold Score - Self-Harm/Instructions',
-                name: 'catSelfHarmInstructionsThreshold',
-                type: 'number',
-                default: 0.01,
-                additionalParams: true
-            },
-            {
-                label: 'Threshold Score - Violence',
-                name: 'catViolenceThreshold',
-                type: 'number',
-                default: 0.01,
-                additionalParams: true
-            },
-            {
-                label: 'Threshold Score - Violence/Graphic',
-                name: 'catViolenceGraphicThreshold',
-                type: 'number',
-                default: 0.01,
-                additionalParams: true
             }
         ]
     }

     async init(nodeData: INodeData): Promise<any> {
         const runner = new OpenAIModerationRunner()
-        this.inputs.forEach((value) => {
-            if (value.additionalParams === true) {
-                // remove the prefix - 'cat'
-                let categoryName = value.name.substring(3)
-                // remove the suffix - 'Threshold'
-                categoryName = categoryName.substring(0, categoryName.length - 9)
-                categoryName = categoryName.substring(0, 1).toLowerCase() + categoryName.substring(1)
-                let categoryThreshold = nodeData.inputs ? nodeData.inputs[value.name] : value.default
-                runner.setParameter(categoryName, parseFloat(categoryThreshold))
-            } else {
-                runner.setParameter(value.name, nodeData.inputs ? nodeData.inputs[value.name] : value.default)
-            }
-        })
+        const moderationErrorMessage = nodeData.inputs?.moderationErrorMessage as string
+        if (moderationErrorMessage) runner.setErrorMessage(moderationErrorMessage)
         return runner
     }
 }
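Assembled from the context lines kept above, the node now exposes a single optional Error Message input and a two-line init(). A minimal sketch of the post-change shape, for orientation only; the rest of the class (label, name, icon, imports) and the field's 'string' type sit outside this diff and are assumptions:

// Sketch of the simplified node; members outside the diff are assumed.
class OpenAIModeration implements INode {
    type: string // assumed, referenced by baseClasses below
    description: string
    baseClasses: string[]
    inputs: INodeParams[] // INodeParams is assumed from the component interfaces

    constructor() {
        this.description = 'Check whether content complies with OpenAI usage policies.'
        this.baseClasses = [this.type, ...getBaseClasses(Moderation)]
        this.inputs = [
            {
                label: 'Error Message',
                name: 'moderationErrorMessage',
                type: 'string', // assumed; this property sits in an unchanged region of the file
                rows: 2,
                default: "Cannot Process! Input violates OpenAI's content moderation policies.",
                optional: true
            }
        ]
    }

    async init(nodeData: INodeData): Promise<any> {
        const runner = new OpenAIModerationRunner()
        const moderationErrorMessage = nodeData.inputs?.moderationErrorMessage as string
        if (moderationErrorMessage) runner.setErrorMessage(moderationErrorMessage)
        return runner
    }
}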
@@ -3,19 +3,7 @@ import { BaseLanguageModel } from 'langchain/base_language'
 import { OpenAIModerationChain } from 'langchain/chains'

 export class OpenAIModerationRunner implements Moderation {
-    private moderationConfig: string = 'useDefault'
     private moderationErrorMessage: string = "Text was found that violates OpenAI's content policy."
-    private sexual: number = 0.01
-    private sexualMinors: number = 0.01
-    private hate: number = 0.01
-    private hateThreatening: number = 0.01
-    private harassment: number = 0.01
-    private harassmentThreatening: number = 0.01
-    private selfHarm: number = 0.01
-    private selfHarmIntent: number = 0.01
-    private selfHarmInstructions: number = 0.01
-    private violence: number = 0.01
-    private violenceGraphic: number = 0.01

     async checkForViolations(llm: BaseLanguageModel, input: string): Promise<string> {
         const openAIApiKey = (llm as any).openAIApiKey
@@ -31,32 +19,13 @@ export class OpenAIModerationRunner implements Moderation {
         const { output: moderationOutput, results } = await moderation.call({
             input: input
         })
-        if (this.moderationConfig != 'useCustom' && results[0].flagged) {
+        if (results[0].flagged) {
             throw Error(this.moderationErrorMessage)
         }
-        if (this.moderationConfig != 'useDefault') {
-            const categoryScores = results[0].category_scores
-            if (
-                categoryScores['harassment'] > this.harassment ||
-                categoryScores['harassment/threatening'] > this.harassmentThreatening ||
-                categoryScores['self-harm'] > this.selfHarm ||
-                categoryScores['self-harm/intent'] > this.selfHarmIntent ||
-                categoryScores['self-harm/instructions'] > this.selfHarmInstructions ||
-                categoryScores['sexual'] > this.sexual ||
-                categoryScores['sexual/minors'] > this.sexualMinors ||
-                categoryScores['hate'] > this.hate ||
-                categoryScores['hate/threatening'] > this.hateThreatening ||
-                categoryScores['violence'] > this.violence ||
-                categoryScores['violence/graphic'] > this.violenceGraphic
-            ) {
-                throw Error(this.moderationErrorMessage)
-            }
-        }
         return moderationOutput
     }

-    setParameter(category: string, value: number) {
-        // @ts-ignore
-        this[category] = value
+    setErrorMessage(message: string) {
+        this.moderationErrorMessage = message
     }
 }
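For orientation, a minimal usage sketch of the simplified runner. The chat-model import path, the relative import, and the variable names are illustrative assumptions, not part of this commit; the only behavior relied on is what the diff shows: setErrorMessage() overrides the default message, and checkForViolations() throws it whenever OpenAI flags the input.

import { ChatOpenAI } from 'langchain/chat_models/openai' // assumed path for this LangChain version
import { OpenAIModerationRunner } from './OpenAIModerationRunner' // relative path is an assumption

async function moderateInput(userInput: string): Promise<string> {
    // The runner reads openAIApiKey off the model via (llm as any), so any
    // OpenAI-backed LangChain model exposing that property should work.
    const llm = new ChatOpenAI({ openAIApiKey: process.env.OPENAI_API_KEY })

    const runner = new OpenAIModerationRunner()
    runner.setErrorMessage("Cannot Process! Input violates OpenAI's content moderation policies.")

    // Throws the configured message when results[0].flagged is true;
    // otherwise returns the moderation chain's output for the next step.
    return runner.checkForViolations(llm, userInput)
}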