Winner
# Example case in which the model's response is better than the ground truth.
# Every word passed in concept_set must be given in its root form.
evaluator.add_test(
    test_names=["winner_test"],
    data={
        "response": "The family sits at the table with delicious food placed in front of them.",
        "expected_response": "I sit at the front of the table and enjoy my food.",
        # NOTE(review): the _N / _V suffixes look like noun / verb tags — confirm.
        "concept_set": ["food_N", "front_N", "sit_V", "table_N"],
    },
    arguments={"model": "gpt-4"},
).run()
evaluator.print_results()
Last updated
Was this helpful?

