Skip to content

Commit

Permalink
Add Financial Phrasebank scenario
Browse files Browse the repository at this point in the history
Co-authored-by: Ryo Kawahara <[email protected]>
Co-authored-by: Mikio Takeuchi <[email protected]>
  • Loading branch information
3 people committed Jan 30, 2025
1 parent 92e3ee1 commit 554bc84
Show file tree
Hide file tree
Showing 3 changed files with 142 additions and 0 deletions.
23 changes: 23 additions & 0 deletions src/helm/benchmark/run_specs/enterprise_run_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,29 @@ def get_news_headline_spec(category: str) -> RunSpec:
)


@run_spec_function("financial_phrasebank")
def get_financial_phrasebank_spec(agreement: int = 50) -> RunSpec:
    """Build the run spec for the Financial Phrasebank sentiment scenario.

    Args:
        agreement: Value forwarded to the scenario as its `agreement` argument
            and embedded in the run spec name.

    Returns:
        A RunSpec combining the scenario, a generation adapter that uses the
        scenario's INSTRUCTIONS, and exact-match plus weighted classification
        metrics over the three sentiment labels.
    """
    # Imported lazily inside the function, as in the original.
    from helm.benchmark.scenarios.financial_phrasebank_scenario import FinancialPhrasebankScenario

    sentiment_labels = ["positive", "neutral", "negative"]

    return RunSpec(
        name=f"financial_phrasebank:agreement={agreement}",
        scenario_spec=ScenarioSpec(
            class_name="helm.benchmark.scenarios.financial_phrasebank_scenario.FinancialPhrasebankScenario",
            args={"agreement": agreement},
        ),
        adapter_spec=get_generation_adapter_spec(
            instructions=FinancialPhrasebankScenario.INSTRUCTIONS,
            input_noun="Headline",
            output_noun="Answer",
        ),
        metric_specs=(
            get_exact_match_metric_specs() + _get_weighted_classification_metric_specs(labels=sentiment_labels)
        ),
        groups=["financial_phrasebank"],
    )


# Legal


Expand Down
101 changes: 101 additions & 0 deletions src/helm/benchmark/scenarios/financial_phrasebank_scenario.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import os
import random
from typing import List

from helm.common.general import ensure_file_downloaded
from helm.benchmark.scenarios.scenario import (
Scenario,
Instance,
Reference,
CORRECT_TAG,
TRAIN_SPLIT,
TEST_SPLIT,
Input,
Output,
)


def get_instructions():
    """Return the task instructions for financial-news sentiment classification.

    The text spans six lines: a dataset description, the classification task,
    a "Possible labels:" header, and the three numbered labels.
    """
    return (
        "The dataset consists of sentences from English language financial news categorized by sentiment.\n"
        "Classify the sentences into one of the 3 sentiment categories.\n"
        "Possible labels:\n1. positive\n2. neutral\n3. negative"
    )


class FinancialPhrasebankScenario(Scenario):
    """
    A sentiment classification benchmark based on the dataset from Good Debt or Bad Debt - Detecting Semantic
    Orientations in Economic Texts [(Malo et al., 2013)](https://arxiv.org/abs/1307.5336).

    Context:
        Polar sentiment dataset of sentences from financial news. The dataset consists of 4840 sentences from
        English language financial news categorized by sentiment. The dataset is divided by agreement rate of
        5-8 annotators.

        This release of the financial phrase bank covers a collection of 4840 sentences. The selected collection
        of phrases was annotated by 16 people with adequate background knowledge on financial markets.

        Given the large number of overlapping annotations (5 to 8 annotations per sentence), there are several
        ways to define a majority vote based gold standard. To provide an objective comparison, the paper authors
        have formed 4 alternative reference datasets based on the strength of majority agreement:
        100%, 75%, 66% and 50%.

    Data source:
        https://huggingface.co/datasets/takala/financial_phrasebank

    Reference:
        P. Malo, A. Sinha, P. Korhonen, J. Wallenius, and P. Takala, “Good debt or bad debt: Detecting semantic
        orientations in economic texts,” Journal of the Association for Information Science and Technology,
        vol. 65, 2014.
        https://arxiv.org/pdf/1307.5336
    """

    name = "financial_phrasebank"
    description = (
        "The dataset consists of 4840 sentences from English "
        "language financial news categorized by sentiment."
    )
    tags = ["finance", "sentiment analysis", "classification"]

    INSTRUCTIONS = (
        "The dataset consists of sentences from English language financial news categorized by sentiment.\n"
        "Classify the sentences into one of the 3 sentiment categories.\n"
        "Possible labels:\n1. positive\n2. neutral\n3. negative"
    )
    DATASET_URL = "https://huggingface.co/datasets/takala/financial_phrasebank/resolve/598b6aad98f7c8d67be161b12a4b5f2497e07edd/data/FinancialPhraseBank-v1.0.zip"  # noqa: E501
    # Supported annotator-agreement levels (percent); 100 selects the "AllAgree" file.
    AGREEMENT_VALUES = [50, 66, 75, 100]
    # Fraction of the shuffled sentences assigned to the train split; the rest go to test.
    TRAIN_SPLIT_SIZE = 0.7

    def __init__(self, agreement: int, random_seed: int = 121):
        """Initialize the scenario.

        Args:
            agreement: Annotator-agreement level (in percent) selecting which data file is used.
                Must be one of AGREEMENT_VALUES, i.e. 50, 66, 75 or 100.
            random_seed: Seed for the shuffle that determines the train/test split.

        Raises:
            ValueError: If `agreement` is not one of AGREEMENT_VALUES.
        """
        super().__init__()
        if agreement not in self.AGREEMENT_VALUES:
            raise ValueError(f"Unknown `agreement` value: {agreement}, allowed values are {self.AGREEMENT_VALUES}")
        self.agreement = agreement
        self.random_seed = random_seed

    def get_instances(self, output_path: str) -> List[Instance]:
        """Download the dataset if needed and return the instances, split into train and test.

        Each non-blank line of the selected file is parsed as `<sentence>@<label>`. The lines are
        shuffled with `random_seed`; the first TRAIN_SPLIT_SIZE fraction becomes the train split
        and the remainder becomes the test split.

        Args:
            output_path: Directory under which the dataset archive is downloaded and unpacked.

        Returns:
            One Instance per sentence, with the label as its single correct reference.
        """
        data_parent_path = os.path.join(output_path, "data")
        ensure_file_downloaded(
            self.DATASET_URL,
            data_parent_path,
            unpack=True,
            unpack_type="unzip",
        )
        file_name = "Sentences_AllAgree.txt" if self.agreement == 100 else f"Sentences_{self.agreement}Agree.txt"
        data_file_path = os.path.join(data_parent_path, "FinancialPhraseBank-v1.0", file_name)

        with open(data_file_path, mode="r", encoding="iso-8859-1") as f:
            # Drop blank lines (e.g. a trailing newline) so they cannot break the `@` parsing below.
            lines = [line for line in f if line.strip()]
        random.Random(self.random_seed).shuffle(lines)

        # The split point must be derived from the number of input lines: the instance list
        # is still empty here, so using its length would put every instance in the test split.
        train_split_index = int(len(lines) * self.TRAIN_SPLIT_SIZE)

        instances: List[Instance] = []
        for index, line in enumerate(lines):
            # Split on the last "@" only, since the sentence itself may contain "@".
            sentence, label = line.strip().rsplit("@", 1)
            instance = Instance(
                input=Input(text=sentence),
                references=[Reference(Output(text=label), tags=[CORRECT_TAG])],
                split=TRAIN_SPLIT if index < train_split_index else TEST_SPLIT,
            )
            instances.append(instance)
        return instances
18 changes: 18 additions & 0 deletions src/helm/benchmark/static/schema_enterprise.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ run_groups:
category: All scenarios
subgroups:
- gold_commodity_news
- financial_phrasebank

- name: legal_scenarios
display_name: Legal Scenarios
Expand All @@ -136,6 +137,23 @@ run_groups:
subgroups:
- cti_to_mitre

# Run group for the Financial Phrasebank sentiment classification scenario.
# The headline metric is weighted F1 (classification_weighted_f1) on the test split.
- name: financial_phrasebank
display_name: Financial Phrasebank (Sentiment Classification)
description: A sentiment classification benchmark based on the dataset from Good Debt or Bad Debt - Detecting Semantic Orientations in Economic Texts [(Malo et al., 2013)](https://arxiv.org/abs/1307.5336).
metric_groups:
- accuracy
- efficiency
- general_information
environment:
main_name: classification_weighted_f1
main_split: test
taxonomy:
task: sentiment analysis
what: phrases from financial news texts and company press releases
who: annotators with adequate business education background
when: before 2013
language: English

- name: gold_commodity_news
display_name: Gold Commodity News
description: A classification benchmark based on a dataset of human-annotated gold commodity news headlines ([Sinha & Khandait, 2019](https://arxiv.org/abs/2009.04202)).
Expand Down

0 comments on commit 554bc84

Please sign in to comment.