From 2f5f6722cf138c72e69acc73c966ca9c5c200a3e Mon Sep 17 00:00:00 2001 From: hiyouga Date: Sun, 5 May 2024 00:53:07 +0800 Subject: [PATCH] fix eval scripts Former-commit-id: fc3743d0b82c28fbff1170761139e4fa5d2a8939 --- evaluation/ceval/ceval.py | 14 ++-- evaluation/cmmlu/cmmlu.py | 134 +++++++++++++++++++------------------- evaluation/mmlu/mmlu.py | 12 +--- 3 files changed, 74 insertions(+), 86 deletions(-) diff --git a/evaluation/ceval/ceval.py b/evaluation/ceval/ceval.py index 33005de34..4111d6b4e 100644 --- a/evaluation/ceval/ceval.py +++ b/evaluation/ceval/ceval.py @@ -19,7 +19,7 @@ import pandas as pd _CITATION = """\ @article{huang2023ceval, - title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, + title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian}, journal={arXiv preprint arXiv:2305.08322}, year={2023} @@ -133,25 +133,19 @@ class Ceval(datasets.GeneratorBasedBuilder): datasets.SplitGenerator( name=datasets.Split.TEST, gen_kwargs={ - "filepath": os.path.join( - data_dir, "test", f"{task_name}_test.csv" - ), + "filepath": os.path.join(data_dir, "test", f"{task_name}_test.csv"), }, ), datasets.SplitGenerator( name=datasets.Split.VALIDATION, gen_kwargs={ - "filepath": os.path.join( - data_dir, "val", f"{task_name}_val.csv" - ), + "filepath": os.path.join(data_dir, "val", f"{task_name}_val.csv"), }, ), datasets.SplitGenerator( name=datasets.Split.TRAIN, gen_kwargs={ - "filepath": os.path.join( - data_dir, "dev", f"{task_name}_dev.csv" - ), + "filepath": os.path.join(data_dir, "dev", f"{task_name}_dev.csv"), }, ), ] diff --git a/evaluation/cmmlu/cmmlu.py b/evaluation/cmmlu/cmmlu.py index 620962037..37efb3288 100644 --- a/evaluation/cmmlu/cmmlu.py +++ b/evaluation/cmmlu/cmmlu.py @@ -37,73 +37,73 @@ _LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 Internatio _URL = "cmmlu.zip" task_list = [ - 'agronomy', - 'anatomy', - 'ancient_chinese', - 'arts', - 'astronomy', - 'business_ethics', - 'chinese_civil_service_exam', - 'chinese_driving_rule', - 'chinese_food_culture', - 'chinese_foreign_policy', - 'chinese_history', - 'chinese_literature', - 'chinese_teacher_qualification', - 'clinical_knowledge', - 'college_actuarial_science', - 'college_education', - 'college_engineering_hydrology', - 'college_law', - 'college_mathematics', - 'college_medical_statistics', - 'college_medicine', - 'computer_science', - 'computer_security', - 'conceptual_physics', - 'construction_project_management', - 'economics', - 'education', - 'electrical_engineering', - 'elementary_chinese', - 'elementary_commonsense', - 'elementary_information_and_technology', - 'elementary_mathematics', - 'ethnology', - 'food_science', - 'genetics', - 'global_facts', - 'high_school_biology', - 'high_school_chemistry', - 'high_school_geography', - 'high_school_mathematics', - 'high_school_physics', - 'high_school_politics', - 'human_sexuality', - 'international_law', - 'journalism', - 'jurisprudence', - 'legal_and_moral_basis', - 'logical', - 'machine_learning', - 'management', - 'marketing', - 'marxist_theory', - 'modern_chinese', - 'nutrition', - 'philosophy', - 'professional_accounting', - 'professional_law', - 'professional_medicine', - 'professional_psychology', - 'public_relations', - 'security_study', - 'sociology', - 'sports_science', - 'traditional_chinese_medicine', - 'virology', - 'world_history', - 'world_religions', + "agronomy", + "anatomy", + "ancient_chinese", + "arts", + "astronomy", + "business_ethics", + "chinese_civil_service_exam", + "chinese_driving_rule", + "chinese_food_culture", + "chinese_foreign_policy", + "chinese_history", + "chinese_literature", + "chinese_teacher_qualification", + "clinical_knowledge", + "college_actuarial_science", + "college_education", + "college_engineering_hydrology", + "college_law", + "college_mathematics", + "college_medical_statistics", + "college_medicine", + "computer_science", + "computer_security", + "conceptual_physics", + "construction_project_management", + "economics", + "education", + "electrical_engineering", + "elementary_chinese", + "elementary_commonsense", + "elementary_information_and_technology", + "elementary_mathematics", + "ethnology", + "food_science", + "genetics", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_geography", + "high_school_mathematics", + "high_school_physics", + "high_school_politics", + "human_sexuality", + "international_law", + "journalism", + "jurisprudence", + "legal_and_moral_basis", + "logical", + "machine_learning", + "management", + "marketing", + "marxist_theory", + "modern_chinese", + "nutrition", + "philosophy", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_study", + "sociology", + "sports_science", + "traditional_chinese_medicine", + "virology", + "world_history", + "world_religions", ] diff --git a/evaluation/mmlu/mmlu.py b/evaluation/mmlu/mmlu.py index 9f1bd101f..f3218c384 100644 --- a/evaluation/mmlu/mmlu.py +++ b/evaluation/mmlu/mmlu.py @@ -136,25 +136,19 @@ class MMLU(datasets.GeneratorBasedBuilder): datasets.SplitGenerator( name=datasets.Split.TEST, gen_kwargs={ - "filepath": os.path.join( - data_dir, "data", "test", f"{task_name}_test.csv" - ), + "filepath": os.path.join(data_dir, "data", "test", f"{task_name}_test.csv"), }, ), datasets.SplitGenerator( name=datasets.Split.VALIDATION, gen_kwargs={ - "filepath": os.path.join( - data_dir, "data", "val", f"{task_name}_val.csv" - ), + "filepath": os.path.join(data_dir, "data", "val", f"{task_name}_val.csv"), }, ), datasets.SplitGenerator( name=datasets.Split.TRAIN, gen_kwargs={ - "filepath": os.path.join( - data_dir, "data", "dev", f"{task_name}_dev.csv" - ), + "filepath": os.path.join(data_dir, "data", "dev", f"{task_name}_dev.csv"), }, ), ]