add autogptq
Former-commit-id: 43321557c272862d9c6531fc48a4569cfc88e4e7
47
tests/auto_gptq.py
Normal file
@@ -0,0 +1,47 @@
# coding=utf-8
# Quantizes fine-tuned models with AutoGPTQ (https://github.com/PanQiWei/AutoGPTQ).
# Usage: python auto_gptq.py --input_dir path_to_llama_model --output_dir path_to_quant_model --data_file alpaca.json
# dataset format: instruction (string), output (string), history (optional list of [query, response] pairs)
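# A hypothetical record in this format (field names match format_example below):
# {"instruction": "Who are you?", "output": "I am an AI assistant.", "history": [["Hello", "Hi, how can I help you?"]]}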

import fire
from datasets import load_dataset
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig


def quantize(input_dir: str, output_dir: str, data_file: str):
    tokenizer = AutoTokenizer.from_pretrained(input_dir, use_fast=False, padding_side="left")

    def format_example(examples):
        prefix = ("A chat between a curious user and an artificial intelligence assistant. "
                  "The assistant gives helpful, detailed, and polite answers to the user's questions.")
        texts = []
        for i in range(len(examples["instruction"])):
            prompt = prefix + "\n"
            if "history" in examples:
                for user_query, bot_resp in examples["history"][i]:
                    prompt += "Human: {}\nAssistant: {}\n".format(user_query, bot_resp)
            prompt += "Human: {}\nAssistant: {}".format(examples["instruction"][i], examples["output"][i])
            texts.append(prompt)
        return tokenizer(texts, truncation=True, max_length=1024)

    dataset = load_dataset("json", data_files=data_file)["train"]
    column_names = list(dataset.column_names)
    dataset = dataset.select(range(min(len(dataset), 1024)))  # avoid IndexError on datasets smaller than 1024
    dataset = dataset.map(format_example, batched=True, remove_columns=column_names)
    dataset = dataset.shuffle()

    quantize_config = BaseQuantizeConfig(
        bits=4,          # quantize weights to 4 bits
        group_size=128,  # share quantization parameters within groups of 128 weights
        desc_act=False   # skip activation-order quantization for faster inference
    )

    model = AutoGPTQForCausalLM.from_pretrained(input_dir, quantize_config)
    model.quantize(dataset)
    model.save_quantized(output_dir)


if __name__ == "__main__":
    fire.Fire(quantize)
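For reference, a minimal sketch of loading the quantized output for inference, assuming the standard AutoGPTQ from_quantized API; the model paths and device below are illustrative:

from auto_gptq import AutoGPTQForCausalLM
from transformers import AutoTokenizer

# Tokenizer comes from the original model dir (save_quantized does not save it).
tokenizer = AutoTokenizer.from_pretrained("path_to_llama_model", use_fast=False)
model = AutoGPTQForCausalLM.from_quantized("path_to_quant_model", device="cuda:0")

inputs = tokenizer("Human: Hello\nAssistant:", return_tensors="pt").to("cuda:0")
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=32)[0], skip_special_tokens=True))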
133
tests/evaluate_zh.py
Normal file
@@ -0,0 +1,133 @@
# coding=utf-8
# Evaluates fine-tuned models automatically.
# Usage: python evaluate_zh.py --evalset ceval/ceval-exam:law --split dev --output_file result.json
#        --api_base http://localhost:8000/v1 --task_type choice --n_samples 100
# dataset format: question (string), A (string), B (string), C (string), D (string), answer (Literal["A", "B", "C", "D"])
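# A hypothetical record in this format:
# {"id": "0", "question": "中国的首都是哪里?", "A": "上海", "B": "北京", "C": "广州", "D": "深圳", "answer": "B"}
# (question: "What is the capital of China?"; the options are city names and the correct one is B, Beijing)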

import os
import fire
import json
import openai
from tqdm import tqdm
from typing import Literal, Optional
from datasets import load_dataset


def format_example_choice(examples):
    model_inputs = {"query": [], "label": []}
    # Template (zh): "Choose the correct option among A, B, C and D; output only the option letter."
    task_template = "请从ABCD四个选项中选出正确的选项,仅输出选项序号。\n{question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\n答案:"
    for i in range(len(examples["id"])):
        query = task_template.format(
            question=examples["question"][i],
            A=examples["A"][i],
            B=examples["B"][i],
            C=examples["C"][i],
            D=examples["D"][i]
        )
        label = examples["answer"][i]
        model_inputs["query"].append(query)
        model_inputs["label"].append(label)
    return model_inputs


def format_example_cloze(examples):
    model_inputs = {"query": [], "label": []}
    # Template (zh): "Fill in the blank with the correct answer; output only the correct option."
    task_template = "请选择正确的答案填空,仅输出正确的选项。\n{question}\n选项:{A}\n{B}\n{C}\n{D}\n答案:"
    for i in range(len(examples["id"])):
        query = task_template.format(
            question=examples["question"][i],
            A=examples["A"][i],
            B=examples["B"][i],
            C=examples["C"][i],
            D=examples["D"][i]
        )
        label = examples[examples["answer"][i]][i]  # label is the full text of the correct option
        model_inputs["query"].append(query)
        model_inputs["label"].append(label)
    return model_inputs


def format_example_openqa(examples):
    model_inputs = {"query": [], "label": []}
    # Template (zh): "Answer the following question: {question}\nAnswer:"
    task_template = "回答以下问题:{question}\n答案:"
    for i in range(len(examples["id"])):
        query = task_template.format(question=examples["question"][i])
        label = examples[examples["answer"][i]][i]
        model_inputs["query"].append(query)
        model_inputs["label"].append(label)
    return model_inputs


TASK_DICT = {
    "choice": format_example_choice,
    "cloze": format_example_cloze,
    "openqa": format_example_openqa
}


EXT2TYPE = {
    "csv": "csv",
    "json": "json",
    "jsonl": "json"
}


def evaluate(
    evalset: str,
    api_base: str,
    output_file: str,
    split: Optional[str] = "val",
    task_type: Optional[Literal["choice", "cloze", "openqa"]] = "choice",
    n_samples: Optional[int] = 20
):
    openai.api_base = api_base
    openai.api_key = "none"

    if os.path.isfile(evalset):  # local file: infer the loader from the extension
        dataset = load_dataset(EXT2TYPE[evalset.split(".")[-1]], data_files=evalset)["train"]
    elif ":" in evalset:  # hub dataset with a subset, e.g. ceval/ceval-exam:law
        evalset, subset = evalset.split(":")
        dataset = load_dataset(evalset, subset, split=split)
    else:
        dataset = load_dataset(evalset, split=split)

    n_samples = min(len(dataset), n_samples)
    dataset = dataset.select(range(n_samples))  # select first, so only evaluated examples are formatted
    dataset = dataset.map(TASK_DICT[task_type], batched=True)

    n_correct = 0
    predictions = []
    for example in tqdm(dataset):
        query, label = example["query"], example["label"]
        predict = openai.ChatCompletion.create(
            model="default",
            messages=[{"role": "user", "content": query}],
            temperature=0.01,  # near-greedy decoding for reproducible answers
            top_p=0.01,
            max_tokens=20
        ).choices[0].message.content

        if task_type == "choice" and len(predict) > 0 and predict[0].lower() == label[0].lower():
            n_correct += 1
        if task_type == "cloze" and label in [predict[:len(label)], predict[-len(label):]]:
            n_correct += 1
        if task_type == "openqa" and label in predict:
            n_correct += 1

        predictions.append({
            "query": query,
            "label": label,
            "predict": predict
        })

    print("Result: {}/{}\nAccuracy: {:.2f}%".format(n_correct, n_samples, n_correct / n_samples * 100))

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(predictions, f, indent=2, ensure_ascii=False)


if __name__ == "__main__":
    fire.Fire(evaluate)
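A minimal follow-up sketch (hypothetical, not part of this commit) for re-scoring choice-type predictions offline from the saved result.json:

import json

with open("result.json", encoding="utf-8") as f:
    predictions = json.load(f)

# Same matching rule as the choice branch above: compare the first character.
n_correct = sum(
    1 for p in predictions
    if p["predict"] and p["predict"][0].lower() == p["label"][0].lower()
)
print("Accuracy: {:.2f}%".format(n_correct / len(predictions) * 100))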