Upload CustomTextClassificationPipeline

Files changed (5)

config.json CHANGED Viewed

@@ -1,8 +1,22 @@
 {
   "architectures": [
-    "MultiHeadDebertaForSequenceClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
@@ -11,9 +25,10 @@
   "layer_norm_eps": 1e-07,
   "max_position_embeddings": 512,
   "max_relative_positions": -1,
-  "model_type": "deberta-v2",
   "norm_rel_ebd": "layer_norm",
   "num_attention_heads": 12,
   "num_hidden_layers": 6,
   "pad_token_id": 0,
   "pooler_dropout": 0,
@@ -28,7 +43,7 @@
   "relative_attention": true,
   "share_att_key": true,
   "torch_dtype": "float32",
-  "transformers_version": "4.45.2",
   "type_vocab_size": 0,
   "vocab_size": 128100
 }

 {
+  "_name_or_path": "./celadon",
   "architectures": [
+    "MultiHeadDebertaForSequenceClassificationModel"
   ],
   "attention_probs_dropout_prob": 0.1,
+  "auto_map": {
+    "AutoConfig": "configuration_deberta_multi.MultiHeadDebertaV2Config",
+    "AutoModelForSequenceClassification": "modelling_deberta_multi.MultiHeadDebertaForSequenceClassificationModel"
+  },
+  "custom_pipelines": {
+    "text-classification": {
+      "impl": "custom_pipeline.CustomTextClassificationPipeline",
+      "pt": [
+        "AutoModelForSequenceClassification"
+      ],
+      "tf": []
+    }
+  },
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "layer_norm_eps": 1e-07,
   "max_position_embeddings": 512,
   "max_relative_positions": -1,
+  "model_type": "multi-head-deberta-for-sequence-classification",
   "norm_rel_ebd": "layer_norm",
   "num_attention_heads": 12,
+  "num_heads": 5,
   "num_hidden_layers": 6,
   "pad_token_id": 0,
   "pooler_dropout": 0,
   "relative_attention": true,
   "share_att_key": true,
   "torch_dtype": "float32",
+  "transformers_version": "4.46.2",
   "type_vocab_size": 0,
   "vocab_size": 128100
 }

configuration_deberta_multi.py ADDED Viewed

+from transformers import DebertaV2Config
+class MultiHeadDebertaV2Config(DebertaV2Config):
+    model_type = "multi-head-deberta-for-sequence-classification"
+    def __init__(self, num_heads=5, **kwargs):
+        self.num_heads = num_heads
+        super().__init__(**kwargs)

special_tokens_map.json CHANGED Viewed

@@ -1,10 +1,46 @@
 {
-  "bos_token": "[CLS]",
-  "cls_token": "[CLS]",
-  "eos_token": "[SEP]",
-  "mask_token": "[MASK]",
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
   "unk_token": {
     "content": "[UNK]",
     "lstrip": false,

 {
+  "bos_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
   "unk_token": {
     "content": "[UNK]",
     "lstrip": false,

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json CHANGED Viewed

@@ -47,12 +47,19 @@
   "do_lower_case": false,
   "eos_token": "[SEP]",
   "mask_token": "[MASK]",
-  "model_max_length": 1000000000000000019884624838656,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "sp_model_kwargs": {},
   "split_by_punct": false,
   "tokenizer_class": "DebertaV2Tokenizer",
   "unk_token": "[UNK]",
   "vocab_type": "spm"
 }

   "do_lower_case": false,
   "eos_token": "[SEP]",
   "mask_token": "[MASK]",
+  "max_length": 512,
+  "model_max_length": 512,
+  "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
   "sep_token": "[SEP]",
   "sp_model_kwargs": {},
   "split_by_punct": false,
+  "stride": 0,
   "tokenizer_class": "DebertaV2Tokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "[UNK]",
   "vocab_type": "spm"
 }