from transformers import PretrainedConfig


class EATConfig(PretrainedConfig):
    """Configuration class for EAT audio transformer models.

    Stores the hyperparameters used to instantiate an EAT model
    (``model_type="eat"``). Inherits from Hugging Face
    :class:`~transformers.PretrainedConfig`, so it supports
    ``from_pretrained`` / ``save_pretrained`` serialization and forwards
    any extra keyword arguments (e.g. ``id2label``) to the base class.

    Args:
        embed_dim (int, defaults to 768):
            Dimensionality of the transformer hidden states.
        depth (int, defaults to 12):
            Number of transformer encoder layers.
        num_heads (int, defaults to 12):
            Number of attention heads per layer.
        mlp_ratio (float, defaults to 4.0):
            Ratio of the MLP hidden size to ``embed_dim``.
        num_classes (int, defaults to 527):
            Size of the classification head output.
            # NOTE(review): 527 presumably matches the AudioSet label set — confirm.
        model_variant (str, defaults to ``"pretrain"``):
            Which variant to build; the inline comment in the original
            indicates ``"pretrain"`` or ``"finetune"``.
        in_chans (int, defaults to 1):
            Number of input channels (1 for a single spectrogram channel).
        patch_size (int, defaults to 16):
            Side length of each square input patch.
        stride (int, defaults to 16):
            Stride of the patch-embedding convolution; equal to
            ``patch_size`` means non-overlapping patches.
        fixed_positions (bool, defaults to True):
            Whether to use fixed (non-learned) positional embeddings.
            # NOTE(review): semantics inferred from the name — verify in the model code.
        qkv_bias (bool, defaults to True):
            Whether the query/key/value projections include bias terms.
        layer_norm_first (bool, defaults to False):
            If True, apply LayerNorm before attention/MLP (pre-norm);
            otherwise post-norm.
        norm_affine (bool, defaults to True):
            Whether normalization layers have learnable affine parameters.
        norm_eps (float, defaults to 1e-6):
            Epsilon used by normalization layers for numerical stability.
        drop_rate (float, defaults to 0.0):
            General dropout probability.
        attn_drop_rate (float, defaults to 0.0):
            Dropout probability on attention weights.
        activation_dropout (float, defaults to 0.0):
            Dropout probability after the MLP activation.
        post_mlp_drop (float, defaults to 0.0):
            Dropout probability applied after the MLP block.
        start_drop_path_rate (float, defaults to 0.0):
            Stochastic-depth (drop-path) rate at the first layer.
        end_drop_path_rate (float, defaults to 0.0):
            Stochastic-depth rate at the last layer; rates are typically
            interpolated across ``depth`` layers between start and end.
        **kwargs:
            Forwarded to :class:`~transformers.PretrainedConfig`.
    """

    model_type = "eat"

    def __init__(
        self,
        # --- 1. Core Architecture (Dimensions) ---
        embed_dim=768,
        depth=12,
        num_heads=12,
        mlp_ratio=4.0,
        num_classes=527,
        model_variant="pretrain",  # or "finetune"
        # --- 2. Input & Patching ---
        in_chans=1,
        patch_size=16,
        stride=16,
        fixed_positions=True,
        # --- 3. Normalization & Bias Structure ---
        qkv_bias=True,
        layer_norm_first=False,
        norm_affine=True,
        norm_eps=1e-6,
        # --- 4. Fine-Tuning Knobs (Regularization) ---
        drop_rate=0.0,
        attn_drop_rate=0.0,
        activation_dropout=0.0,
        post_mlp_drop=0.0,
        start_drop_path_rate=0.0,
        end_drop_path_rate=0.0,
        # --- 5. Hugging Face Extras ---
        **kwargs,
    ):
        # Let PretrainedConfig consume standard HF kwargs first
        # (id2label, torch_dtype, etc.).
        super().__init__(**kwargs)
        # --- 1. Core Architecture ---
        self.embed_dim = embed_dim
        self.depth = depth
        self.num_heads = num_heads
        self.mlp_ratio = mlp_ratio
        self.num_classes = num_classes
        self.model_variant = model_variant
        # --- 2. Input & Patching ---
        self.in_chans = in_chans
        self.patch_size = patch_size
        self.stride = stride
        self.fixed_positions = fixed_positions
        # --- 3. Normalization & Bias ---
        self.qkv_bias = qkv_bias
        self.layer_norm_first = layer_norm_first
        self.norm_affine = norm_affine
        self.norm_eps = norm_eps
        # --- 4. Regularization ---
        self.drop_rate = drop_rate
        self.attn_drop_rate = attn_drop_rate
        self.activation_dropout = activation_dropout
        self.post_mlp_drop = post_mlp_drop
        self.start_drop_path_rate = start_drop_path_rate
        self.end_drop_path_rate = end_drop_path_rate