@@ -165,10 +165,10 @@ def forward(self, x, mask):
 @register_model_architecture('transformer', 'transformer')
 def base_architecture(args):
     args.encoder_embed_path = getattr(args, 'encoder_embed_path', None)
-    args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 512)
-    args.encoder_ffn_embed_dim = getattr(args, 'encoder_ffn_embed_dim', 2048)
-    args.encoder_layers = getattr(args, 'encoder_layers', 1)
-    args.encoder_attention_heads = getattr(args, 'encoder_attention_heads', 4)
+    args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 128)
+    args.encoder_ffn_embed_dim = getattr(args, 'encoder_ffn_embed_dim', 512)
+    args.encoder_layers = getattr(args, 'encoder_layers', 6)
+    args.encoder_attention_heads = getattr(args, 'encoder_attention_heads', 8)
     args.encoder_normalize_before = getattr(args, 'encoder_normalize_before', False)
     args.encoder_learned_pos = getattr(args, 'encoder_learned_pos', False)
     args.attention_dropout = getattr(args, 'attention_dropout', 0.)
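
For context, a minimal sketch (not the fairseq code itself) of how these getattr-based defaults behave: base_architecture only fills in attributes that are not already set on args, so user-supplied values survive while missing options pick up the new defaults (128-dim embeddings, 512-dim FFN, 6 layers, 8 heads). The argparse.Namespace below is just for illustration.

# Illustrative sketch of the getattr-default pattern used above (assumed names).
from argparse import Namespace


def base_architecture(args):
    # Each line fills an attribute only if the user did not supply it.
    args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 128)
    args.encoder_ffn_embed_dim = getattr(args, 'encoder_ffn_embed_dim', 512)
    args.encoder_layers = getattr(args, 'encoder_layers', 6)
    args.encoder_attention_heads = getattr(args, 'encoder_attention_heads', 8)


args = Namespace(encoder_layers=12)   # user overrides only the layer count
base_architecture(args)
print(args.encoder_embed_dim)         # 128 (default from this patch)
print(args.encoder_layers)            # 12  (user value preserved)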