import dataclasses
import warnings

from transformers import TrainingArguments, Trainer
class TrainingConfig:
    """Build Hugging Face ``TrainingArguments`` from a configuration mapping.

    ``config`` only needs a dict-style ``get(key, default)`` method. Keys that
    are missing from it fall back to the values in ``_DEFAULTS``.
    """

    # Marks "key absent from the config" so an explicit ``None`` is preserved.
    _UNSET = object()

    # Config key -> value used when the config does not provide that key.
    _DEFAULTS = {
        'output_dir': './fine-tuned-model',
        'num_train_epochs': 3,
        'per_device_train_batch_size': 8,
        'per_device_eval_batch_size': 8,
        'gradient_accumulation_steps': 1,
        'optim': 'adamw_torch',
        'save_steps': 500,
        'logging_steps': 10,
        'learning_rate': 2e-5,
        'weight_decay': 0.001,
        'fp16': False,
        # NOTE(review): bf16 needs hardware support; confirm this default
        # suits every machine the training runs on.
        'bf16': True,
        'max_grad_norm': 1.0,
        'max_steps': -1,
        'warmup_ratio': 0.03,
        'group_by_length': True,
        'length_column_name': 'length',
        'eval_steps': 500,
        'eval_accumulation_steps': 1,
        'report_to': 'tensorboard',
        'ddp_timeout': 1800,
        'logging_first_step': True,
        # Applies only when evaluation is enabled; see _resolve_arguments().
        'load_best_model_at_end': True,
        'metric_for_best_model': 'eval_loss',
        'greater_is_better': False,
        'save_total_limit': 2,
        'seed': 42,
        'data_seed': 42,
        'torchdynamo': None,
        'dataloader_drop_last': False,
        'dataloader_num_workers': 0,
        'dataloader_pin_memory': True,
        # Applies only with worker processes; see _resolve_arguments().
        'dataloader_prefetch_factor': 2,
        'label_smoothing_factor': 0.0,
        'adafactor': False,
        'neftune_noise_alpha': 0.0,
        'hub_model_id': None,
        'hub_private_repo': False,
        'push_to_hub': False,
        'hub_strategy': 'checkpoint',
        'hub_always_push': False,
        'hub_revision': 'main',
        'hub_token': None,
        'deepspeed': None,
        'fsdp': '',
        'fsdp_config': None,
        'fsdp_transformer_layer_cls_to_wrap': None,
        'accelerator_config': None,
        'do_train': True,
        'do_eval': True,
        'do_predict': False,
        'evaluation_strategy': 'no',
        'prediction_loss_only': False,
        'ignore_data_skip': False,
        'skip_memory_metrics': True,
        'push_to_hub_token': None,
        'resume_from_checkpoint': None,
        'include_inputs_for_metrics': False,
        'fp16_full_eval': False,
        'ddp_find_unused_parameters': None,
        # NOTE(review): the keys below do not appear to be TrainingArguments
        # fields. They stay in the table so existing configs keep loading,
        # and the field filter in create_training_arguments() drops them.
        # Confirm and remove.
        'jax_dynabatch_min': 1,
        'jax_dynabatch_max': 32,
        'jax_dynabatch_step': 1,
        'jax_allow_mismatched_shapes': False,
        'torch_empty_cache_freq': 0,
        'kwargs_handlers': None,
        'dataloader_sort_fn': None,
        'metric_for_best_model_value': None,
    }

    def __init__(self, config):
        """Store ``config``; it is not read until arguments are built."""
        self.config = config

    def _is_explicit(self, key) -> bool:
        """Return True when ``key`` is present in the config."""
        return self.config.get(key, self._UNSET) is not self._UNSET

    def _resolve_arguments(self) -> dict:
        """Merge the config over ``_DEFAULTS`` and return the result.

        Returns:
            A new dict keyed by the names in ``_DEFAULTS`` plus
            ``remove_unused_columns``. Values the caller set explicitly are
            never altered.
        """
        resolved = {
            key: self.config.get(key, default)
            for key, default in self._DEFAULTS.items()
        }

        # Newer configs may spell the strategy key as ``eval_strategy``.
        if not self._is_explicit('evaluation_strategy'):
            resolved['evaluation_strategy'] = self.config.get('eval_strategy', 'no')

        # TrainingArguments rejects load_best_model_at_end=True while
        # evaluation is disabled, so the default follows the strategy.
        if not self._is_explicit('load_best_model_at_end'):
            resolved['load_best_model_at_end'] = resolved['evaluation_strategy'] != 'no'

        # A prefetch factor is only valid with worker processes; without
        # them, fall back to "not set" unless the caller asked for a value.
        if (not self._is_explicit('dataloader_prefetch_factor')
                and not resolved['dataloader_num_workers']):
            resolved['dataloader_prefetch_factor'] = None

        # Always forced off, whatever the config says.
        resolved['remove_unused_columns'] = False
        return resolved

    def create_training_arguments(self) -> "TrainingArguments":
        """Create the training arguments.

        Only fields that the installed ``TrainingArguments`` dataclass
        declares are passed to it. A ``UserWarning`` is issued for each
        dropped key that the config set explicitly; dropped defaults are
        silent.

        Returns:
            A ``TrainingArguments`` instance built from the resolved values.
        """
        resolved = self._resolve_arguments()
        supported = {
            field.name
            for field in dataclasses.fields(TrainingArguments)
            if field.init
        }

        # Prefer the ``eval_strategy`` name whenever the installed version has it.
        if 'eval_strategy' in supported:
            resolved['eval_strategy'] = resolved.pop('evaluation_strategy')

        accepted = {}
        for key, value in resolved.items():
            if key in supported:
                accepted[key] = value
            elif self._is_explicit(key):
                warnings.warn(
                    f"Ignoring config key {key!r}: it is not a field of the "
                    "installed TrainingArguments",
                    stacklevel=2,
                )
        return TrainingArguments(**accepted)