I want to fine-tune BERT on my dataset, which is split into train.csv and test.csv, in order to predict personality traits. However, my code fails when building the model.
Here is my code:
import pandas as pd
import numpy as np
import tensorflow as tf
from transformers import BertTokenizer, TFBertModel
import re
# Use the tf.keras callback to avoid mixing the standalone keras package with tf.keras
from tensorflow.keras.callbacks import TensorBoard
from datetime import datetime
import os
# Load the training and test sets from the CSV files
train_df = pd.read_csv('train.csv', encoding='latin-1')
test_df = pd.read_csv('test.csv', encoding='latin-1')
# Remove URLs (apply the same cleaning to both splits)
train_df['STATUS'] = train_df['STATUS'].apply(lambda x: re.sub(r'http\S+', '', x))
test_df['STATUS'] = test_df['STATUS'].apply(lambda x: re.sub(r'http\S+', '', x))
# Expand contractions
contractions = {
    "ain't": "am not",
    "aren't": "are not",
}
def expand_contractions(text):
    for contraction, expansion in contractions.items():
        text = text.replace(contraction, expansion)
    return text
train_df['STATUS'] = train_df['STATUS'].apply(expand_contractions)
test_df['STATUS'] = test_df['STATUS'].apply(expand_contractions)
# Drop statuses containing fewer than three words
train_df['word_count'] = train_df['STATUS'].apply(lambda x: len(x.split()))
train_df = train_df[train_df['word_count'] >= 3]
# Select the text column for inputs and the five trait columns for labels
tr_texts = train_df['STATUS'].tolist()
ts_texts = test_df['STATUS'].tolist()
tr_labels = train_df[['cEXT', 'cNEU', 'cAGR', 'cCON', 'cOPN']].values
ts_labels = test_df[['cEXT', 'cNEU', 'cAGR', 'cCON', 'cOPN']].values
tr_labels[tr_labels == 'y'] = 1
tr_labels[tr_labels == 'n'] = 0
ts_labels[ts_labels == 'y'] = 1
ts_labels[ts_labels == 'n'] = 0
tr_labels = tr_labels.astype('float32')
ts_labels = ts_labels.astype('float32')
# Sequence length and number of classes
SEQ_LEN = 128
NUM_CLASSES = 5
# Load the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
# Tokenize the text data
train_tokenized = tokenizer(tr_texts, max_length=SEQ_LEN, padding='max_length', truncation=True)
test_tokenized = tokenizer(ts_texts, max_length=SEQ_LEN, padding='max_length', truncation=True)
# Extract input IDs, attention masks, and token type IDs as NumPy arrays
train_input_ids = np.array(train_tokenized['input_ids'])
train_attention_masks = np.array(train_tokenized['attention_mask'])
train_token_type_ids = np.array(train_tokenized['token_type_ids'])
test_input_ids = np.array(test_tokenized['input_ids'])
test_attention_masks = np.array(test_tokenized['attention_mask'])
test_token_type_ids = np.array(test_tokenized['token_type_ids'])
# Convert to TensorFlow tensors
train_input_ids = tf.convert_to_tensor(train_input_ids)
train_attention_masks = tf.convert_to_tensor(train_attention_masks)
train_token_type_ids = tf.convert_to_tensor(train_token_type_ids)
test_input_ids = tf.convert_to_tensor(test_input_ids)
test_attention_masks = tf.convert_to_tensor(test_attention_masks)
test_token_type_ids = tf.convert_to_tensor(test_token_type_ids)
# Load the pretrained BERT encoder (all layers stay trainable for fine-tuning)
bert_layer = TFBertModel.from_pretrained("bert-base-uncased")
# Build the BERT fine-tuning model
def create_bert_finetuning_model(bert_layer, SEQ_LEN):
    # Inputs
    input_ids = tf.keras.layers.Input(shape=(SEQ_LEN,), dtype=tf.int32, name='input_ids')
    attention_masks = tf.keras.layers.Input(shape=(SEQ_LEN,), dtype=tf.int32, name='attention_masks')
    token_type_ids = tf.keras.layers.Input(shape=(SEQ_LEN,), dtype=tf.int32, name='token_type_ids')
    # BERT encoder
    bert_inputs = {"input_ids": input_ids, "attention_mask": attention_masks, "token_type_ids": token_type_ids}
    bert_outputs = bert_layer(bert_inputs)
    # Classification head on top of the pooled [CLS] representation
    pooled_output = bert_outputs.pooler_output
    dense = tf.keras.layers.Dense(512, activation='relu')(pooled_output)
    dropout = tf.keras.layers.Dropout(0.2)(dense)
    # Sigmoid rather than softmax: the five traits are independent binary labels
    output = tf.keras.layers.Dense(NUM_CLASSES, activation='sigmoid', name='output')(dropout)
    # Assemble the model
    model = tf.keras.Model(inputs=[input_ids, attention_masks, token_type_ids], outputs=output)
    return model
# Instantiate the fine-tuning model
model_finetuned_bert = create_bert_finetuning_model(bert_layer, SEQ_LEN)
model_finetuned_bert.summary()
# Compile the model for fine-tuning (binary cross-entropy for multi-label targets)
model_finetuned_bert.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
    loss="binary_crossentropy",
    metrics=["accuracy"]
)
checkpoint_path = '/content/drive/MyDrive/model_finetuned_bert.h5'
checkpoint_dir = os.path.dirname(checkpoint_path)
# TensorBoard needs a log directory, not the .h5 checkpoint path
tensorboard_callback = TensorBoard(log_dir=os.path.join(checkpoint_dir, 'logs'))
# Record the start time
start = datetime.now()
# Train the model
train_history_en_both = model_finetuned_bert.fit(
    [train_input_ids, train_attention_masks, train_token_type_ids],
    tr_labels,
    validation_data=([test_input_ids, test_attention_masks, test_token_type_ids], ts_labels),
    epochs=2,
    verbose=1,
    batch_size=16,
    callbacks=[tensorboard_callback]
)
Here is the error I get:
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-10-6c3213ee1de1> in <cell line: 99>()
97
98 # Créer le modèle BERT pour le fine-tuning
---> 99 model_finetuned_bert = create_bert_finetuning_model(bert_layer, SEQ_LEN)
100 model_finetuned_bert.summary()
101
<ipython-input-10-6c3213ee1de1> in create_bert_finetuning_model(roberta_layer, SEQ_LEN)
84 # Encoder RoBERTa
85 bert_inputs = {"input_ids": input_ids, "attention_mask": attention_masks , "token_type_ids": token_type_ids}
---> 86 bert_outputs = bert_layer(bert_inputs)
87
88 # Ajouter des couches supplémentaires pour la classification
/usr/local/lib/python3.10/dist-packages/tf_keras/src/utils/traceback_utils.py in error_handler(*args, **kwargs)
68 # To get the full stack trace, call:
69 # `tf.debugging.disable_traceback_filtering()`
---> 70 raise e.with_traceback(filtered_tb) from None
71 finally:
72 del filtered_tb
/usr/local/lib/python3.10/dist-packages/transformers/modeling_tf_utils.py in run_call_with_unpacked_inputs(self, *args, **kwargs)
425 config = self.config
426
--> 427 unpacked_inputs = input_processing(func, config, **fn_args_and_kwargs)
428 return func(self, **unpacked_inputs)
429
/usr/local/lib/python3.10/dist-packages/transformers/modeling_tf_utils.py in input_processing(func, config, **kwargs)
550 continue
551 else:
--> 552 raise ValueError(f"Data of type {type(v)} is not allowed only {allowed_types} is accepted for {k}.")
553 else:
554 if tf.is_tensor(main_input) or main_input is None:
ValueError: Exception encountered when calling layer 'tf_bert_model_4' (type TFBertModel).
Data of type <class 'keras.src.engine.keras_tensor.KerasTensor'> is not allowed only (<class 'tensorflow.python.framework.tensor.Tensor'>, <class 'bool'>, <class 'int'>, <class 'transformers.utils.generic.ModelOutput'>, <class 'tuple'>, <class 'list'>, <class 'dict'>, <class 'numpy.ndarray'>) is accepted for input_ids.
Call arguments received by layer 'tf_bert_model_4' (type TFBertModel):
• input_ids={'input_ids': "<KerasTensor: shape=(None, 128) dtype=int32 (created by layer 'input_ids')>", 'attention_mask': "<KerasTensor: shape=(None, 128) dtype=int32 (created by layer 'attention_masks')>", 'token_type_ids': "<KerasTensor: shape=(None, 128) dtype=int32 (created by layer 'token_type_ids')>"}
• attention_mask=None
• token_type_ids=None
• position_ids=None
• head_mask=None
• inputs_embeds=None
• encoder_hidden_states=None
• encoder_attention_mask=None
• past_key_values=None
• use_cache=None
• output_attentions=None
• output_hidden_states=None
• return_dict=None
• training=False
The message is telling you that, for input_ids, the layer does not accept a KerasTensor but only one of the other listed types (and the same presumably applies to the other parameters of the call).
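One workaround, sketched below under the assumption that you are on a recent transformers release running against tf-keras (as the traceback suggests), is to avoid passing symbolic KerasTensors to TFBertModel at all: subclass tf.keras.Model so that the encoder is only ever called on concrete tensors during training. The class name BertTraitClassifier is just an illustrative choice; everything else reuses the tokenized arrays and labels you already built:

import tensorflow as tf
from transformers import TFBertModel

class BertTraitClassifier(tf.keras.Model):
    # BERT encoder followed by a small multi-label classification head
    def __init__(self, num_classes=5):
        super().__init__()
        self.bert = TFBertModel.from_pretrained("bert-base-uncased")
        self.dense = tf.keras.layers.Dense(512, activation='relu')
        self.dropout = tf.keras.layers.Dropout(0.2)
        # Sigmoid, not softmax: the five traits are independent binary labels
        self.classifier = tf.keras.layers.Dense(num_classes, activation='sigmoid')

    def call(self, inputs, training=False):
        # At this point 'inputs' holds concrete tensors, not KerasTensors,
        # so the transformers input-type check no longer raises
        outputs = self.bert(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            token_type_ids=inputs['token_type_ids'],
            training=training,
        )
        x = self.dense(outputs.pooler_output)
        x = self.dropout(x, training=training)
        return self.classifier(x)

model = BertTraitClassifier()
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.fit(
    {'input_ids': train_input_ids,
     'attention_mask': train_attention_masks,
     'token_type_ids': train_token_type_ids},
    tr_labels,
    validation_data=(
        {'input_ids': test_input_ids,
         'attention_mask': test_attention_masks,
         'token_type_ids': test_token_type_ids},
        ts_labels,
    ),
    epochs=2,
    batch_size=16,
)

The root cause is a Keras version mismatch: the Input layers are built by one Keras while transformers runs against tf-keras, so the type checks in input_processing reject your symbolic tensors. Depending on your TensorFlow version, setting os.environ['TF_USE_LEGACY_KERAS'] = '1' before importing TensorFlow, or pinning tensorflow and transformers to versions released together, can also make your original functional-style model work unchanged.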