"""This module contains the EmotionClassifiers class, which is designed to categorise input texts into emotion categories.It uses a Huggingface transformer model trained on Bert_Large by default.The EmotionClassifiers class has the following method:@method:recognize_emotion: Adding an emotion category.@method:recognize_average_emotion_from_multiple_models: Adding an average emotion category or the most likely emotion category using multiple models."""fromaniemore.recognizers.textimportTextRecognizerfromaniemore.modelsimportHuggingFaceModelimporttorchimportpandasaspdfromtqdmimporttqdmimportgc
class EmotionRecognizer:
    """Categorise input texts into emotion categories.

    Attributes:
        device: Device string handed to every ``TextRecognizer`` this class
            creates. Defaults to ``'cpu'``; no GPU auto-detection is done
            here, so pass ``'cuda'`` explicitly to run on a GPU.
        model_name: Model used by :meth:`recognize_emotion`. Defaults to
            ``HuggingFaceModel.Text.Bert_Tiny``.
        default_model_names: Models accepted by, and used by default in,
            :meth:`recognize_average_emotion_from_multiple_models`.
        recognizer: ``TextRecognizer`` built once in ``__init__`` from
            ``model_name`` and ``device`` and reused for single-text calls.
    """

    # Emotion labels that get a score column in the multi-model aggregation.
    EMOTION_LABELS = (
        "happiness",
        "sadness",
        "anger",
        "fear",
        "disgust",
        "enthusiasm",
        "neutral",
    )

    def __init__(self, model_name=HuggingFaceModel.Text.Bert_Tiny, device='cpu'):
        """Create the recognizer used for single-text classification.

        Args:
            model_name: Model passed to ``TextRecognizer``.
            device: Device passed to ``TextRecognizer``.
        """
        self.device = device
        self.model_name = model_name

        # Kept on the instance so the multi-model method can validate its
        # ``models`` argument without rebuilding this list on every call.
        self.default_model_names = [
            HuggingFaceModel.Text.Bert_Tiny,
            HuggingFaceModel.Text.Bert_Base,
            HuggingFaceModel.Text.Bert_Large,
            HuggingFaceModel.Text.Bert_Tiny2,
        ]

        self.recognizer = TextRecognizer(model=self.model_name, device=self.device)

    def recognize_emotion(self, text):
        """Return the emotion for a given text.

        Delegates to ``self.recognizer.recognize`` with
        ``return_single_label=True`` and returns its result unchanged.
        """
        return self.recognizer.recognize(text, return_single_label=True)

    @staticmethod
    def _frame_model_scores(model_results, index, labels):
        """Turn one model's per-text score mappings into a float DataFrame.

        Args:
            model_results: One ``{emotion: score}`` mapping per text.
            index: Index to put on the rows (the caller's ``df.index``).
            labels: Emotion labels, in column order.

        Returns:
            DataFrame of floats with ``index`` as rows and ``labels`` as
            columns; an emotion absent from a mapping contributes 0.

        Raises:
            ValueError: If a mapping contains an emotion not in ``labels``.
        """
        frame = pd.DataFrame(model_results, index=index)
        unknown = [column for column in frame.columns if column not in labels]
        if unknown:
            raise ValueError(f"Unexpected emotion labels: {unknown}")
        return frame.reindex(columns=labels).fillna(0.0).astype(float)

    def recognize_average_emotion_from_multiple_models(self, df, text_column, models=None, average=True):
        """Calculate the prevailing emotion using multiple models for a DataFrame column.

        Each model is loaded, run over every text in ``df[text_column]`` and
        released before the next one is loaded, so only one model is held at
        a time. A model that raises is reported with ``print`` and skipped;
        it contributes nothing to the totals.

        Args:
            df: DataFrame holding the texts.
            text_column: Name of the column in ``df`` with the texts.
            models: Models to use; each must be in
                ``self.default_model_names``. ``None`` means all of them.
            average: If true, per-emotion scores are averaged over the models
                that succeeded; otherwise the highest score any model gave
                each emotion is kept.

        Returns:
            Series indexed like ``df`` with the highest-scoring emotion label
            for each row.

        Raises:
            ValueError: If ``models`` contains a model that is not in
                ``self.default_model_names``.
            RuntimeError: If no model produced scores (``models`` is empty or
                every model failed). Without this the all-zero score table
                would report the first label for every row.
        """
        if models is None:
            models = self.default_model_names
        else:
            models = list(models)
            # Validate that the provided models are in the default models list
            for model in models:
                if model not in self.default_model_names:
                    raise ValueError(f"Model {model} is not a valid model. Valid models are: {self.default_model_names}")

        labels = list(self.EMOTION_LABELS)
        # Float zeros: the scores added below are floats, and writing floats
        # into integer columns is a deprecated implicit upcast in pandas.
        scores = pd.DataFrame(0.0, index=df.index, columns=labels)
        texts = df[text_column]
        succeeded = 0

        # Process each model one by one with progress bar
        for model_name in tqdm(models, desc="Processing models"):
            recognizer = None
            processed = False
            try:
                print(f"Clearing cache and collecting garbage before loading model: {model_name}")
                torch.cuda.empty_cache()
                gc.collect()

                print(f"Loading model: {model_name}")
                recognizer = TextRecognizer(model=model_name, device=self.device)
                model_results = [
                    recognizer.recognize(text, return_single_label=False)
                    for text in texts
                ]

                # Build the whole per-model table first so that a failure
                # cannot leave a half-applied model in the running totals.
                model_scores = self._frame_model_scores(model_results, df.index, labels)
                if average:
                    scores = scores + model_scores
                else:
                    scores = scores.mask(model_scores > scores, model_scores)
                processed = True
            except Exception as e:
                # Best effort: one broken model must not abort the others.
                print(f"Error processing model {model_name}: {e}")
            finally:
                # Drop the model reference before cleaning up, on success and
                # on failure alike, so its memory can actually be reclaimed.
                recognizer = None
                torch.cuda.empty_cache()  # Clear the CUDA cache (if a GPU is used)
                gc.collect()  # Run garbage collection

            if processed:
                succeeded += 1
                print(f"Model {model_name} processed and unloaded.")

        if succeeded == 0:
            raise RuntimeError("No model produced emotion scores; cannot determine prevailing emotions.")

        if average:
            # Average over the models that actually contributed scores.
            scores = scores.div(succeeded)

        # Determine the prevailing emotion with the highest score
        return scores.idxmax(axis=1)