From f46e4d46ce226db800a423719f4934a7f7240ff0 Mon Sep 17 00:00:00 2001 From: Mauliana Mauliana <mauliana@euporie.informatik.uni-kl.de> Date: Wed, 19 Apr 2023 18:34:59 +0200 Subject: [PATCH] fix error regarding to time in publishing to robot --- cpp/first_interaction_flant5.xml | 102 ++--- cpp/gVerbalCues.cpp | 189 +++++++++ cpp/mFirstInteractionT5.cpp | 637 +++++++++++++++++-------------- cpp/mFirstInteractionT5.h | 6 +- 4 files changed, 592 insertions(+), 342 deletions(-) create mode 100644 cpp/gVerbalCues.cpp diff --git a/cpp/first_interaction_flant5.xml b/cpp/first_interaction_flant5.xml index 9775acd..f2661d8 100644 --- a/cpp/first_interaction_flant5.xml +++ b/cpp/first_interaction_flant5.xml @@ -10,79 +10,81 @@ <SUI_dialog> <tts module="TextToSpeechHandler" remoteport="input_text_to_speech" /> - <port name="speaker" module="TextToSpeechHandler" remoteport="speaker_name" type="string" value="Graham" /> + <port name="speaker" module="TextToSpeechHandler" remoteport="speaker_name" type="string" value="Lucy" /> <port name="activate_gesture" module="PoseHandler" remoteport="activate_gesture" type="int" value="1" /> <port name="in_pose_file_name" module="PoseHandler" remoteport="in_pose_file_name" type="string" /> <port name="generated_sentence" module="FirstInteraction" remoteport="generated_sentence" type="string"/> - <port name="sentiment" module="FirstInteraction" remoteport="sentiment" type="string"/> + <port name="sentimentT5" module="FirstInteraction" remoteport="sentimentT5" type="string"/> <mark name="greetings" /> <set_port name="in_pose_file_name" value ="Smile" /> - <prompt> Hello there! Welcome to our research lab. My name is Emah! </prompt> - <wait time="2000000" /> + <prompt> Hello there! Welcome to our research lab. My name is Emah!</prompt> + <goto mark="filler1" /> + +<mark name="filler1" /> + <wait time="11000000"/> + <set_port name="in_pose_file_name" value ="Smile" /> + <prompt> It is really nice that you are interested in talking to me </prompt> <goto mark="emo_check" /> - -<mark name="emo_check" /> - <set_port name="activate_primitive_gestures" value="0" /> - <set_port name="activate_action_units" value="0" /> - <if port="sentiment" value="8" relation="==">"joy"<goto mark="conv_happy" /></if> - <elseif port="sentiment" value="9" relation="==">"anger"<goto mark="conv_anger" /></elseif> - <elseif port="sentiment" value="10" relation="==">"disgust"<goto mark="conv_disgust" /></elseif> - <elseif port="sentiment" value="11" relation="==">"fear"<goto mark="conv_fear" /></elseif> - <elseif port="sentiment" value="12" relation="==">"sadness"<goto mark="conv_sad" /></elseif> - <elseif port="sentiment" value="13" relation="==">"surprise"<goto mark="conv_surprise" /></elseif> - <else>""<goto mark="conv_neutral" /></else> - -<mark name="conv_happy" /> - <set_port name="in_pose_file_name" value = "Smile" /> +<mark name="emo_check" /> + <wait time="900000" /> + <if port="sentimentT5" value="joy" relation="=="><goto mark="conv_happy" /></if> + <elseif port="sentimentT5" value="anger" relation="=="><goto mark="conv_anger" /></elseif> + <elseif port="sentimentT5" value="disgust" relation="=="><goto mark="conv_disgust" /></elseif> + <elseif port="sentimentT5" value="fear" relation="=="><goto mark="conv_fear" /></elseif> + <elseif port="sentimentT5" value="sadness" relation="=="><goto mark="conv_sad" /></elseif> + <elseif port="sentimentT5" value="surprise" relation="=="><goto mark="conv_surprise" /></elseif> + <elseif port="sentimentT5" value="neutral" relation="=="><goto mark="conv_neutral" /></elseif> + <elseif port="sentimentT5" value=" " relation="=="><goto mark="filler2" /></elseif> + <goto mark="emo_check" /> + +<mark name="listening"/> + <wait time="100000" /> + <goto mark="emo_check" /> + +<mark name="conv_happy" /> + <wait time="100000" /> <prompt> %generated_sentence </prompt> - <wait time="2000000" /> - <goto mark="emo_check" /> + <goto mark="listening" /> -<mark name="conv_anger" /> - <set_port name="in_pose_file_name" value = "Frown" /> +<mark name="conv_anger" /> + <wait time="8000000" /> + <set_port name="in_pose_file_name" value = "Frown" /> <prompt> %generated_sentence </prompt> - <wait time="2000000" /> - <goto mark="emo_check" /> + <goto mark="listening" /> -<mark name="conv_disgust" /> - <set_port name="in_pose_file_name" value = "Surprise" /> - <set_port name="in_pose_file_name" value = "Shrug" /> +<mark name="conv_disgust" /> + <wait time="8000000" /> + <set_port name="in_pose_file_name" value = "Nosey" /> + <wait time="100000" /> <prompt> %generated_sentence </prompt> - <wait time="2000000" /> - <goto mark="emo_check" /> + <goto mark="listening" /> -<mark name="conv_fear" /> +<mark name="conv_fear" /> + <wait time="8000000" /> <set_port name="in_pose_file_name" value = "Surprise" /> + <wait time="100000" /> <prompt> %generated_sentence </prompt> - <wait time="2000000" /> - <goto mark="emo_check" /> + <goto mark="listening" /> -<mark name="conv_sad" /> - <set_port name="in_pose_file_name" value = "Frown" /> +<mark name="conv_sad" /> + <wait time="8000000" /> + <set_port name="in_pose_file_name" value = "Thoughtful" /> + <wait time="100000" /> <prompt> %generated_sentence </prompt> - <wait time="2000000" /> - <goto mark="emo_check" /> + <goto mark="listening" /> -<mark name="conv_surprise" /> +<mark name="conv_surprise" /> + <wait time="8000000" /> <set_port name="in_pose_file_name" value = "Surprise" /> + <wait time="500000" /> <prompt> %generated_sentence </prompt> - <wait time="2000000" /> - <goto mark="emo_check" /> + <goto mark="listening" /> -<mark name="conv_neutral" /> - <set_port name="in_pose_file_name" value = "Thoughtful" /> +<mark name="conv_neutral" /> <prompt> %generated_sentence </prompt> - <wait time="2000000" /> - <goto mark="emo_check" /> - -<mark name="detect_response"/> - <set_port name="in_pose_file_name" value ="Thoughtful" /> - <set_port name="in_pose_file_name" value ="point" /> - <prompt> %generated_sentence </prompt> - <wait time="10000000" /> - <goto mark="greetings" /> + <goto mark="listening" /> </SUI_dialog> </dialog> diff --git a/cpp/gVerbalCues.cpp b/cpp/gVerbalCues.cpp new file mode 100644 index 0000000..69dda6f --- /dev/null +++ b/cpp/gVerbalCues.cpp @@ -0,0 +1,189 @@ +// +// You received this file as part of Finroc +// A framework for intelligent robot control +// +// Copyright (C) AG Robotersysteme TU Kaiserslautern +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +// +//---------------------------------------------------------------------- +/*!\file projects/emah/verbal_cues/gVerbalCues.cpp + * + * \author Sarwar Paplu + * + * \date 2021-09-13 + * + */ +//---------------------------------------------------------------------- +#include "projects/emah/verbal_cues/gVerbalCues.h" + +//---------------------------------------------------------------------- +// External includes (system with <>, local with "") +//---------------------------------------------------------------------- +#include "plugins/scheduling/tThreadContainerElement.h" + +//#include "libraries/speech_recognition/julius_app/mSpeechToTextDNN.h" +#include "libraries/speech_recognition/mSpeechRecognition.h" +#include "libraries/natural_language_processing/mTextSentiment.h" +#include "libraries/natural_language_processing/mNamedEntityRecognition.h" +#include "libraries/natural_language_processing/mBinaryRelationDetector.h" +#include "libraries/natural_language_processing/mPartsOfSpeechTagger.h" +#include "libraries/natural_language_processing/mWikipediaSearch.h" +#include "libraries/natural_language_processing/mTranslateEN2DE.h" +#include "libraries/natural_language_processing/mGrammarChecker.h" +#include "libraries/natural_language_processing/mVocabularySkills.h" +#include "libraries/natural_language_processing/mSpeechGenerator.h" +#include "libraries/natural_language_processing/mTemporalContext.h" +#include "libraries/natural_language_processing/mTranscribedTextGrabber.h" +#include "libraries/natural_language_processing/mWeatherConditions.h" +#include "libraries/natural_language_processing/mBasicMathsSkills.h" +#include "libraries/natural_language_processing/mCovidStatistics.h" +#include "libraries/natural_language_processing/mGuessNameOfCountry.h" +#include "libraries/natural_language_processing/mAudioCommands.h" + +#include "libraries/chatbot_system/mChatDistributeInput.h" +#include "projects/emah/verbal_cues/mFirstInteractionT5.h" + +//---------------------------------------------------------------------- +// Internal includes with "" +//---------------------------------------------------------------------- +//#include "projects/emah/verbal_cues/mAuditoryMemory.h" + +//---------------------------------------------------------------------- +// Debugging +//---------------------------------------------------------------------- +#include <cassert> + +//---------------------------------------------------------------------- +// Namespace usage +//---------------------------------------------------------------------- + +//---------------------------------------------------------------------- +// Namespace declaration +//---------------------------------------------------------------------- +namespace finroc +{ +namespace emah +{ +namespace verbal_cues +{ + +//---------------------------------------------------------------------- +// Forward declarations / typedefs / enums +//---------------------------------------------------------------------- + +//---------------------------------------------------------------------- +// Const values +//---------------------------------------------------------------------- +#ifdef _LIB_FINROC_PLUGINS_RUNTIME_CONSTRUCTION_ACTIONS_PRESENT_ +static const runtime_construction::tStandardCreateModuleAction<gVerbalCues> cCREATE_ACTION_FOR_G_VERBALCUES("VerbalCues"); +#endif + +//---------------------------------------------------------------------- +// Implementation +//---------------------------------------------------------------------- + +//---------------------------------------------------------------------- +// gVerbalCues constructor +//---------------------------------------------------------------------- +gVerbalCues::gVerbalCues(core::tFrameworkElement *parent, const std::string &name, + const std::string &structure_config_file) : + tSenseControlGroup(parent, name, structure_config_file, false) +{ + auto speech_recog_api = new scheduling::tThreadContainerElement<finroc::speech_recognition::mSpeechRecognition> (this, "SpeechRecognition"); + + auto first_interact = new finroc::emah::verbal_cues::mFirstInteractionT5(this , "FirstInteraction"); + + + auto chat_input_distribute = new finroc::chatbot_system::mChatDistributeInput(this); + auto speech_generator = new finroc::natural_language_processing::mSpeechGenerator(this); + + auto wiki = new scheduling::tThreadContainerElement<finroc::natural_language_processing::mWikipediaSearch>(this, "WikipediaSearch"); + auto translate_en_de = new scheduling::tThreadContainerElement<finroc::natural_language_processing::mTranslateEN2DE>(this, "TranslateEN2DE"); + auto maths_op = new finroc::natural_language_processing::mBasicMathsSkills(this); + auto pos_tagging = new scheduling::tThreadContainerElement<finroc::natural_language_processing::mPartsOfSpeechTagger> (this, "PartsOfSpeechTagger"); + auto grammar_checker = new scheduling::tThreadContainerElement<finroc::natural_language_processing::mGrammarChecker> (this, "GrammarChecker"); + auto vocab_skills = new scheduling::tThreadContainerElement<finroc::natural_language_processing::mVocabularySkills> (this, "VocabularySkills"); + //auto speech_to_text = new scheduling::tThreadContainerElement<finroc::speech_recognition::mSpeechToText>(this, "SpeechToText"); + //auto speech_to_text_dnn = new scheduling::tThreadContainerElement<finroc::speech_recognition::mSpeechToTextDNN>(this, "SpeechToTextDNN"); + auto text_sentiment = new finroc::natural_language_processing::mTextSentiment(this); + auto named_entity = new finroc::natural_language_processing::mNamedEntityRecognition(this); + auto binary_rel = new finroc::natural_language_processing::mBinaryRelationDetector(this); + new finroc::natural_language_processing::mTranscribedTextGrabber(this); + auto temporal_context = new finroc::natural_language_processing::mTemporalContext(this); + auto weather = new finroc::natural_language_processing::mWeatherConditions(this); + auto covidstatistics = new finroc::natural_language_processing::mCovidStatistics(this); + auto guess_country = new finroc::natural_language_processing::mGuessNameOfCountry(this); + + //auto auditory_mem = new mAuditoryMemory(this); + auto cmd_verbal_cues = new finroc::natural_language_processing::mAudioCommands(this); + + this->GetControllerInputs().ConnectByName(speech_generator->GetControllerInputs(), false, NULL, -1); + speech_generator->GetSensorOutputs().ConnectByName(this->GetSensorOutputs(), true, NULL, -1); + //this->GetControllerInputs().ConnectByName(auditory_mem->GetControllerInputs(), false, NULL, -1); + //auditory_mem->GetSensorOutputs().ConnectByName(this->GetSensorOutputs(), true, NULL, -1); + + this->GetSensorInputs().ConnectByName(speech_generator->GetSensorInputs(), false, NULL, -1); // for verification + + //speech_to_text_dnn->out_text_julius_second_pass.ConnectTo(chat_input_distribute->in_text_speech_recog); + speech_recog_api->output_text_speech_recog_API.ConnectTo(chat_input_distribute->in_text_speech_recog); + + speech_recog_api->output_text_speech_recog_API.ConnectTo(first_interact->in_speech_text); + chat_input_distribute->out_user_text_raw.ConnectTo(first_interact->in_speech_text); +// first_interact->out_speech_text.ConnectTo(first_interact->in_speech_text); + +// first_interact->generated_sentence.ConnectTo(text_sentiment->in_user_text_raw); +// text_sentiment->out_text_emotion.ConnectTo(first_interact->sentiment); + + + chat_input_distribute->out_user_text_raw.ConnectTo(text_sentiment->in_user_text_raw); + chat_input_distribute->out_user_text_raw.ConnectTo(named_entity->in_user_text_raw); + chat_input_distribute->out_user_text_raw.ConnectTo(binary_rel->in_user_text_raw); + chat_input_distribute->out_user_text_raw.ConnectTo(pos_tagging->in_user_text_raw); + chat_input_distribute->out_user_text_raw.ConnectTo(speech_generator->input_speech_generator); + //chat_input_distribute->out_user_text_raw.ConnectTo(auditory_mem->input_auditory_memory); + chat_input_distribute->out_user_text_raw.ConnectTo(maths_op->in_prompt_maths); + chat_input_distribute->out_user_text_raw.ConnectTo(guess_country->input_country_name); + + //speech_to_text_dnn->out_text_julius_second_pass.ConnectTo(wiki->in_text_speech_recog); + chat_input_distribute->out_user_text_raw.ConnectTo(wiki->input_text_wiki); + chat_input_distribute->out_user_text_raw.ConnectTo(translate_en_de->input_text_translate); + chat_input_distribute->out_user_text_raw.ConnectTo(grammar_checker->in_text_grammar_chk); + chat_input_distribute->out_user_text_raw.ConnectTo(cmd_verbal_cues->in_text_speech_recog); + chat_input_distribute->out_user_text_raw.ConnectTo(vocab_skills->in_text_vocab_skill); + + cmd_verbal_cues->out_activate_grammar_check.ConnectTo(grammar_checker->activate_grammar_check); // test + + temporal_context->GetControllerOutputs().ConnectByName(speech_generator->GetControllerInputs(), true, NULL, -1); + weather->GetControllerOutputs().ConnectByName(speech_generator->GetControllerInputs(), true, NULL, -1); + maths_op->GetControllerOutputs().ConnectByName(speech_generator->GetControllerInputs(), true, NULL, -1); + covidstatistics->GetControllerOutputs().ConnectByName(speech_generator->GetControllerInputs(), true, NULL, -1); + pos_tagging->GetControllerOutputs().ConnectByName(speech_generator->GetControllerInputs(), true, NULL, -1); + named_entity->GetControllerOutputs().ConnectByName(speech_generator->GetControllerInputs(), false, NULL, -1); + //named_entity->GetControllerOutputs().ConnectByName(auditory_mem->GetControllerInputs(), false, NULL, -1); +} + +//---------------------------------------------------------------------- +// gVerbalCues destructor +//---------------------------------------------------------------------- +gVerbalCues::~gVerbalCues() +{} + +//---------------------------------------------------------------------- +// End of namespace declaration +//---------------------------------------------------------------------- +} +} +} diff --git a/cpp/mFirstInteractionT5.cpp b/cpp/mFirstInteractionT5.cpp index d0126c7..4739ab3 100644 --- a/cpp/mFirstInteractionT5.cpp +++ b/cpp/mFirstInteractionT5.cpp @@ -41,7 +41,7 @@ // Debugging //---------------------------------------------------------------------- #include <cassert> - +#include <unistd.h> //---------------------------------------------------------------------- // Namespace usage //---------------------------------------------------------------------- @@ -137,12 +137,13 @@ void mFirstInteractionT5::Sense() void TraverseString(std::string &str, int N) { - // Traverse the string - for (int i = 0; i < N; i++) { + // Traverse the string + for (int i = 0; i < N; i++) + { - // Print current character - std::cout<< str[i]<< " "; - } + // Print current character + std::cout << str[i] << " "; + } } @@ -154,190 +155,227 @@ void mFirstInteractionT5::Control() { if (this->ControllerInputChanged()) { - std::string post_text; - std::string pre_text; - std::string emo_text; - std::string AUDIO_FAILED("AUDIO_FAILED"); - std::string default_text = "Sorry, I cannot hear you, could you say it again?"; - - while(this->activate_sr.Get()) - { - std::string input_sr = "python ~/finroc/sources/cpp/projects/emah/verbal_cues/etc/sr.py"; - - FILE *process; - char buff[1024]; - std::cout << "Speak after counting to 5" << std::endl; - process = popen(input_sr.c_str(), "r"); - while (fgets(buff, sizeof(buff), process)) - { - out_speech_text.Publish(buff); - } - - if (this->activate_model_t5.Get()) - { - std::string model_path = "$FINROC_HOME/sources/cpp/projects/emah/verbal_cues/etc/model/t5-onnx"; - std::string model_name = "t5"; - - FILE *process; - char buff[1024]; - auto value = in_speech_text.GetPointer(); - std::string input_message(*value); - input_message.pop_back(); - - - if(!input_message.empty()){ - if(input_message.compare(AUDIO_FAILED) == 0) { - std::cout << default_text << std::endl; - generated_sentence.Publish(default_text, rrlib::time::Now()); - sentiment.Publish("neutral", rrlib::time::Now()); - }else{ - // preprocess the text before sending to model - pre_text = preprocess(input_message); - - std::cout << model_name << std::endl; - std::cout << "current input: "+pre_text << std::endl; - - std::string input_cmd = "python ~/finroc/sources/cpp/projects/emah/verbal_cues/etc/generate_response_onnx.py --text=\"" + pre_text + "\" --model_path=\"" + model_path + "\" --model_name=\"" + model_name + "\""; - - process = popen(input_cmd.c_str(), "r"); - - while (fgets(buff, sizeof(buff), process)) - { - // post-process generated sentence - std::pair<std::string, std::string> post = postprocess(buff); - post_text = post.first; - emo_text = post.second; - - std::cout << "post-process text: "+post_text << std::endl; - std::cout << "emotion: "+emo_text << std::endl; - generated_sentence.Publish(post_text, rrlib::time::Now()); - sentiment.Publish(emo_text, rrlib::time::Now()); - } - pclose(process); - } - - } - - // print history - std::vector<std::string> story; - if (conv_history.empty()) story = hist; - else{ - story = conv_history; - story.insert(story.end(), hist.begin(), hist.end()); - } - - int j = 0; - std::string concate_history; - - std::cout << "----Conv. History----" << std::endl; - for (const std::string& i : story){ - std::cout << i << '\n'; - if(j == 0){ - concate_history = " Human: "+i; - j++; - }else{ - if (j % 2 == 0) concate_history = concate_history+"\n Human: "+i; - else concate_history = concate_history+"\n Robot: "+i; - - j++; - } - } - std::cout << "---------------------" << std::endl; - history.Publish(concate_history, rrlib::time::Now()); - - } - else if (this->activate_model_flan_t5.Get()) - { - std::string model_path = "$FINROC_HOME/sources/cpp/projects/emah/verbal_cues/etc/model/flan-t5-onnx"; - std::string model_name = "flan-t5"; - - FILE *process; - char buff[1024]; - auto value = in_speech_text.GetPointer(); - std::string input_message(*value); - input_message.pop_back(); - - if(!input_message.empty()){ - if(input_message.compare(AUDIO_FAILED) == 0){ - std::cout << default_text << std::endl; - generated_sentence.Publish(default_text, rrlib::time::Now()); - sentiment.Publish("neutral", rrlib::time::Now()); - }else { - // preprocess the text before sending to model - pre_text = preprocess(input_message); - - std::cout << model_name << std::endl; - std::cout << "current input: "+pre_text << std::endl; - - std::string input_cmd = "python ~/finroc/sources/cpp/projects/emah/verbal_cues/etc/generate_response_onnx.py --text=\"" + pre_text + "\" --model_path=\"" + model_path + "\" --model_name=\"" + model_name + "\""; - - process = popen(input_cmd.c_str(), "r"); - - while (fgets(buff, sizeof(buff), process)) - { - // post-process generated sentence - std::pair<std::string, std::string> post = postprocess(buff); - post_text = post.first; - emo_text = post.second; - - std::cout << "post-process text: "+post_text << std::endl; - std::cout << "emotion: "+emo_text << std::endl; - generated_sentence.Publish(post_text, rrlib::time::Now()); - sentiment.Publish(emo_text, rrlib::time::Now()); - } - pclose(process); - } - - } - - int j = 0; - std::string concate_history; - - // print history - std::vector<std::string> story; - if (conv_history.empty()) story = hist; - else{ - story = conv_history; - story.insert(story.end(), hist.begin(), hist.end()); - } - - std::cout << "=====Conv. History======" << std::endl; - for (const std::string& i : story){ - std::cout << i << '\n'; - if(j == 0){ - concate_history = " Human: "+i; - j++; - }else{ - if (j % 2 == 0) concate_history = concate_history+"\n Human: "+i; - else concate_history = concate_history+"\n Robot: "+i; - j++; - } - } - std::cout << "========================" << std::endl; - history.Publish(concate_history, rrlib::time::Now()); - } - } + std::string post_text; + std::string pre_text; + std::string emo_text; + std::string AUDIO_FAILED("AUDIO_FAILED"); + std::string default_text = "Sorry, I cannot hear you, could you say it again?"; + +// while(this->activate_sr.Get()) +// { +// std::string input_sr = "python ~/finroc/sources/cpp/projects/emah/verbal_cues/etc/sr.py"; +// +// FILE *process; +// char buff[1024]; +// std::cout << "Speak after counting to 5" << std::endl; +// process = popen(input_sr.c_str(), "r"); +// while (fgets(buff, sizeof(buff), process)) +// { +// out_speech_text.Publish(buff); +// } + + if (this->activate_model_t5.Get()) + { + std::string model_path = "$FINROC_HOME/sources/cpp/projects/emah/verbal_cues/etc/model/t5-onnx"; + std::string model_name = "t5"; + + FILE *process; + char buff[1024]; + auto value = in_speech_text.GetPointer(); + std::string input_message(*value); + + if (!input_message.empty()) + { + input_message.pop_back(); + if (input_message.compare(AUDIO_FAILED) == 0) + { + std::cout << default_text << std::endl; +// sentimentT5.Publish("neutral", rrlib::time::Now()); +// generated_sentence.Publish(default_text, rrlib::time::Now()); + } + else + { + // preprocess the text before sending to model + pre_text = preprocess(input_message); + + std::cout << model_name << std::endl; + std::cout << "current input: " + pre_text << std::endl; + + std::string input_cmd = "python ~/finroc/sources/cpp/projects/emah/verbal_cues/etc/generate_response_onnx.py --text=\"" + pre_text + "\" --model_path=\"" + model_path + "\" --model_name=\"" + model_name + "\""; + + process = popen(input_cmd.c_str(), "r"); + + while (fgets(buff, sizeof(buff), process)) + { + // post-process generated sentence + std::pair<std::string, std::string> post = postprocess(buff); + post_text = post.first; + emo_text = post.second; + emo_text.pop_back(); + + std::cout << "post-process text: " + post_text << std::endl; + std::cout << "emotion: " + emo_text << std::endl; + + + } + sentimentT5.Publish(emo_text); + generated_sentence.Publish(post_text); + usleep(10000); + pclose(process); + } + + } + + // print history + std::vector<std::string> story; + if (conv_history.empty()) story = hist; + else + { + story = conv_history; + story.insert(story.end(), hist.begin(), hist.end()); + } + + int j = 0; + std::string concate_history; + + std::cout << "----Conv. History----" << std::endl; + for (const std::string & i : story) + { + std::cout << i << '\n'; + if (j == 0) + { + concate_history = " Human: " + i; + j++; + } + else + { + if (j % 2 == 0) concate_history = concate_history + "\n Human: " + i; + else concate_history = concate_history + "\n Robot: " + i; + + j++; + } + } + std::cout << "---------------------" << std::endl; + history.Publish(concate_history, rrlib::time::Now()); + + } + else if (this->activate_model_flan_t5.Get()) + { + std::string model_path = "$FINROC_HOME/sources/cpp/projects/emah/verbal_cues/etc/model/flan-t5-onnx"; + std::string model_name = "flan-t5"; + + FILE *process; + char buff[1024]; + auto value = in_speech_text.GetPointer(); + std::string input_message(*value); + + if (!input_message.empty()) + { + input_message.pop_back(); + if (input_message.compare(AUDIO_FAILED) == 0) + { + std::cout << default_text << std::endl; +// sentimentT5.Publish("neutral", rrlib::time::Now()); +// generated_sentence.Publish(default_text, rrlib::time::Now()); + } + else + { + // preprocess the text before sending to model + pre_text = preprocess(input_message); + + std::cout << model_name << std::endl; + std::cout << "current input: " + pre_text << std::endl; + + std::string input_cmd = "python ~/finroc/sources/cpp/projects/emah/verbal_cues/etc/generate_response_onnx.py --text=\"" + pre_text + "\" --model_path=\"" + model_path + "\" --model_name=\"" + model_name + "\""; + + process = popen(input_cmd.c_str(), "r"); + + while (fgets(buff, sizeof(buff), process)) + { + // post-process generated sentence + std::pair<std::string, std::string> post = postprocess(buff); + post_text = post.first; + emo_text = post.second; + emo_text.pop_back(); + + std::cout << "post-process text: " + post_text << std::endl; + std::cout << "emotion: " + emo_text << std::endl; + std::cout << "emo_length: " << emo_text.length()<< std::endl; + + } +// auto now = rrlib::time::Now(); + sentimentT5.Publish(emo_text); + generated_sentence.Publish(post_text); + FINROC_LOG_PRINT(ERROR, "PUBLISHING ", post_text); + usleep(10000); + pclose(process); + } + + } + + int j = 0; + std::string concate_history; + + // print history + std::vector<std::string> story; + if (conv_history.empty()) story = hist; + else + { + story = conv_history; + story.insert(story.end(), hist.begin(), hist.end()); + } + + std::cout << "=====Conv. History======" << std::endl; + for (const std::string & i : story) + { + std::cout << i << '\n'; + if (j == 0) + { + concate_history = " Human: " + i; + j++; + } + else + { + if (j % 2 == 0) concate_history = concate_history + "\n Human: " + i; + else concate_history = concate_history + "\n Robot: " + i; + j++; + } + } + std::cout << "========================" << std::endl; + history.Publish(concate_history, rrlib::time::Now()); + } +// } + }else{ + if(*sentimentT5.GetPointer().Get() != ""){ + usleep(1000000); + sentimentT5.Publish(""); + generated_sentence.Publish(""); + FINROC_LOG_PRINT(ERROR, "PUBLISHING EMPTY SENTENCE"); + } } } std::string mFirstInteractionT5::preprocess(std::string input) { - // store user input to history - hist.push_back(input); + // store user input to history + hist.push_back(input); - int size = hist.size(); - std::string concat_input_hist = input; + int size = hist.size(); + std::string concat_input_hist = input; - // concate the current given sentence with the previous utterence - if(size > 2){ - concat_input_hist = hist.at(size-3)+' '+hist.at(size-2)+' '+hist.at(size-1); - }else if(size > 1){ - concat_input_hist = hist.at(size-2)+' '+hist.at(size-1); - } + // concate the current given sentence with the previous utterence + if (size > 2) + { + concat_input_hist = hist.at(size - 3) + ' ' + hist.at(size - 2) + ' ' + hist.at(size - 1); + } + else if (size > 1) + { + concat_input_hist = hist.at(size - 2) + ' ' + hist.at(size - 1); + } return concat_input_hist; } @@ -345,141 +383,162 @@ std::string mFirstInteractionT5::preprocess(std::string input) std::pair<std::string, std::string> mFirstInteractionT5::postprocess(std::string sentence) { - // split generated text with its sentiment label - std::string delimiter = "<emo>"; - std::string text = sentence.substr(0, sentence.find(delimiter)); - std::string emo = sentence.substr(text.length()+5); - - // get the history size - int size = hist.size(); - std::cout << "generated text: "+text << std::endl; - if(size > 2){ - if(is_similar(clean_text(text), clean_text(hist.at(size-2)))){ - std::cout << "same content more than 60% is detected: "+text << std::endl; - - if(conv_history.empty()) conv_history = hist; - else conv_history.insert(conv_history.end(), hist.begin(), hist.end()); - - // empty history - hist.clear(); - - // Start new conversation with a new topic - // Initialize the random number generator with a seed - std::mt19937 rng(std::random_device{}()); - - // Define the distribution for generating random indices - std::uniform_int_distribution<std::size_t> dist(0, question_list.size() - 1); - - // Generate a random index - std::size_t random_index = dist(rng); - - // Access the randomly selected option - std::string selected_option = question_list[random_index]; - - // store to the temporal history - hist.push_back(selected_option); - - emo = "neutral"; - return std::make_pair(selected_option, emo); - }else{ - hist.push_back(text); - } - }else{ - hist.push_back(text); - } - - return std::make_pair(text, emo); + // split generated text with its sentiment label + std::string delimiter = "<emo>"; + std::string text = sentence.substr(0, sentence.find(delimiter)); + std::string emo = sentence.substr(text.length() + 5); + + // get the history size + int size = hist.size(); + std::cout << "generated text: " + text << std::endl; + if (size > 2) + { + if (is_similar(clean_text(text), clean_text(hist.at(size - 2)))) + { + std::cout << "same content more than 60% is detected: " + text << std::endl; + + if (conv_history.empty()) conv_history = hist; + else conv_history.insert(conv_history.end(), hist.begin(), hist.end()); + + // empty history + hist.clear(); + + // Start new conversation with a new topic + // Initialize the random number generator with a seed + std::mt19937 rng(std::random_device {}()); + + // Define the distribution for generating random indices + std::uniform_int_distribution<std::size_t> dist(0, question_list.size() - 1); + + // Generate a random index + std::size_t random_index = dist(rng); + + // Access the randomly selected option + std::string selected_option = question_list[random_index]; + + // store to the temporal history + hist.push_back(selected_option); + + emo = "neutral"; + return std::make_pair(selected_option, emo); + } + else + { + hist.push_back(text); + } + } + else + { + hist.push_back(text); + } + + return std::make_pair(text, emo); } std::string mFirstInteractionT5::clean_text(std::string str) { - // Remove punctuation - str.erase(std::remove_if(str.begin(), str.end(), ispunct), str.end()); + // Remove punctuation + str.erase(std::remove_if(str.begin(), str.end(), ispunct), str.end()); - // Convert to lowercase - std::transform(str.begin(), str.end(), str.begin(), - [](unsigned char c){ return std::tolower(c); }); - return str; + // Convert to lowercase + std::transform(str.begin(), str.end(), str.begin(), + [](unsigned char c) + { + return std::tolower(c); + }); + return str; } // Convert a sentence to a vector of its words -std::vector<std::string> mFirstInteractionT5::get_words(const std::string& sentence) { - std::vector<std::string> words; - std::string word; - for (const auto& c : sentence) { - if (c == ' ') { - words.push_back(word); - word.clear(); - } else { - word += c; - } +std::vector<std::string> mFirstInteractionT5::get_words(const std::string& sentence) +{ + std::vector<std::string> words; + std::string word; + for (const auto & c : sentence) + { + if (c == ' ') + { + words.push_back(word); + word.clear(); + } + else + { + word += c; } - words.push_back(word); - return words; + } + words.push_back(word); + return words; } // Function to calculate the dot product of two vectors -float mFirstInteractionT5::dot_product(std::vector<float> a, std::vector<float> b) { - float sum = 0.0; - float size = a.size(); - for (int i = 0; i < size; i++) { - sum += a[i] * b[i]; - } - return sum; +float mFirstInteractionT5::dot_product(std::vector<float> a, std::vector<float> b) +{ + float sum = 0.0; + float size = a.size(); + for (int i = 0; i < size; i++) + { + sum += a[i] * b[i]; + } + return sum; } // Function to calculate the magnitude of a vector -float mFirstInteractionT5::magnitude(std::vector<float> v) { - float sum = 0.0; - for (float x : v) { - sum += x * x; - } - return sqrt(sum); +float mFirstInteractionT5::magnitude(std::vector<float> v) +{ + float sum = 0.0; + for (float x : v) + { + sum += x * x; + } + return sqrt(sum); } float mFirstInteractionT5::CosineSimilarity(const std::string& s1, const std::string& s2) { - auto words1 = get_words(s1); - auto words2 = get_words(s2); - - // Combine both vectors into a single set of unique words - std::vector<std::string> all_words; - all_words.reserve(words1.size() + words2.size()); - all_words.insert(all_words.end(), words1.begin(), words1.end()); - all_words.insert(all_words.end(), words2.begin(), words2.end()); - sort(all_words.begin(), all_words.end()); - all_words.erase(unique(all_words.begin(), all_words.end()), all_words.end()); - - // Create vectors to represent the two sentences - std::vector<float> v1(all_words.size(), 0.0); - std::vector<float> v2(all_words.size(), 0.0); - - // Fill in the vectors with the frequency of each word - for (std::string word : words1) { - int index = lower_bound(all_words.begin(), all_words.end(), word) - all_words.begin(); - v1[index]++; - } - for (std::string word : words2) { - int index = lower_bound(all_words.begin(), all_words.end(), word) - all_words.begin(); - v2[index]++; - } - - // Calculate the cosine similarity - float dot = dot_product(v1, v2); - float mag1 = magnitude(v1); - float mag2 = magnitude(v2); - float cosine = dot / (mag1 * mag2); - return cosine; + auto words1 = get_words(s1); + auto words2 = get_words(s2); + + // Combine both vectors into a single set of unique words + std::vector<std::string> all_words; + all_words.reserve(words1.size() + words2.size()); + all_words.insert(all_words.end(), words1.begin(), words1.end()); + all_words.insert(all_words.end(), words2.begin(), words2.end()); + sort(all_words.begin(), all_words.end()); + all_words.erase(unique(all_words.begin(), all_words.end()), all_words.end()); + + // Create vectors to represent the two sentences + std::vector<float> v1(all_words.size(), 0.0); + std::vector<float> v2(all_words.size(), 0.0); + + // Fill in the vectors with the frequency of each word + for (std::string word : words1) + { + int index = lower_bound(all_words.begin(), all_words.end(), word) - all_words.begin(); + v1[index]++; + } + for (std::string word : words2) + { + int index = lower_bound(all_words.begin(), all_words.end(), word) - all_words.begin(); + v2[index]++; + } + + // Calculate the cosine similarity + float dot = dot_product(v1, v2); + float mag1 = magnitude(v1); + float mag2 = magnitude(v2); + float cosine = dot / (mag1 * mag2); + return cosine; } // Calculate the similarity of two sentences -bool mFirstInteractionT5::is_similar(const std::string& sentence1, const std::string& sentence2){ +bool mFirstInteractionT5::is_similar(const std::string& sentence1, const std::string& sentence2) +{ - float cosine_score = CosineSimilarity(sentence1, sentence2); + float cosine_score = CosineSimilarity(sentence1, sentence2); - std::cout << "cosine similaritry score : " << cosine_score << std::endl; + std::cout << "cosine similaritry score : " << cosine_score << std::endl; - return cosine_score >= 0.6; + return cosine_score >= 0.6; } //---------------------------------------------------------------------- diff --git a/cpp/mFirstInteractionT5.h b/cpp/mFirstInteractionT5.h index 44d5284..5723db7 100644 --- a/cpp/mFirstInteractionT5.h +++ b/cpp/mFirstInteractionT5.h @@ -79,14 +79,14 @@ class mFirstInteractionT5 : public structure::tSenseControlModule // Ports (These are the only variables that may be declared public) //---------------------------------------------------------------------- public: - tControllerInput<bool> activate_sr; +// tControllerInput<bool> activate_sr; tControllerInput<bool> activate_model_t5; tControllerInput<bool> activate_model_flan_t5; tControllerInput<std::string> in_speech_text; - tControllerOutput<std::string> out_speech_text; +// tControllerOutput<std::string> out_speech_text; tControllerOutput<std::string> generated_sentence; - tControllerOutput<std::string> sentiment; + tControllerOutput<std::string> sentimentT5; tControllerOutput<std::string> history; std::vector<std::string> conv_history; -- GitLab