-
Notifications
You must be signed in to change notification settings - Fork 10
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Contextual Information with Neural NLU #258
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -499,7 +499,19 @@ REVEAL: | |
- "[I almost never watch](modifier) films directed by [Claude Chabrol](directors)." | ||
- "[I often find solace with](modifier) films from the [late 2050s](year)." | ||
- "[Adventure drama](genres) films [always take me on an emotional journey](modifier)." | ||
|
||
- "[I love](modifier) watching movies on a [rainy night](time)." | ||
- "[I dislike with a passion](modifier) to watch a movie on [mondays](time)." | ||
- "[I do enjoy](modifier) a nice movie with [my wife](companion)." | ||
- "[My granddaughters](companion) and [I always watch](modifier) [Micheal Douglas](actors) movies together." | ||
- "[I absolutely adore](modifier) watching [Tom Hanks](actors) movies on [late evenings](time) with my [best friend](companion)." | ||
- "[I can't resist](modifier) watching [Meryl Streep](actors) movies on [weekend afternoons](time) with my [boyfriend](companion)." | ||
- "[I find it relaxing](modifier) to watch [Leonardo DiCaprio](actors) movies on [Sunday mornings](time) with my [significant other](companion)." | ||
- "[I'm not too keen](modifier) on watching [Emma Stone](actors) movies on [early mornings](time) with my [siblings](companion)." | ||
- "[I always look forward](modifier) to watching [Denzel Washington](actors) movies on [Friday nights](time) with my [parents](companion)." | ||
- "[I'm hooked on](modifier) watching movies with my [roommate](companion) on [weekend nights](time) on [HBO](location)." | ||
- "[I enjoy](modifier) watching [Tom Cruise](actors) movies on [Sunday afternoons](time) on [television](location)." | ||
- "[I savor](modifier) watching [Sacha Baron Cohen](actors) movies on [Thursday evenings](time) with my [friends](companion) on my [laptop](location)." | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Are these enough samples, or do we need to add more? |
||
|
||
INQUIRE: | ||
- "What [genre](genres) does this movie fall under?" | ||
- "Is this a [comedy](genres) or [drama](genres)?" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -45,6 +45,12 @@ class JointBERTSlot(EnumWithMapping): | |
I_PREFERENCE_KEYWORDS = auto() | ||
B_PREFERENCE_YEAR = auto() | ||
I_PREFERENCE_YEAR = auto() | ||
B_PREFERENCE_TIME = auto() | ||
I_PREFERENCE_TIME = auto() | ||
B_PREFERENCE_COMPANION = auto() | ||
I_PREFERENCE_COMPANION = auto() | ||
B_PREFERENCE_LOCATION = auto() | ||
I_PREFERENCE_LOCATION = auto() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add CONTEXT in the name so we know what these refer to. E.g., |
||
B_INQUIRE_GENRES = auto() | ||
I_INQUIRE_GENRES = auto() | ||
B_INQUIRE_RATING = auto() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -60,7 +60,7 @@ def generate_dacts( | |
if selected_option: | ||
return selected_option | ||
|
||
intent, slots = self.annotate_utterance(user_utterance) | ||
intent, slots, contents = self.annotate_utterance(user_utterance) | ||
intent = UserIntents[intent] | ||
|
||
constraints = [] | ||
|
@@ -90,7 +90,7 @@ def generate_dacts( | |
|
||
def annotate_utterance( | ||
self, user_utterance: UserUtterance | ||
) -> Tuple[str, list]: | ||
) -> Tuple[str, list, list]: | ||
"""Annotates the utterance with intent and slot information. | ||
|
||
Args: | ||
|
@@ -99,6 +99,7 @@ def annotate_utterance( | |
Returns: | ||
A tuple of the intent and slot information. | ||
""" | ||
available_contexts = ["PREFERENCE_COMPANION","PREFERENCE_TIME","PREFERENECE_LOCATION"] | ||
mask = [ | ||
not token.startswith("##") | ||
for token in self._tokenizer.tokenize(user_utterance.text) | ||
|
@@ -125,6 +126,7 @@ def annotate_utterance( | |
|
||
# For each starting point, find the end point (i.e., all 'I_' labels) | ||
slots_info = [] | ||
context_info = [] | ||
for start in start_indices: | ||
end = start | ||
while ( | ||
|
@@ -136,16 +138,27 @@ def annotate_utterance( | |
char_start = offset_mapping[start][0] | ||
char_end = offset_mapping[end][1] | ||
slot_value = user_utterance.text[char_start:char_end] | ||
slots_info.append( | ||
{ | ||
"slot": JointBERTSlot.from_index(slot_idxs[start]).name[2:], | ||
"value": slot_value, | ||
"start": char_start, | ||
"end": char_end, | ||
} | ||
) | ||
if JointBERTSlot.from_index(slot_idxs[start]).name[2:] in available_contexts: | ||
context_info.append( | ||
{ | ||
"context": JointBERTSlot.from_index(slot_idxs[start]).name[2:], | ||
"value": slot_value, | ||
"start": char_start, | ||
"end": char_end, | ||
} | ||
|
||
) | ||
else: | ||
slots_info.append( | ||
{ | ||
"slot": JointBERTSlot.from_index(slot_idxs[start]).name[2:], | ||
"value": slot_value, | ||
"start": char_start, | ||
"end": char_end, | ||
} | ||
) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there a need to differentiate between "context" and "slot"? We want to use them in a similar way, so the extraction can be the same. "context" should be in the slot name if you address my comment in |
||
|
||
return intent, slots_info | ||
return intent, slots_info, context_info | ||
|
||
def get_constraint_operator(self, text: str) -> Operator: | ||
"""Gets the operator based on the text. Only supports negation for now. | ||
|
@@ -163,6 +176,9 @@ def get_constraint_operator(self, text: str) -> Operator: | |
if __name__ == "__main__": | ||
nlu = NeuralNLU(None) | ||
|
||
user_utterance = UserUtterance("I like space movies") | ||
intent, slots_info, context_info = nlu.annotate_utterance(user_utterance) | ||
""" | ||
class DS: | ||
item_in_focus = None | ||
|
||
|
@@ -173,3 +189,4 @@ class DS: | |
) | ||
|
||
print([str(da) for da in da]) | ||
""" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This entire block ( |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,197 @@ | ||
from unittest.mock import MagicMock, Mock, patch | ||
|
||
import pytest | ||
|
||
from moviebot.nlu.neural_nlu import NeuralNLU | ||
from moviebot.core.utterance.utterance import UserUtterance | ||
from moviebot.core.core_types import DialogueOptions | ||
from moviebot.dialogue_manager.dialogue_state import DialogueState | ||
from tests.mocks.mock_data_loader import MockDataLoader | ||
|
||
from moviebot.dialogue_manager.dialogue_act import DialogueAct | ||
from moviebot.core.intents.user_intents import UserIntents | ||
from moviebot.nlu.annotation.item_constraint import ItemConstraint | ||
from moviebot.core.intents.agent_intents import AgentIntents | ||
from moviebot.nlu.annotation.operator import Operator | ||
from moviebot.nlu.annotation.values import Values | ||
|
||
|
||
@pytest.fixture | ||
def dialogue_state(): | ||
dialogue_state = Mock() | ||
dialogue_state.item_in_focus = None | ||
dialogue_state.last_agent_dacts = [] | ||
return dialogue_state | ||
|
||
@pytest.fixture | ||
@patch("moviebot.nlu.user_intents_checker.DataLoader", new=MockDataLoader) | ||
def nlu(): | ||
config = { | ||
"domain": "", | ||
"database": "", | ||
"slot_values_path": "", | ||
"tag_words_slots_path": "", | ||
} | ||
nlu = NeuralNLU(config) | ||
return nlu | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"last_dacts", [[], [DialogueAct(AgentIntents.ACKNOWLEDGE, [])]] | ||
) | ||
def test_generate_dacts(nlu,dialogue_state,last_dacts): | ||
user_utterance = UserUtterance("I want to watch an action movie") | ||
dialogue_state.last_agent_dacts = last_dacts | ||
options = {} | ||
|
||
dacts = nlu.generate_dacts(user_utterance, options, dialogue_state) | ||
|
||
assert len(dacts) == 1 | ||
assert dacts[0].intent == UserIntents.REVEAL | ||
|
||
def test_annotate_utterance(nlu): | ||
user_utterance = UserUtterance("I want to watch an action movie") | ||
intent, slots_info,context_info = nlu.annotate_utterance(user_utterance) | ||
assert intent == "REVEAL" | ||
assert len(slots_info) == 2 | ||
assert slots_info[0]["slot"] == "PREFERENCE_MODIFIER" | ||
assert slots_info[0]["value"] == "I want" | ||
assert slots_info[1]["slot"] == "PREFERENCE_GENRES" | ||
assert slots_info[1]["value"] == "action" | ||
|
||
user_utterance = UserUtterance("Thank you for the recommendations,goodbye") | ||
intent, slots_info,context_info = nlu.annotate_utterance(user_utterance) | ||
assert intent == "BYE" | ||
assert len(slots_info) == 0 | ||
|
||
user_utterance = UserUtterance("Is this a comedy movie ?") | ||
intent, slots_info,context_info = nlu.annotate_utterance(user_utterance) | ||
assert intent == "INQUIRE" | ||
assert len(slots_info) == 1 | ||
assert slots_info[0]["slot"] == "INQUIRE_GENRES" | ||
assert slots_info[0]["value"] == "comedy" | ||
|
||
|
||
user_utterance = UserUtterance("Recommend me a movie with Brad Pitt") | ||
intent, slots_info,context_info = nlu.annotate_utterance(user_utterance) | ||
assert intent == "REMOVE_PREFERENCE" | ||
assert len(slots_info) == 1 | ||
assert slots_info[0]["slot"] == "PREFERENCE_ACTORS" | ||
assert slots_info[0]["value"] == "Brad Pitt" | ||
|
||
|
||
user_utterance = UserUtterance("I like space movies") | ||
intent, slots_info,context_info = nlu.annotate_utterance(user_utterance) | ||
assert intent == "REVEAL" | ||
assert len(slots_info) == 2 | ||
assert slots_info[0]["slot"] == "PREFERENCE_MODIFIER" | ||
assert slots_info[0]["value"] == "I like" | ||
assert slots_info[1]["slot"] == "PREFERENCE_KEYWORDS" | ||
assert slots_info[1]["value"] == "space" | ||
|
||
user_utterance = UserUtterance("I hate horror movies") | ||
intent, slots_info,context_info = nlu.annotate_utterance(user_utterance) | ||
assert intent == "REVEAL" | ||
assert len(slots_info) == 2 | ||
assert slots_info[0]["slot"] == "PREFERENCE_MODIFIER" | ||
assert slots_info[0]["value"] == "I hate" | ||
assert slots_info[1]["slot"] == "PREFERENCE_GENRES" | ||
assert slots_info[1]["value"] == "horror" | ||
assert len(context_info) == 0 | ||
|
||
|
||
user_utterance = UserUtterance("I love watching movies on a rainy night") | ||
intent, slots_info,context_info = nlu.annotate_utterance(user_utterance) | ||
assert intent == "REVEAL" | ||
assert len(slots_info) == 1 | ||
assert slots_info[0]["slot"] == "PREFERENCE_MODIFIER" | ||
assert slots_info[0]["value"] == "I love" | ||
assert len(context_info) == 1 | ||
assert context_info[0]["context"] == "PREFERENCE_TIME" | ||
assert context_info[0]["value"] == "rainy night" | ||
|
||
user_utterance = UserUtterance("I love watching drama movies on a rainy night") | ||
intent, slots_info,context_info = nlu.annotate_utterance(user_utterance) | ||
assert intent == "REVEAL" | ||
assert len(slots_info) == 2 | ||
assert slots_info[0]["slot"] == "PREFERENCE_MODIFIER" | ||
assert slots_info[0]["value"] == "I love" | ||
assert slots_info[1]["slot"] == "PREFERENCE_GENRES" | ||
assert slots_info[1]["value"] == "drama" | ||
assert len(context_info) == 1 | ||
assert context_info[0]["context"] == "PREFERENCE_TIME" | ||
assert context_info[0]["value"] == "rainy night" | ||
|
||
user_utterance = UserUtterance("I really enjoy watching drama movies on a rainy night") | ||
intent, slots_info,context_info = nlu.annotate_utterance(user_utterance) | ||
assert intent == "REVEAL" | ||
assert len(slots_info) == 2 | ||
assert slots_info[0]["slot"] == "PREFERENCE_MODIFIER" | ||
assert slots_info[0]["value"] == "I really enjoy" | ||
assert slots_info[1]["slot"] == "PREFERENCE_GENRES" | ||
assert slots_info[1]["value"] == "drama" | ||
assert len(context_info) == 1 | ||
assert context_info[0]["context"] == "PREFERENCE_TIME" | ||
assert context_info[0]["value"] == "rainy night" | ||
|
||
user_utterance = UserUtterance("I'll be going on a date with my girlfriend this thursday evening and I need to find a good horror movie.") | ||
intent, slots_info,context_info = nlu.annotate_utterance(user_utterance) | ||
assert intent == "UNK" | ||
assert len(slots_info) == 1 | ||
assert slots_info[0]["slot"] == "PREFERENCE_GENRES" | ||
assert slots_info[0]["value"] == "horror" | ||
assert len(context_info) == 2 | ||
assert context_info[0]["context"] == "PREFERENCE_COMPANION" | ||
assert context_info[0]["value"] == "girlfriend" | ||
assert context_info[1]["context"] == "PREFERENCE_TIME" | ||
assert context_info[1]["value"] == "thursday evening" | ||
|
||
user_utterance = UserUtterance("I hate christmas movies") | ||
intent, slots_info,context_info = nlu.annotate_utterance(user_utterance) | ||
assert intent == "REVEAL" | ||
assert len(slots_info) == 2 | ||
assert slots_info[0]["slot"] == "PREFERENCE_MODIFIER" | ||
assert slots_info[0]["value"] == "I hate" | ||
assert slots_info[1]["slot"] == "PREFERENCE_GENRES" | ||
assert slots_info[1]["value"] == "christmas" | ||
assert len(context_info) == 0 | ||
|
||
user_utterance = UserUtterance("I hate watching movies at christmas") | ||
intent, slots_info,context_info = nlu.annotate_utterance(user_utterance) | ||
assert intent == "REVEAL" | ||
assert len(slots_info) == 1 | ||
assert slots_info[0]["slot"] == "PREFERENCE_MODIFIER" | ||
assert slots_info[0]["value"] == "I hate" | ||
assert len(context_info) == 1 | ||
assert context_info[0]["context"] == "PREFERENCE_TIME" | ||
assert context_info[0]["value"] == "christmas" | ||
|
||
user_utterance = UserUtterance("I am looking for an action movie to watch with my siblings for the sunday afternoon.We love Brad Pitt. Can you recommend me something ?") | ||
intent, slots_info,context_info = nlu.annotate_utterance(user_utterance) | ||
assert intent == "UNK" | ||
assert len(slots_info) == 3 | ||
assert slots_info[0]["slot"] == "PREFERENCE_MODIFIER" | ||
assert slots_info[0]["value"] == "I am" | ||
assert slots_info[1]["slot"] == "PREFERENCE_GENRES" | ||
assert slots_info[1]["value"] == "action" | ||
assert slots_info[2]["slot"] == "PREFERENCE_ACTORS" | ||
assert slots_info[2]["value"] == "Brad Pitt" | ||
assert len(context_info) == 2 | ||
assert context_info[0]["context"] == "PREFERENCE_COMPANION" | ||
assert context_info[0]["value"] == "siblings" | ||
assert context_info[1]["context"] == "PREFERENCE_TIME" | ||
assert context_info[1]["value"] == "sunday afternoon" | ||
|
||
user_utterance = UserUtterance("I am looking for an action movie to watch with my siblings.We love Brad Pitt. Can you recommend me something ?") | ||
intent, slots_info,context_info = nlu.annotate_utterance(user_utterance) | ||
assert intent == "UNK" | ||
assert len(slots_info) == 3 | ||
assert slots_info[0]["slot"] == "PREFERENCE_MODIFIER" | ||
assert slots_info[0]["value"] == "I am looking" | ||
assert slots_info[1]["slot"] == "PREFERENCE_GENRES" | ||
assert slots_info[1]["value"] == "action" | ||
assert slots_info[2]["slot"] == "PREFERENCE_ACTORS" | ||
assert slots_info[2]["value"] == "Brad Pitt" | ||
assert len(context_info) == 1 | ||
assert context_info[0]["context"] == "PREFERENCE_COMPANION" | ||
assert context_info[0]["value"] == "siblings" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe `slots_annotation` is used to fetch information from the database, so it might break in the current version. Do we need to extend this list in this PR?