Started on training data and wrote script for fetching knowledge base data from tripsit

2025-12-24 19:27:55 +01:00 · 2021-09-29 02:28:58 +02:00 · 2021-09-29 02:28:58 +02:00 · 1f885928c7
commit 1f885928c7
parent 7ac76d212e
10 changed files with 37379 additions and 191 deletions
--- a/.gitignore
+++ b/.gitignore
@ -127,3 +127,6 @@ dmypy.json

 # Pyre type checker
 .pyre/
+
+models/
+.vscode
--- a/config.yml
+++ b/config.yml
@ -1,36 +1,49 @@
-# Configuration for Rasa NLU.
-# https://rasa.com/docs/rasa/nlu/components/
 language: en
-
 pipeline:
-# # No configuration for the NLU pipeline was provided. The following default pipeline was used to train your model.
-# # If you'd like to customize it, uncomment and adjust the pipeline.
-# # See https://rasa.com/docs/rasa/tuning-your-model for more information.
-#   - name: WhitespaceTokenizer
-#   - name: RegexFeaturizer
-#   - name: LexicalSyntacticFeaturizer
-#   - name: CountVectorsFeaturizer
-#   - name: CountVectorsFeaturizer
-#     analyzer: char_wb
-#     min_ngram: 1
-#     max_ngram: 4
-#   - name: DIETClassifier
-#     epochs: 100
-#   - name: EntitySynonymMapper
-#   - name: ResponseSelector
-#     epochs: 100
-#   - name: FallbackClassifier
-#     threshold: 0.3
-#     ambiguity_threshold: 0.1
-
-# Configuration for Rasa Core.
-# https://rasa.com/docs/rasa/core/policies/
+  - name: WhitespaceTokenizer
+    token_pattern: (?u)\b\w+\b
+  - name: RegexFeaturizer
+  - name: LexicalSyntacticFeaturizer
+  - name: CountVectorsFeaturizer  # first instance: no options set (OOV_token below is left disabled)
+#    OOV_token: oov
+  - name: CountVectorsFeaturizer  # second instance: char_wb analyzer, n-gram lengths 1 to 4
+    analyzer: char_wb
+    min_ngram: 1
+    max_ngram: 4
+  - name: DIETClassifier
+    epochs: 100
+    ranking_length: 10
+#  - name: DucklingEntityExtractor
+#    url: http://localhost:8000
+#    dimensions:
+#      - email
+#      - number
+#      - amount-of-money
+  - name: EntitySynonymMapper
+  - name: ResponseSelector  # one selector per retrieval intent: out_of_scope, faq, chitchat
+    retrieval_intent: out_of_scope
+    scale_loss: false
+    epochs: 100
+  - name: ResponseSelector
+    retrieval_intent: faq
+    scale_loss: false
+    epochs: 100
+  - name: ResponseSelector
+    retrieval_intent: chitchat
+    scale_loss: false
+    epochs: 100
+  - name: FallbackClassifier  # NOTE(review): threshold is presumably a minimum intent confidence - confirm in Rasa docs
+    threshold: 0.7
 policies:
-# # No configuration for policies was provided. The following default policies were used to train your model.
-# # If you'd like to customize them, uncomment and adjust the policies.
-# # See https://rasa.com/docs/rasa/policies for more information.
-#   - name: MemoizationPolicy
-#   - name: TEDPolicy
-#     max_history: 5
-#     epochs: 100
-#   - name: RulePolicy
+  - name: RulePolicy  # fallback settings below name action_default_fallback as the fallback action
+    core_fallback_threshold: 0.3
+    core_fallback_action_name: "action_default_fallback"
+    enable_fallback_prediction: true
+  - name: AugmentedMemoizationPolicy
+    max_history: 6
+  - name: TEDPolicy
+    max_history: 10
+    epochs: 20
+    batch_size:  # NOTE(review): presumably a [start, end] batch-size range - confirm in Rasa docs
+      - 32
+      - 64
--- a/data/nlu.yml
+++ b/data/nlu.yml
--- a/data/rules.yml
+++ b/data/rules.yml
@ -4,10 +4,20 @@ rules:

 - rule: Say goodbye anytime the user says goodbye
  steps:
-  - intent: goodbye
+  - intent: bye
  - action: utter_goodbye

- rule: Say 'I am a bot' anytime the user challenges
+- rule: Chitchat
  steps:
-  - intent: bot_challenge
-  - action: utter_iamabot
+  - intent: chitchat
+  - action: utter_chitchat
+
+- rule: FAQs
+  steps:
+  - intent: faq
+  - action: utter_faq
+
+- rule: OOS
+  steps:
+  - intent: out_of_scope
+  - action: utter_out_of_scope
--- a/data/stories.yml
+++ b/data/stories.yml
@ -2,29 +2,23 @@ version: "2.0"

 stories:

- story: happy path
+- story: simple greet
  steps:
  - intent: greet
  - action: utter_greet
-  - intent: mood_great
-  - action: utter_happy

- story: sad path 1
+- story: happy interaction
  steps:
  - intent: greet
  - action: utter_greet
-  - intent: mood_unhappy
-  - action: utter_cheer_up
-  - action: utter_did_that_help
-  - intent: affirm
-  - action: utter_happy
+  - intent: faq
+  - action: utter_faq

- story: sad path 2
+- story: interactive_story_1
  steps:
  - intent: greet
  - action: utter_greet
-  - intent: mood_unhappy
-  - action: utter_cheer_up
-  - action: utter_did_that_help
-  - intent: deny
-  - action: utter_goodbye
+  - intent: chitchat
+  - action: utter_chitchat
+  - intent: faq
+  - action: utter_faq
--- a/domain.yml
+++ b/domain.yml
@ -1,34 +1,139 @@
-version: "2.0"
-
-intents:
-  - greet
-  - goodbye
-  - affirm
-  - deny
-  - mood_great
-  - mood_unhappy
-  - bot_challenge
-
-responses:
-  utter_greet:
-  - text: "Hey! How are you?"
-
-  utter_cheer_up:
-  - text: "Here is something to cheer you up:"
-    image: "https://i.imgur.com/nGF1K8f.jpg"
-
-  utter_did_that_help:
-  - text: "Did that help you?"
-
-  utter_happy:
-  - text: "Great, carry on!"
-
-  utter_goodbye:
-  - text: "Bye"
-
-  utter_iamabot:
-  - text: "I am a bot, powered by Rasa."
-
+version: '2.0'
+config:
+  store_entities_as_slots: true
 session_config:
  session_expiration_time: 60
  carry_over_slots_to_new_session: true
+intents:
+- greet
+- chitchat:
+    is_retrieval_intent: true
+- faq:
+    is_retrieval_intent: true
+- out_of_scope:
+    is_retrieval_intent: true
+- affirm
+- deny
+- bye
+- react_positive
+- react_negative
+- explain
+- thank
+- help
+- contact
+- inform
+- restart
+
+entities:
+- language
+- location
+- name
+
+slots:
+  name:
+    type: text
+    influence_conversation: true
+
+responses:
+  utter_ask_name:
+  - text: Hey! My name is Pablo, I am a bot, what's your name?
+  utter_greet_by_name:  # the {name} placeholder matches the "name" slot declared under slots
+  - text: Nice to meet you {name}! How are you doing?
+  utter_greet:
+  - text: Nice to meet you! How are you doing?
+  utter_cheer_up:
+  - text: 'Here is something to cheer you up:'
+    image: https://i.imgur.com/nGF1K8f.jpg
+  utter_did_that_help:
+  - text: Did that help you?
+  utter_happy:
+  - text: Great! How can I help you today? I can tell you about our company, how we can help you or why you should consider implementing a chatbot into your business!
+  utter_goodbye:
+  - text: Bye! Was nice chatting to you, if you'd like you can restart this conversation by simply telling me to.
+  utter_chitchat/ask_howdoing:
+  - text: I'm great! Thanks for asking.
+  - text: I'm good, thanks!
+  - text: A little bit too warm, otherwise fine.
+  - text: A little bit cold, otherwise fine.
+  utter_chitchat/ask_howold:
+  - text: Old enough to be a bot!
+  - text: '42'
+  - text: Age is just an issue of mind over matter. If you don’t mind, it doesn’t matter.
+  - text: My first git commit was many moons ago.
+  - text: Why do you ask? Are my wrinkles showing?
+  - text: I've hit the age where I actively try to forget how old I am.
+  utter_chitchat/ask_isbot:
+  - text: Yep, I'm a bot!
+  - text: Yes, I'm a bot.
+  - text: Yep, you guessed it, I'm a bot!
+  - text: I am indeed a bot 🤖
+  utter_chitchat/ask_ishuman:
+  - text: I'm not a human, I'm a bot! 🤖
+  utter_chitchat/ask_restaurant:
+  - text: I'm sorry, I can't recommend you a restaurant as I usually cook at home.
+  - text: I'm sorry, I'm not getting taste buds for another few updates.
+  - text: I'd need some more data. If you lick the monitor perhaps I can evaluate your taste buds.
+  utter_chitchat/ask_time:
+  - text: It's the most wonderful time of the year!
+  - text: It's party time!
+  - text: Time is a human construct, you'll have to tell me.
+  - text: It's five o'clock somewhere!
+  - text: "In an ever expanding universe, the real question is: what time isn't it?"
+  - text: That's hard to say -- it's different all over the world!
+  utter_chitchat/ask_languagesbot:
+  - text: I can spell baguette in French, but unfortunately English is the only language I can answer you in.
+  - text: I am in the process of learning, but at the moment I can only speak English.
+  - text: Binary code and the language of love. And English.
+  - text: I was written in Python, but for your convenience I'll translate to English.
+  utter_chitchat/ask_weather:
+  - text: I don't know about where you live, but in my world it's always sunny 🔆
+  - text: It's getting pretty chilly!
+  - text: Where I'm from, it's almost never-leaving-the-house weather.
+  - text: Winter is coming ⚔️
+  utter_chitchat/ask_whatismyname:
+  - text: It's probably the one that your parents chose for you.
+  - text: I'd tell you, but there's restricted access to that chunk of memory.
+  - text: Believe it or not, I actually am not spying on your personal information.
+  - text: You're the second person now to ask me that. Rihanna was the first.
+  utter_chitchat/ask_whatspossible:
+  - text: You can ask me about how to get started with Rasa, the difference between Rasa and Rasa X, subscribing to our newsletter or booking a sales call.
+  utter_chitchat/ask_wherefrom:
+  - text: My developers come from all over the world!
+  - text: I was taught not to give out my address on the internet.
+  - text: My address starts with github.com.
+  utter_chitchat/ask_whoami:
+  - text: I hope you are being yourself.
+  - text: Who do you think you are?
+  - text: Unfortunately I haven't been programmed with the amount of necessary philosophy knowledge to answer that.
+  utter_chitchat/ask_whoisit:
+  - text: I'm Pablo, the drug education bot! 🐦
+  utter_chitchat/nicetomeetyou:
+  - text: Likewise!
+  - text: Thank you. It is a pleasure to meet you as well!
+  - text: It is nice to meet you too!
+  - text: Pleased to meet you too!
+  - text: It's always a pleasure to meet new people!
+  - text: Nice to meet you too! Happy to be of help.
+  utter_chitchat/telljoke:
+  - text: Why are eggs not very much into jokes? - Because they could crack up.
+  - text: What's a tree's favorite drink? - Root beer!
+  - text: Why do the French like to eat snails so much? - They can't stand fast food.
+  - text: Why did the robot get angry? - Because someone kept pushing its buttons.
+  - text: What do you call a pirate droid? - Arrrr-2-D2
+  - text: Why did the robot cross the road? - Because he was programmed to.
+  utter_chitchat/confirm_presence:
+  - text: Sure am!
+  utter_faq/drugs_safe:
+  - text: That depends on which you are using and, most importantly, how you are using them...
+  utter_faq/drugs_legal:
+  - text: Probably but it depends on where you are and what drugs
+
+  utter_out_of_scope/non_english:
+  - text: No hablo english
+  utter_out_of_scope/other:
+  - text: I cant do that
+actions:
+- utter_chitchat
+- utter_faq
+- utter_greet
+- utter_out_of_scope
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,2 @@
+rasa
+pandas
--- a/tripsit/getAllDrugs.json
+++ b/tripsit/getAllDrugs.json
--- a/tripsit/getAllDrugs.py
+++ b/tripsit/getAllDrugs.py
@ -0,0 +1,27 @@
+import requests, json
+import pandas as pd
+
+# API URL
+url = "http://tripbot.tripsit.me/api/tripsit/getAllDrugs"
+# Bound the wait and stop on an HTTP error status instead of parsing an error body
+r = requests.get(url, timeout=30)
+r.raise_for_status()
+data = r.json()
+
+# Load the first entry of "data" into a df, one row per top-level key
+df = pd.DataFrame.from_dict(data["data"][0], orient="index")
+
+# Add a sequential id for each drug for rasa
+df["id"] = range(len(df))
+
+# Clean NaN values (v == v is False only for NaN) and None before serializing.
+# Built before the file is opened so a failure here cannot leave a truncated file.
+clean_data = {
+    k1: {k: v for k, v in v1.items() if v == v and v is not None}
+    for k1, v1 in df.to_dict("index").items()
+}
+
+# Write to JSON file. ensure_ascii=False keeps greek letters (like alpha) as-is,
+# so the encoding is pinned to utf-8 rather than left to the platform default.
+with open("tripsit/getAllDrugs.json", "w", encoding="utf-8") as fp:
+    json.dump(clean_data, fp, indent=2, ensure_ascii=False)
--- a/tripsit/get_data.ipynb
+++ b/tripsit/get_data.ipynb
@ -1,37 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "source": [
-    "import pandas as pd"
-   ],
-   "outputs": [],
-   "metadata": {}
-  }
- ],
- "metadata": {
-  "orig_nbformat": 4,
-  "language_info": {
-   "name": "python",
-   "version": "3.8.11",
-   "mimetype": "text/x-python",
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "pygments_lexer": "ipython3",
-   "nbconvert_exporter": "python",
-   "file_extension": ".py"
-  },
-  "kernelspec": {
-   "name": "python3",
-   "display_name": "Python 3.9.7 64-bit ('pablo-bot': conda)"
-  },
-  "interpreter": {
-   "hash": "9b8925f46c4eb81faf51dd413b2890c72e106e6d9cdc933e302e3dbb556b2244"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}