This commit is contained in:
Kian Channon 2021-10-14 12:34:07 +02:00
parent 1e6cbc386a
commit 8c7d91d097
14 changed files with 53552 additions and 4385 deletions

View File

@ -1,27 +1,12 @@
# This file contains your custom actions, which can be used to run
# custom Python code.
#
# See this guide on how to implement these actions:
# https://rasa.com/docs/rasa/custom-actions
from typing import Any, Text, Dict, List
from rasa_sdk import Action, Tracker
from rasa_sdk.executor import CollectingDispatcher
from rasa_sdk.knowledge_base.storage import InMemoryKnowledgeBase
from rasa_sdk.knowledge_base.actions import ActionQueryKnowledgeBase
# This is a simple example for a custom action which utters "Hello World!"
# from typing import Any, Text, Dict, List
#
# from rasa_sdk import Action, Tracker
# from rasa_sdk.executor import CollectingDispatcher
#
#
# class ActionHelloWorld(Action):
#
# def name(self) -> Text:
# return "action_hello_world"
#
# def run(self, dispatcher: CollectingDispatcher,
# tracker: Tracker,
# domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:
#
# dispatcher.utter_message(text="Hello World!")
#
# return []
class KnowledgeBaseAction(ActionQueryKnowledgeBase):
    """Knowledge-base query action backed by the substances JSON file.

    The action wraps an ``InMemoryKnowledgeBase`` built from
    ``ts_pn_data/substances_data.json`` and hands it to the
    ``ActionQueryKnowledgeBase`` base class.
    """

    def __init__(self):
        # NOTE(review): the path is relative; presumably it is resolved
        # against the working directory of the action server -- confirm.
        super().__init__(InMemoryKnowledgeBase("ts_pn_data/substances_data.json"))

View File

@ -1,7 +1,7 @@
language: en
pipeline:
- name: WhitespaceTokenizer
token_pattern: (?u)\b\w+\b
# token_pattern: (?u)\b\w+\b
- name: RegexFeaturizer
use_word_boundaries: false
case_sensitive: false

File diff suppressed because it is too large Load Diff

View File

@ -68,7 +68,7 @@ nlu:
- 2-PA
- 2-phenylacetamide
- 2-PTA
- 2-(p-tolyl)acetamide
- 2-acetamide
- 4-methyl-2-pa
- 25B-NBOH
- 25bnboh
@ -786,7 +786,7 @@ nlu:
- n,n-diallyltryptamine
- Datura
- jimson weed
- Datura (botany)
- Datura
- angel's trumpets
- devil's trumpets
- devil's weed
@ -1064,8 +1064,8 @@ nlu:
- Hydrocodone
- hydro
- vicodin
- vicodin (with paracetamol)
- zohydro er (extended-release)
- vicodin
- zohydro er
- Hydromorphone
- dilaudid
- diluadid
@ -1074,7 +1074,7 @@ nlu:
- Hydroxyzine
- atarax
- vistaril
- Hyoscyamus niger (botany)
- Hyoscyamus niger
- henbane
- stinking nightshade
- Ibogaine
@ -1116,7 +1116,7 @@ nlu:
- kratom
- kratum
- mitragyna speciosa
- กระท่อม (thai)
- กระท่อม
- Theanine
- l-theanine
- l-γ-glutamylethylamide and n5-ethyl-l-glutamine
@ -1161,7 +1161,7 @@ nlu:
- lambda
- Mandragora
- mandrake
- Mandragora officinarum (botany)
- Mandragora officinarum
- mandrake
- Marinol
- cesamet
@ -1513,7 +1513,7 @@ nlu:
- Picamilon
- Pinazepam
- domar
- Piper nigrum (botany)
- Piper nigrum
- black pepper
- green pepper
- peppercorn
@ -1656,7 +1656,7 @@ nlu:
- enerion
- sulbut
- youvitan
- Tabernanthe iboga (botany)
- Tabernanthe iboga
- iboga
- Tapentadol
- nucynta
@ -1829,7 +1829,7 @@ nlu:
- 2-phenylacetamide
- synonym: 2-PTA
examples: |
- 2-(p-tolyl)acetamide
- 2-acetamide
- 4-methyl-2-pa
- synonym: 25B-NBOH
examples: |
@ -2648,7 +2648,7 @@ nlu:
- synonym: Datura
examples: |
- jimson weed
- synonym: Datura (botany)
- synonym: Datura
examples: |
- angel's trumpets
- devil's trumpets
@ -2964,8 +2964,8 @@ nlu:
examples: |
- hydro
- vicodin
- vicodin (with paracetamol)
- zohydro er (extended-release)
- vicodin
- zohydro er
- synonym: Hydromorphone
examples: |
- dilaudid
@ -2976,7 +2976,7 @@ nlu:
examples: |
- atarax
- vistaril
- synonym: Hyoscyamus niger (botany)
- synonym: Hyoscyamus niger
examples: |
- henbane
- stinking nightshade
@ -3018,7 +3018,7 @@ nlu:
- kratom
- kratum
- mitragyna speciosa
- กระท่อม (thai)
- กระท่อม
- synonym: Theanine
examples: |
- l-theanine
@ -3074,7 +3074,7 @@ nlu:
- synonym: Mandragora
examples: |
- mandrake
- synonym: Mandragora officinarum (botany)
- synonym: Mandragora officinarum
examples: |
- mandrake
- synonym: Marinol
@ -3477,7 +3477,7 @@ nlu:
- synonym: Pinazepam
examples: |
- domar
- synonym: Piper nigrum (botany)
- synonym: Piper nigrum
examples: |
- black pepper
- green pepper
@ -3639,7 +3639,7 @@ nlu:
- enerion
- sulbut
- youvitan
- synonym: Tabernanthe iboga (botany)
- synonym: Tabernanthe iboga
examples: |
- iboga
- synonym: Tapentadol

View File

@ -22,7 +22,3 @@ rules:
- intent: out_of_scope
- action: utter_out_of_scope
- rule: what is drug
steps:
- intent: what_is_substance
- action: utter_what_is_stubstace

View File

@ -21,4 +21,22 @@ stories:
- intent: chitchat
- action: utter_chitchat
- intent: faq
- action: utter_faq
- action: utter_faq
- story: knowledge base happy path
steps:
- intent: greet
- action: utter_greet
- intent: query_objects
- action: action_query_knowledge_base
- intent: bye
- action: utter_goodbye
- story: knowledge base attributes happy path
steps:
- intent: greet
- action: utter_greet
- intent: query_attributes
- action: action_query_knowledge_base
- intent: bye
- action: utter_goodbye

View File

@ -1,6 +1,5 @@
version: '2.0'
config:
store_entities_as_slots: true
session_config:
session_expiration_time: 60
carry_over_slots_to_new_session: true
@ -23,23 +22,35 @@ intents:
- contact
- inform
- restart
- what_is_substance
- query_attributes
- query_objects
entities:
- language
- location
- name
- substance
- substances
- object_type
- attribute
- mention
slots:
name:
type: text
influence_conversation: true
substance:
substances:
type: text
influence_conversation: true
auto_fill: true
influence_conversation: false
object_type:
type: text
influence_conversation: false
mention:
type: text
influence_conversation: false
attribute:
type: text
influence_conversation: false
responses:
utter_ask_name:
@ -53,6 +64,9 @@ responses:
image: https://i.imgur.com/nGF1K8f.jpg
utter_did_that_help:
- text: Did that help you?
utter_ask_rephrase:
- text: "Sorry, I'm not sure I understand. Can you rephrase?"
- text: "Can you please rephrase? I did not get that."
utter_happy:
- text: Great! How can I help you today? I can tell you about our company, how we can help you or why you should consider implementing a chatbot into your business!
utter_goodbye:
@ -134,8 +148,7 @@ responses:
- text: That depends on which you are using and, most importantly, how you are using them...
utter_faq/drugs_legal:
- text: Probably but it depends on where you are and what drugs
utter_what_is_stubstace:
- text: It is {substance}
utter_out_of_scope/non_english:
- text: No hablo english
@ -146,3 +159,4 @@ actions:
- utter_faq
- utter_greet
- utter_out_of_scope
- action_query_knowledge_base

View File

@ -10,8 +10,8 @@
# Server which runs your custom actions.
# https://rasa.com/docs/rasa/custom-actions
#action_endpoint:
# url: "http://localhost:5055/webhook"
action_endpoint:
url: "http://localhost:5055/webhook"
# Tracker store which is used to store the conversations.
# By default the conversations are stored in memory.

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,186 @@
from secrets import choice, randbelow
import re
class DataGen:
    """Generate Rasa NLU training examples for the knowledge-base intents.

    The generator collects every substance name and alias from the combined
    substances data and fills them into annotated example templates for the
    ``query_objects`` and ``query_attributes`` intents.

    Args:
        substances: Mapping with a ``"substances"`` list; every item must
            provide a ``"name"`` string and an ``"aliases"`` list of strings.
    """

    # A single trailing, whitespace-separated qualifier such as " (botany)"
    # or " (with paracetamol)".  Parentheses that are part of the name itself
    # (e.g. "2-(p-tolyl)acetamide") are deliberately left alone.
    _QUALIFIER = re.compile(r"\s+\([^()]*\)\s*$")

    # Example dose spellings used to fill the dose-related templates.
    _DOSES = (
        "0.5mg",
        "100mg",
        "600mg",
        "1 gram",
        "one gram",
        "5 grams",
        "55mg",
        "fifty milligrams",
        "ten mgs",
        "1g",
        "4.5g",
        "359mg",
    )

    def __init__(self, substances):
        self.substance_data = substances
        # Names and aliases in input order, qualifiers removed.
        self.names = []
        for substance in substances["substances"]:
            for raw_name in (substance["name"], *substance["aliases"]):
                cleaned = self._clean_name(raw_name)
                # An empty name would produce a "[](substances)" annotation.
                if cleaned:
                    self.names.append(cleaned)

    @classmethod
    def _clean_name(cls, raw_name):
        """Return *raw_name* without a trailing qualifier or outer whitespace."""
        return cls._QUALIFIER.sub("", raw_name).strip()

    @staticmethod
    def _attr(text, value):
        """Annotate *text* as an ``attribute`` entity whose value is *value*."""
        return f'[{text}]{{"entity": "attribute", "value": "{value}"}}'

    def parse(self, intent_name, intent_list):
        """Render examples as one ``- intent:`` entry of a Rasa NLU YAML list.

        Args:
            intent_name: Name written after ``- intent:``.
            intent_list: Iterable of example strings, one list item each.

        Returns:
            The YAML fragment as a string, ending with a newline.
        """
        # ``examples`` is a key of the same mapping as ``intent`` (two-space
        # indent) and its items live inside the ``|`` block scalar, which has
        # to be indented deeper than the key.
        header = f"- intent: {intent_name}\n  examples: |\n"
        return header + "".join(f"    - {example}\n" for example in intent_list)

    def query_objects(self):
        """Return the YAML fragment for the ``query_objects`` intent."""
        # General object queries
        templates = [
            'list all [drugs]{"entity": "object_type", "value": "substances"}',
            'what [drug]{"entity": "object_type", "value": "substances"} is safe?',
            'what [substance]{"entity": "object_type", "value": "substances"} is legal',
            'list all [substances]{"entity": "object_type", "value": "substances"}',
            'can you list the [substances]{"entity": "object_type", "value": "substances"}?',
        ]
        return self.parse(intent_name="query_objects", intent_list=templates)

    def _attribute_templates(self, name):
        """Return every attribute-question template filled in for *name*."""
        attr = self._attr
        sub = f"[{name}](substances)"
        return [
            f"what is {sub}?",
            f"whats {sub}",
            f"what's {sub}?",
            # toxicity / addiction potential
            f"what is the {attr('toxic dose', 'toxicity')} of {sub}?",
            f"what is the [toxicity](attribute) of {sub}?",
            f"how {attr('toxic', 'toxicity')} is {sub}",
            f"how {attr('safe', 'toxicity')} is {sub}",
            f"how {attr('dangerous', 'toxicity')} is {sub}?",
            f"how {attr('addictive', 'addictionPotential')} is {sub}?",
            f"is {sub} {attr('addictive', 'addictionPotential')}?",
            f"is {sub} {attr('addicting', 'addictionPotential')}",
            f"is {sub} {attr('safe', 'toxicity')}?",
            # dosage, routes of administration and duration
            f"{attr('how much', 'roas')} {sub} should i take?",
            f"{attr('how much', 'roas')} {sub}?",
            f"{attr('how many', 'roas')} {sub} should i have?",
            f"what {attr('dose', 'roas')} of {sub} should i take?",
            f"what {attr('dose', 'roas')} of {sub} do i need",
            f"what {attr('dosage', 'roas')} of {sub} is enough?",
            f"{attr('routes of administration', 'roas')} for {sub}?",
            f"{attr('roas', 'roas')} of {sub}",
            f"how much {sub} should i {attr('snort', 'roas')}?",
            f"how much {sub} can i take {attr('orally', 'roas')}?",
            f"can i take {sub} via {attr('oral', 'roas')} administration?",
            f"{attr('how long', 'roas')} does {sub} last?",
            f"{attr('duration', 'roas')} of {sub}",
            f"what is the {attr('duration', 'roas')} of {sub}?",
            f"{attr('how long', 'roas')} does {sub} take to peak?",
            # interactions
            f"{attr('interactions', 'interactions')} of {sub}",
            f"{sub} {attr('interactions', 'interactions')}",
            f"what does {sub} {attr('interact', 'interactions')} with?",
            f"what drugs does {sub} {attr('interact', 'interactions')} with?",
            f"what substance {attr('mixes', 'interactions')} with {sub}",
            f"what substances {attr('mix', 'interactions')} with {sub}",
            f"what {attr('interacts', 'interactions')} with {sub}?",
            f"what {attr('synergizes', 'interactions')} with {sub}",
            f"what {attr('mixes', 'interactions')} well with {sub}?",
            # chemical class
            f"what kind of {attr('chemical', 'chemicalClass')} is {sub}?",
            f"what family of {attr('chemicals', 'chemicalClass')} is {sub} in?",
            f"what {attr('chemical family', 'chemicalClass')} is {sub}?",
            f"{attr('chemical type', 'chemicalClass')} of {sub}",
            f"{sub} {attr('chemical class', 'chemicalClass')}",
            f"{sub} is what type of {attr('chemical', 'chemicalClass')}?",
            # psychoactive class
            f"what {attr('class', 'psychoactiveClass')} is {sub} in?",
            f"what drug {attr('class', 'psychoactiveClass')} is {sub} in?",
            f"what {attr('family', 'psychoactiveClass')} of drugs is {sub} in?",
            f"{sub} {attr('class', 'psychoactiveClass')}",
            f"what {attr('kind of drug', 'psychoactiveClass')} is {sub}?",
            # concrete doses
            f"is {attr(choice(self._DOSES), 'roas')} of {sub} enough?",
            f"can i take {attr(choice(self._DOSES), 'roas')} of {sub}",
        ]

    def _mention_templates(self, name):
        """Return the terse and pronoun ("it"/"that") follow-up templates."""
        attr = self._attr
        sub = f"[{name}](substances)"
        return [
            f"{sub}?",
            f"{sub} is what?",
            f"is [it](mention) safe to {attr('mix', 'interactions')} with {sub}?",
            f"what substances {attr('mix', 'interactions')} with [it](mention)",
            f"what does [it](mention) {attr('interact', 'interactions')} with?",
            # The space before "in?" keeps the mention aligned with a token.
            f"what {attr('family', 'psychoactiveClass')} of drugs is [that](mention) in?",
            f"what is [its](mention) {attr('class', 'psychoactiveClass')}",
            f"{attr('chemical type', 'chemicalClass')} of [that](mention)?",
            f"{sub} {attr('chemical class', 'chemicalClass')}",
            f"is {attr(choice(self._DOSES), 'roas')} of [that](mention) okay?",
            f"is {attr(choice(self._DOSES), 'roas')} of [it](mention) too much?",
            f"is {attr(choice(self._DOSES), 'roas')} of [it](mention) enough?",
            f"how {attr('toxic', 'toxicity')} is [it](mention)",
            f"how {attr('safe', 'toxicity')} is [that](mention)",
        ]

    def query_attributes(self):
        """Return the YAML fragment for the ``query_attributes`` intent.

        Every name contributes one randomly chosen attribute question and,
        for 6 of the 10 possible ``randbelow(10)`` draws, one additional
        randomly chosen mention-style example.  The output therefore differs
        between runs.
        """
        examples = []
        for name in self.names:
            examples.append(choice(self._attribute_templates(name)))
            if randbelow(10) > 3:
                examples.append(choice(self._mention_templates(name)))
        return self.parse(intent_name="query_attributes", intent_list=examples)

    def combo_gen(self):
        """Return the ``query_objects`` fragment followed by ``query_attributes``."""
        return self.query_objects() + self.query_attributes()

File diff suppressed because it is too large Load Diff

View File

@ -33,7 +33,7 @@ import json
import os
import re
import traceback
from intentGen import intentGen
from TrainingDataGen import DataGen
headers = {
"Access-Control-Allow-Origin": "*",
@ -174,6 +174,7 @@ def pw_clean_common_name(name):
name = re.sub(r'"?\[\d*\]$', "", name)
name = re.sub(r"\s*More names\.$", "", name)
name = re.sub(r"\.$", "", name)
name = re.sub(r"\(.*\)", "", name)
return name.strip()
@ -190,29 +191,19 @@ if os.path.exists("ts_pn_data/_cached_pw_substances.json"):
pw_substance_data = json.load(f)
if not len(pw_substance_data):
offset = 0
pw_substance_urls_query = (
f"{{substances(limit: 250 offset: {offset}) {{name url}}}}"
)
pw_substance_urls_query = """
{
substances(limit: 11000) {
name
url
}
}
"""
pw_substance_urls_data = ps_client.execute(query=pw_substance_urls_query,)["data"][
pw_substance_urls_data = ps_client.execute(query=pw_substance_urls_query)["data"][
"substances"
]
offset = 252
while offset <= 340:
pw_substance_urls_query = (
f"{{substances(limit: 1 offset: {offset}) {{name url}}}}"
)
offset += 1
temp_data = ps_client.execute(query=pw_substance_urls_query,)["data"][
"substances"
]
print(temp_data)
if temp_data is None:
continue
pw_substance_urls_data.extend(temp_data)
for idx, substance in enumerate(pw_substance_urls_data):
try:
url = substance["url"]
@ -240,6 +231,7 @@ if not len(pw_substance_data):
else set()
)
cleaned_common_names.add(substance["name"])
print(cleaned_common_names)
# don't include name in list of other common names
common_names = sorted(filter(lambda n: n != name, cleaned_common_names))
@ -365,7 +357,7 @@ if not len(pw_substance_data):
print(traceback.format_exc())
exit(1)
with open(f"ts_pn_data/_cached_pw_substances.json", "w") as f:
with open(f"ts_pn_data/_cached_pw_substances.json", "w", encoding="utf-8") as f:
f.write(json.dumps(pw_substance_data, indent=2, ensure_ascii=False))
# combine tripsit and psychonautwiki data
@ -480,7 +472,10 @@ for name in all_substance_names:
continue
dose_levels.append(
{"name": dose_level, "value": value_string,}
{
"name": dose_level,
"value": value_string,
}
)
if len(dose_levels):
@ -551,6 +546,13 @@ for name in all_substance_names:
interactions.append(combo_data)
interactions = sorted(interactions, key=lambda i: i["name"])
if classes != None:
chemical_class = classes["chemical"]
psychoactive_class = classes["psychoactive"]
else:
chemical_class = None
psychoactive_class = None
substance_data.append(
{
"id": x,
@ -562,6 +564,8 @@ for name in all_substance_names:
"summary": summary,
"reagents": test_kits,
"classes": classes,
"chemicalClass": chemical_class,
"psychoactiveClass": psychoactive_class,
"toxicity": toxicity,
"addictionPotential": addiction_potential,
"tolerance": tolerance,
@ -576,19 +580,21 @@ for name in all_substance_names:
substances_json = {}
substances_json["substances"] = substance_data
with open(f"ts_pn_data/substances_data.json", "w") as f:
with open(f"ts_pn_data/substances_data.json", "w", encoding="utf-8") as f:
json.dump(substances_json, fp=f, ensure_ascii=False, indent=2)
substance_aliases = {}
with open("data/lookups/substances.yml", "w") as fp:
with open("data/lookups/substances.yml", "w", encoding="utf-8") as fp:
# Lookup Table
fp.write("""version: "2.0"\nnlu:\n- lookup: substance\n examples: |\n""")
for drug in substances_json["substances"]:
fp.write(f" - {drug['name']}\n")
name = re.sub(r"\(.*\)", "", drug["name"])
fp.write(f" - {name}\n")
# Add aliases to lookup table too
for y in drug["aliases"]:
y = re.sub(r"\(.*\)", "", y)
# Check for "or" in aliases and remove
if " or " in y:
aliases = y.split(" or ")
@ -603,25 +609,27 @@ with open("data/lookups/substances.yml", "w") as fp:
# Synonyms to map aliases to one entity
for drug in substances_json["substances"]:
# Skip adding synonym if there are no aliases
substance_aliases[drug["name"]] = []
name = re.sub(r"\(.*\)", "", drug["name"])
substance_aliases[name] = []
if drug["aliases"] == []:
continue
fp.write(f"- synonym: {drug['name']}\n examples: |\n")
fp.write(f"- synonym: {name}\n examples: |\n")
for y in drug["aliases"]:
y = re.sub(r"\(.*\)", "", y)
# Check for "or" in aliases and remove
if " or " in y:
aliases = y.split(" or ")
fp.write(f" - {aliases[0]}\n")
fp.write(f" - {aliases[1]}\n")
substance_aliases[drug["name"]].append(aliases[0])
substance_aliases[drug["name"]].append(aliases[1])
substance_aliases[name].append(aliases[0])
substance_aliases[name].append(aliases[1])
elif "or " in y:
aliases = y.split("or ")
fp.write(f" - {aliases[1]}\n")
substance_aliases[drug["name"]].append(aliases[1])
substance_aliases[name].append(aliases[1])
else:
fp.write(f" - {y}\n")
substance_aliases[drug["name"]].append(y)
substance_aliases[name].append(y)
with open("ts_pn_data/generated_intents.yml", "w") as fp:
fp.write(intentGen(substance_aliases).what_is())
with open("ts_pn_data/generated_intents.yml", "w", encoding="utf-8") as fp:
fp.write(DataGen(substances_json).combo_gen())

View File

@ -1,38 +0,0 @@
from secrets import choice, randbelow
class intentGen:
    """Generate Rasa NLU examples for the ``what_is_substance`` intent.

    Args:
        substances: Mapping of substance name to a list of its aliases.  An
            empty list (or ``None``) means the substance has no aliases.
    """

    def __init__(self, substances):
        # Each name is followed directly by its own aliases.
        self.names = []
        for canonical, aliases in substances.items():
            self.names.append(canonical)
            self.names.extend(aliases or ())

    def parse(self, intent_name, intent_list):
        """Render examples as one ``- intent:`` entry of a Rasa NLU YAML list.

        Args:
            intent_name: Name written after ``- intent:``.
            intent_list: Iterable of example strings, one list item each.

        Returns:
            The YAML fragment as a string, ending with a newline.
        """
        # ``examples`` is a key of the same mapping as ``intent`` (two-space
        # indent) and its items live inside the ``|`` block scalar, which has
        # to be indented deeper than the key.
        header = f"- intent: {intent_name}\n  examples: |\n"
        return header + "".join(f"    - {example}\n" for example in intent_list)

    def what_is(self):
        """Return the YAML fragment for the ``what_is_substance`` intent.

        Every name contributes one randomly chosen question and, for 3 of
        the 10 possible ``randbelow(10)`` draws, one additional terse
        variant.  The output therefore differs between runs.
        """
        examples = []
        for name in self.names:
            sub = f"[{name}](substance)"
            examples.append(
                choice(
                    [
                        f"what is {sub}?",
                        f"what is {sub}",
                        f"whats {sub}",
                        f"what's {sub}?",
                        f"what {sub}",
                    ]
                )
            )
            if randbelow(10) > 6:
                examples.append(
                    choice(
                        [
                            f"{sub}?",
                            f"{sub} is what?",
                            f"{sub} is?",
                        ]
                    )
                )
        return self.parse("what_is_substance", intent_list=examples)

File diff suppressed because it is too large Load Diff