Mirror of https://github.com/pyt0xic/pablo-bot.git, synced 2024-11-22 02:09:27 +01:00
Changes
This commit is contained in:
parent 1e6cbc386a
commit 8c7d91d097
@@ -1,27 +1,12 @@
# This files contains your custom actions which can be used to run
# custom Python code.
#
# See this guide on how to implement these action:
# https://rasa.com/docs/rasa/custom-actions
from typing import Any, Text, Dict, List

from rasa_sdk import Action, Tracker
from rasa_sdk.executor import CollectingDispatcher
from rasa_sdk.knowledge_base.storage import InMemoryKnowledgeBase
from rasa_sdk.knowledge_base.actions import ActionQueryKnowledgeBase


# This is a simple example for a custom action which utters "Hello World!"

# from typing import Any, Text, Dict, List
#
# from rasa_sdk import Action, Tracker
# from rasa_sdk.executor import CollectingDispatcher
#
#
# class ActionHelloWorld(Action):
#
#     def name(self) -> Text:
#         return "action_hello_world"
#
#     def run(self, dispatcher: CollectingDispatcher,
#             tracker: Tracker,
#             domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:
#
#         dispatcher.utter_message(text="Hello World!")
#
#         return []
class KnowledgeBaseAction(ActionQueryKnowledgeBase):
    def __init__(self):
        knowledge_base = InMemoryKnowledgeBase("ts_pn_data/substances_data.json")
        super().__init__(knowledge_base)
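For orientation, here is a minimal sketch of the data layout the InMemoryKnowledgeBase above is pointed at, and of roughly the kind of lookup the stock action_query_knowledge_base performs once the NLU pipeline has filled the object_type, substances and attribute slots. The entry and its values are placeholders; the attribute names only mirror the fields the generation script later in this commit writes, so treat the exact shape as an assumption rather than a spec.

# Sketch only: the real file is ts_pn_data/substances_data.json, generated further below.
sample_kb = {
    "substances": [
        {
            "id": 0,
            "name": "Example substance",
            "aliases": ["example alias"],
            "toxicity": ["example toxicity note"],
            "addictionPotential": "example note",
            "chemicalClass": ["example chemical class"],
            "psychoactiveClass": ["example psychoactive class"],
        }
    ]
}

def query_attribute(kb, object_type, name, attribute):
    # Roughly what answering "how toxic is <name>?" reduces to once the
    # object_type ("substances"), substances and attribute slots are set.
    match = next(
        (o for o in kb[object_type] if o["name"].lower() == name.lower()), None
    )
    return None if match is None else match.get(attribute)

print(query_attribute(sample_kb, "substances", "example substance", "toxicity"))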
@@ -1,7 +1,7 @@
language: en
pipeline:
- name: WhitespaceTokenizer
  token_pattern: (?u)\b\w+\b
  # token_pattern: (?u)\b\w+\b
- name: RegexFeaturizer
  use_word_boundaries: false
  case_sensitive: false
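Worth noting for the tokenizer change: (?u)\b\w+\b keeps only word-character runs, so hyphenated substance names are split into several tokens. The snippet below is a standalone illustration of the regex itself, not of how Rasa applies it internally (the tokenizer also splits on whitespace first); the sample inputs are names that appear in the training data.

import re

token_pattern = r"(?u)\b\w+\b"
print(re.findall(token_pattern, "what is 25B-NBOH?"))
# ['what', 'is', '25B', 'NBOH']
print(re.findall(token_pattern, "how toxic is 2-(p-tolyl)acetamide"))
# ['how', 'toxic', 'is', '2', 'p', 'tolyl', 'acetamide']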
4944  data/kb_query.yml
File diff suppressed because it is too large
@@ -68,7 +68,7 @@ nlu:
- 2-PA
- 2-phenylacetamide
- 2-PTA
- 2-(p-tolyl)acetamide
- 2-acetamide
- 4-methyl-2-pa
- 25B-NBOH
- 25bnboh
@@ -786,7 +786,7 @@ nlu:
- n,n-diallyltryptamine
- Datura
- jimson weed
- Datura (botany)
- Datura
- angel's trumpets
- devil's trumpets
- devil's weed
@@ -1064,8 +1064,8 @@ nlu:
- Hydrocodone
- hydro
- vicodin
- vicodin (with paracetamol)
- zohydro er (extended-release)
- vicodin
- zohydro er
- Hydromorphone
- dilaudid
- diluadid
@@ -1074,7 +1074,7 @@ nlu:
- Hydroxyzine
- atarax
- vistaril
- Hyoscyamus niger (botany)
- Hyoscyamus niger
- henbane
- stinking nightshade
- Ibogaine
@@ -1116,7 +1116,7 @@ nlu:
- kratom
- kratum
- mitragyna speciosa
- กระท่อม (thai)
- กระท่อม
- Theanine
- l-theanine
- l-γ-glutamylethylamide and n5-ethyl-l-glutamine
@@ -1161,7 +1161,7 @@ nlu:
- lambda
- Mandragora
- mandrake
- Mandragora officinarum (botany)
- Mandragora officinarum
- mandrake
- Marinol
- cesamet
@@ -1513,7 +1513,7 @@ nlu:
- Picamilon
- Pinazepam
- domar
- Piper nigrum (botany)
- Piper nigrum
- black pepper
- green pepper
- peppercorn
@@ -1656,7 +1656,7 @@ nlu:
- enerion
- sulbut
- youvitan
- Tabernanthe iboga (botany)
- Tabernanthe iboga
- iboga
- Tapentadol
- nucynta
@@ -1829,7 +1829,7 @@ nlu:
- 2-phenylacetamide
- synonym: 2-PTA
examples: |
- 2-(p-tolyl)acetamide
- 2-acetamide
- 4-methyl-2-pa
- synonym: 25B-NBOH
examples: |
@@ -2648,7 +2648,7 @@ nlu:
- synonym: Datura
examples: |
- jimson weed
- synonym: Datura (botany)
- synonym: Datura
examples: |
- angel's trumpets
- devil's trumpets
@@ -2964,8 +2964,8 @@ nlu:
examples: |
- hydro
- vicodin
- vicodin (with paracetamol)
- zohydro er (extended-release)
- vicodin
- zohydro er
- synonym: Hydromorphone
examples: |
- dilaudid
@@ -2976,7 +2976,7 @@ nlu:
examples: |
- atarax
- vistaril
- synonym: Hyoscyamus niger (botany)
- synonym: Hyoscyamus niger
examples: |
- henbane
- stinking nightshade
@@ -3018,7 +3018,7 @@ nlu:
- kratom
- kratum
- mitragyna speciosa
- กระท่อม (thai)
- กระท่อม
- synonym: Theanine
examples: |
- l-theanine
@@ -3074,7 +3074,7 @@ nlu:
- synonym: Mandragora
examples: |
- mandrake
- synonym: Mandragora officinarum (botany)
- synonym: Mandragora officinarum
examples: |
- mandrake
- synonym: Marinol
@@ -3477,7 +3477,7 @@ nlu:
- synonym: Pinazepam
examples: |
- domar
- synonym: Piper nigrum (botany)
- synonym: Piper nigrum
examples: |
- black pepper
- green pepper
@@ -3639,7 +3639,7 @@ nlu:
- enerion
- sulbut
- youvitan
- synonym: Tabernanthe iboga (botany)
- synonym: Tabernanthe iboga
examples: |
- iboga
- synonym: Tapentadol
@@ -22,7 +22,3 @@ rules:
  - intent: out_of_scope
  - action: utter_out_of_scope

- rule: what is drug
  steps:
  - intent: what_is_substance
  - action: utter_what_is_stubstace
@@ -21,4 +21,22 @@ stories:
  - intent: chitchat
  - action: utter_chitchat
  - intent: faq
  - action: utter_faq
  - action: utter_faq

- story: knowledge base happy path
  steps:
  - intent: greet
  - action: utter_greet
  - intent: query_objects
  - action: action_query_knowledge_base
  - intent: bye
  - action: utter_goodbye

- story: knowledge base happy path 2
  steps:
  - intent: greet
  - action: utter_greet
  - intent: query_attributes
  - action: action_query_knowledge_base
  - intent: bye
  - action: utter_goodbye
32  domain.yml
@@ -1,6 +1,5 @@
version: '2.0'
config:
  store_entities_as_slots: true

session_config:
  session_expiration_time: 60
  carry_over_slots_to_new_session: true
@@ -23,23 +22,35 @@ intents:
  - contact
  - inform
  - restart
  - what_is_substance
  - query_attributes
  - query_objects


entities:
  - language
  - location
  - name
  - substance
  - substances
  - object_type
  - attribute
  - mention

slots:
  name:
    type: text
    influence_conversation: true
  substance:
  substances:
    type: text
    influence_conversation: true
    auto_fill: true
    influence_conversation: false
  object_type:
    type: text
    influence_conversation: false
  mention:
    type: text
    influence_conversation: false
  attribute:
    type: text
    influence_conversation: false

responses:
  utter_ask_name:
@@ -53,6 +64,9 @@ responses:
    image: https://i.imgur.com/nGF1K8f.jpg
  utter_did_that_help:
  - text: Did that help you?
  utter_ask_rephrase:
  - text: "Sorry, I'm not sure I understand. Can you rephrase?"
  - text: "Can you please rephrase? I did not get that."
  utter_happy:
  - text: Great! How can I help you today? I can tell you about our company, how we can help you, or why you should consider implementing a chatbot into your business!
  utter_goodbye:
@@ -134,8 +148,7 @@ responses:
  - text: That depends on which you are using and, most importantly, how you are using them...
  utter_faq/drugs_legal:
  - text: Probably, but it depends on where you are and what drugs
  utter_what_is_stubstace:
  - text: It is {substance}


  utter_out_of_scope/non_english:
  - text: No hablo english
@@ -146,3 +159,4 @@ actions:
  - utter_faq
  - utter_greet
  - utter_out_of_scope
  - action_query_knowledge_base
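The new object_type, attribute and mention slots (all with influence_conversation: false) are the ones the knowledge-base flow reads to work out what is being asked. As a hedged illustration of how any custom action can inspect them, the debug action below simply echoes the slot values back; the class and action name are hypothetical and not part of this commit.

from typing import Any, Dict, List, Text

from rasa_sdk import Action, Tracker
from rasa_sdk.executor import CollectingDispatcher


class ActionDebugKbSlots(Action):
    """Hypothetical helper: report the slots the knowledge-base flow relies on."""

    def name(self) -> Text:
        return "action_debug_kb_slots"

    def run(
        self,
        dispatcher: CollectingDispatcher,
        tracker: Tracker,
        domain: Dict[Text, Any],
    ) -> List[Dict[Text, Any]]:
        object_type = tracker.get_slot("object_type")
        attribute = tracker.get_slot("attribute")
        mention = tracker.get_slot("mention")
        dispatcher.utter_message(
            text=f"object_type={object_type}, attribute={attribute}, mention={mention}"
        )
        return []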
@@ -10,8 +10,8 @@
# Server which runs your custom actions.
# https://rasa.com/docs/rasa/custom-actions

#action_endpoint:
#  url: "http://localhost:5055/webhook"
action_endpoint:
  url: "http://localhost:5055/webhook"

# Tracker store which is used to store the conversations.
# By default the conversations are stored in memory.
45374  substances_data.json
File diff suppressed because it is too large
186  ts_pn_data/TrainingDataGen.py  Normal file
@@ -0,0 +1,186 @@
from secrets import choice, randbelow
import re


class DataGen:
    def __init__(self, substances):
        self.substance_data = substances
        self.names = []
        for x in substances["substances"]:
            self.names.append(re.sub(r"\(.*\)", "", x["name"]))
            for y in x["aliases"]:
                self.names.append(re.sub(r"\(.*\)", "", y))

    def parse(self, intent_name, intent_list):
        intent_str = f"- intent: {intent_name}\n examples: |\n"
        for x in intent_list:
            intent_str = "".join([intent_str, f" - {x}\n"])
        return intent_str

    def query_objects(self):
        intents = []

        # General object queries

        templates = [
            'list all [drugs]{"entity": "object_type", "value": "substances"}',
            'what [drug]{"entity": "object_type", "value": "substances"} is safe?',
            'what [substance]{"entity": "object_type", "value": "substances"} is legal',
            'list all [substances]{"entity": "object_type", "value": "substances"}',
            'can you list the [substances]{"entity": "object_type", "value": "substances"}?',
        ]
        for x in templates:
            intents.append(x)

        return self.parse(intent_name="query_objects", intent_list=intents)

    def query_attributes(self):
        intents = []
        doses = [
            "0.5mg",
            "100mg",
            "600mg",
            "1 gram",
            "one gram",
            "5 grams",
            "55mg",
            "fifty milligrams",
            "ten mgs",
            "1g",
            "4.5g",
            "359mg",
        ]

        for name in self.names:
            unlikely_chance = randbelow(10)
            templates = [
                f"what is [{name}](substances)?",
                f"whats [{name}](substances)",
                f"what's [{name}](substances)?",
                'what is the [toxic dose]{"entity": "attribute", "value": "toxicity"} '
                f"of [{name}](substances)?",
                f"what is the [toxicity](attribute) of [{name}](substances)?",
                'how [toxic]{"entity": "attribute", "value": "toxicity"} '
                f"is [{name}](substances)",
                'how [safe]{"entity": "attribute", "value": "toxicity"} '
                f"is [{name}](substances)",
                'how [dangerous]{"entity": "attribute", "value": "toxicity"} '
                f"is [{name}](substances)?",
                'how [addictive]{"entity": "attribute", "value": "addictionPotential"} '
                f"is [{name}](substances)?",
                f"is [{name}](substances) "
                '[addictive]{"entity": "attribute", "value": "addictionPotential"}?',
                f"is [{name}](substances) "
                '[addicting]{"entity": "attribute", "value": "addictionPotential"}',
                f"is [{name}](substances) "
                '[safe]{"entity": "attribute", "value": "toxicity"}?',
                '[how much]{"entity": "attribute", "value": "roas"} '
                f"[{name}](substances) should i take?",
                '[how much]{"entity": "attribute", "value": "roas"} '
                f"[{name}](substances)?",
                '[how many]{"entity": "attribute", "value": "roas"} '
                f"[{name}](substances) should i have?",
                'what [dose]{"entity": "attribute", "value": "roas"} of '
                f"[{name}](substances) should i take?",
                'what [dose]{"entity": "attribute", "value": "roas"} of '
                f"[{name}](substances) do i need",
                'what [dosage]{"entity": "attribute", "value": "roas"} of '
                f"[{name}](substances) is enough?",
                '[routes of administration]{"entity": "attribute", "value": "roas"} for '
                f"[{name}](substances)?",
                '[roas]{"entity": "attribute", "value": "roas"} of '
                f"[{name}](substances)",
                f"how much [{name}](substances) should i "
                '[snort]{"entity": "attribute", "value": "roas"}?',
                f"how much [{name}](substances) can i take "
                '[orally]{"entity": "attribute", "value": "roas"}?',
                f"can i take [{name}](substances) via "
                '[oral]{"entity": "attribute", "value": "roas"} administration?',
                '[how long]{"entity": "attribute", "value": "roas"} does '
                f"[{name}](substances) last?",
                '[duration]{"entity": "attribute", "value": "roas"} of '
                f"[{name}](substances)",
                'what is the [duration]{"entity": "attribute", "value": "roas"} of '
                f"[{name}](substances)?",
                '[how long]{"entity": "attribute", "value": "roas"} does '
                f"[{name}](substances) take to peak?",
                '[interactions]{"entity": "attribute", "value": "interactions"} of '
                f"[{name}](substances)",
                f"[{name}](substances) "
                '[interactions]{"entity": "attribute", "value": "interactions"}',
                f"what does [{name}](substances) "
                '[interact]{"entity": "attribute", "value": "interactions"} with?',
                f"what drugs does [{name}](substances) "
                '[interact]{"entity": "attribute", "value": "interactions"} with?',
                'what substance [mixes]{"entity": "attribute", "value": "interactions"} with '
                f"[{name}](substances)",
                'what substances [mix]{"entity": "attribute", "value": "interactions"} with '
                f"[{name}](substances)",
                'what [interacts]{"entity": "attribute", "value": "interactions"} with '
                f"[{name}](substances)?",
                'what [synergizes]{"entity": "attribute", "value": "interactions"} with '
                f"[{name}](substances)",
                'what [mixes]{"entity": "attribute", "value": "interactions"} well with '
                f"[{name}](substances)?",
                'what kind of [chemical]{"entity": "attribute", "value": "chemicalClass"} is '
                f"[{name}](substances)?",
                'what family of [chemicals]{"entity": "attribute", "value": "chemicalClass"} is '
                f"[{name}](substances) in?",
                'what [chemical family]{"entity": "attribute", "value": "chemicalClass"} is '
                f"[{name}](substances)?",
                '[chemical type]{"entity": "attribute", "value": "chemicalClass"} of '
                f"[{name}](substances)",
                f"[{name}](substances) "
                '[chemical class]{"entity": "attribute", "value": "chemicalClass"}',
                f"[{name}](substances) is what "
                'type of [chemical]{"entity": "attribute", "value": "chemicalClass"}?',
                'what [class]{"entity": "attribute", "value": "psychoactiveClass"} is '
                f"[{name}](substances) in?",
                'what drug [class]{"entity": "attribute", "value": "psychoactiveClass"} is '
                f"[{name}](substances) in?",
                'what [family]{"entity": "attribute", "value": "psychoactiveClass"} of drugs is '
                f"[{name}](substances) in?",
                f"[{name}](substances) "
                '[class]{"entity": "attribute", "value": "psychoactiveClass"}',
                'what [kind of drug]{"entity": "attribute", "value": "psychoactiveClass"} is '
                f"[{name}](substances)?",
                f"is [{choice(doses)}]"
                '{"entity": "attribute", "value": "roas"} of '
                f"[{name}](substances) enough?",
                f"can i take [{choice(doses)}]"
                '{"entity": "attribute", "value": "roas"} of '
                f"[{name}](substances)",
            ]

            intents.append(choice(templates))
            if unlikely_chance > 3:
                mention_templates = [
                    f"[{name}](substances)?",
                    f"[{name}](substances) is what?",
                    'is [it](mention) safe to [mix]{"entity": "attribute", "value": "interactions"} with '
                    f"[{name}](substances)?",
                    'what substances [mix]{"entity": "attribute", "value": "interactions"} with [it](mention)',
                    'what does [it](mention) [interact]{"entity": "attribute", "value": "interactions"} with?',
                    'what [family]{"entity": "attribute", "value": "psychoactiveClass"} of drugs is [that](mention)in?',
                    'what is [its](mention) [class]{"entity": "attribute", "value": "psychoactiveClass"}',
                    '[chemical type]{"entity": "attribute", "value": "chemicalClass"} of [that](mention)?',
                    f"[{name}](substances) "
                    '[chemical class]{"entity": "attribute", "value": "chemicalClass"}',
                    f"is [{choice(doses)}]"
                    '{"entity": "attribute", "value": "roas"} of [that](mention) okay?',
                    f"is [{choice(doses)}]"
                    '{"entity": "attribute", "value": "roas"} of [it](mention) too much?',
                    f"is [{choice(doses)}]"
                    '{"entity": "attribute", "value": "roas"} of [it](mention) enough?',
                    'how [toxic]{"entity": "attribute", "value": "toxicity"} is [it](mention)',
                    'how [safe]{"entity": "attribute", "value": "toxicity"} is [that](mention)',
                ]

                intents.append(choice(mention_templates))

        return self.parse(intent_name="query_attributes", intent_list=intents)

    def combo_gen(self):
        combo_str = ""
        combo_str = "".join([combo_str, self.query_objects(), self.query_attributes()])
        return combo_str
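A small usage sketch for the generator above; the two substance entries are placeholders standing in for ts_pn_data/substances_data.json, and the printout is the NLU YAML fragment that later gets written to ts_pn_data/generated_intents.yml.

from TrainingDataGen import DataGen  # assuming the module is importable from ts_pn_data

sample_substances = {
    "substances": [
        {"name": "Example substance (placeholder)", "aliases": ["example alias"]},
        {"name": "Another substance", "aliases": []},
    ]
}

gen = DataGen(sample_substances)
# query_objects() emits the fixed object-query templates;
# query_attributes() emits one random attribute template per known name/alias.
print(gen.combo_gen())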
File diff suppressed because it is too large
@@ -33,7 +33,7 @@ import json
import os
import re
import traceback
from intentGen import intentGen
from TrainingDataGen import DataGen

headers = {
    "Access-Control-Allow-Origin": "*",
@@ -174,6 +174,7 @@ def pw_clean_common_name(name):
    name = re.sub(r'"?\[\d*\]$', "", name)
    name = re.sub(r"\s*More names\.$", "", name)
    name = re.sub(r"\.$", "", name)
    name = re.sub(r"\(.*\)", "", name)
    return name.strip()


@@ -190,29 +191,19 @@ if os.path.exists("ts_pn_data/_cached_pw_substances.json"):
        pw_substance_data = json.load(f)

if not len(pw_substance_data):
    offset = 0
    pw_substance_urls_query = (
        f"{{substances(limit: 250 offset: {offset}) {{name url}}}}"
    )
    pw_substance_urls_query = """
    {
        substances(limit: 11000) {
            name
            url
        }
    }
    """

    pw_substance_urls_data = ps_client.execute(query=pw_substance_urls_query,)["data"][
    pw_substance_urls_data = ps_client.execute(query=pw_substance_urls_query)["data"][
        "substances"
    ]

    offset = 252
    while offset <= 340:
        pw_substance_urls_query = (
            f"{{substances(limit: 1 offset: {offset}) {{name url}}}}"
        )
        offset += 1
        temp_data = ps_client.execute(query=pw_substance_urls_query,)["data"][
            "substances"
        ]
        print(temp_data)
        if temp_data is None:
            continue
        pw_substance_urls_data.extend(temp_data)

    for idx, substance in enumerate(pw_substance_urls_data):
        try:
            url = substance["url"]
@@ -240,6 +231,7 @@ if not len(pw_substance_data):
                else set()
            )
            cleaned_common_names.add(substance["name"])
            print(cleaned_common_names)
            # don't include name in list of other common names
            common_names = sorted(filter(lambda n: n != name, cleaned_common_names))

@@ -365,7 +357,7 @@ if not len(pw_substance_data):
            print(traceback.format_exc())
            exit(1)

    with open(f"ts_pn_data/_cached_pw_substances.json", "w") as f:
    with open(f"ts_pn_data/_cached_pw_substances.json", "w", encoding="utf-8") as f:
        f.write(json.dumps(pw_substance_data, indent=2, ensure_ascii=False))

# combine tripsit and psychonautwiki data
@@ -480,7 +472,10 @@ for name in all_substance_names:
            continue

        dose_levels.append(
            {"name": dose_level, "value": value_string,}
            {
                "name": dose_level,
                "value": value_string,
            }
        )

    if len(dose_levels):
@@ -551,6 +546,13 @@ for name in all_substance_names:
            interactions.append(combo_data)
        interactions = sorted(interactions, key=lambda i: i["name"])

    if classes != None:
        chemical_class = classes["chemical"]
        psychoactive_class = classes["psychoactive"]
    else:
        chemical_class = None
        psychoactive_class = None

    substance_data.append(
        {
            "id": x,
@@ -562,6 +564,8 @@ for name in all_substance_names:
            "summary": summary,
            "reagents": test_kits,
            "classes": classes,
            "chemicalClass": chemical_class,
            "psychoactiveClass": psychoactive_class,
            "toxicity": toxicity,
            "addictionPotential": addiction_potential,
            "tolerance": tolerance,
@@ -576,19 +580,21 @@ for name in all_substance_names:

substances_json = {}
substances_json["substances"] = substance_data
with open(f"ts_pn_data/substances_data.json", "w") as f:
with open(f"ts_pn_data/substances_data.json", "w", encoding="utf-8") as f:
    json.dump(substances_json, fp=f, ensure_ascii=False, indent=2)


substance_aliases = {}

with open("data/lookups/substances.yml", "w") as fp:
with open("data/lookups/substances.yml", "w", encoding="utf-8") as fp:
    # Lookup Table
    fp.write("""version: "2.0"\nnlu:\n- lookup: substance\n examples: |\n""")
    for drug in substances_json["substances"]:
        fp.write(f" - {drug['name']}\n")
        name = re.sub(r"\(.*\)", "", drug["name"])
        fp.write(f" - {name}\n")
        # Add aliases to lookup table too
        for y in drug["aliases"]:
            y = re.sub(r"\(.*\)", "", y)
            # Check for "or" in aliases and remove
            if " or " in y:
                aliases = y.split(" or ")
@@ -603,25 +609,27 @@ with open("data/lookups/substances.yml", "w") as fp:
    # Synonyms to map aliases to one entity
    for drug in substances_json["substances"]:
        # Skip adding synonym if there are no aliases
        substance_aliases[drug["name"]] = []
        name = re.sub(r"\(.*\)", "", drug["name"])
        substance_aliases[name] = []
        if drug["aliases"] == []:
            continue
        fp.write(f"- synonym: {drug['name']}\n examples: |\n")
        fp.write(f"- synonym: {name}\n examples: |\n")
        for y in drug["aliases"]:
            y = re.sub(r"\(.*\)", "", y)
            # Check for "or" in aliases and remove
            if " or " in y:
                aliases = y.split(" or ")
                fp.write(f" - {aliases[0]}\n")
                fp.write(f" - {aliases[1]}\n")
                substance_aliases[drug["name"]].append(aliases[0])
                substance_aliases[drug["name"]].append(aliases[1])
                substance_aliases[name].append(aliases[0])
                substance_aliases[name].append(aliases[1])
            elif "or " in y:
                aliases = y.split("or ")
                fp.write(f" - {aliases[1]}\n")
                substance_aliases[drug["name"]].append(aliases[1])
                substance_aliases[name].append(aliases[1])
            else:
                fp.write(f" - {y}\n")
                substance_aliases[drug["name"]].append(y)
                substance_aliases[name].append(y)

with open("ts_pn_data/generated_intents.yml", "w") as fp:
    fp.write(intentGen(substance_aliases).what_is())
with open("ts_pn_data/generated_intents.yml", "w", encoding="utf-8") as fp:
    fp.write(DataGen(substances_json).combo_gen())
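The alias handling above (strip parenthesised qualifiers, then split on " or ") is repeated in both the lookup-table loop and the synonym loop; here is a standalone sketch of the same clean-up with a hypothetical helper name, using sample aliases taken from the data shown earlier in this diff.

import re

def clean_aliases(raw_aliases):
    """Illustrative helper mirroring the inline clean-up above:
    drop parenthesised qualifiers and split 'x or y' aliases in two."""
    cleaned = []
    for alias in raw_aliases:
        alias = re.sub(r"\(.*\)", "", alias)
        if " or " in alias:
            cleaned.extend(part.strip() for part in alias.split(" or "))
        else:
            cleaned.append(alias.strip())
    return cleaned

print(clean_aliases(["vicodin (with paracetamol)", "atarax or vistaril"]))
# ['vicodin', 'atarax', 'vistaril']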
@@ -1,38 +0,0 @@
from secrets import choice, randbelow


class intentGen:
    def __init__(self, substances):
        self.names = []
        for x in substances:
            self.names.append(x)
            if not substances[x] == []:
                for y in substances[x]:
                    self.names.append(y)

    def parse(self, intent_name, intent_list):
        intent_str = f"- intent: {intent_name}\n examples: |\n"
        for x in intent_list:
            intent_str = "".join([intent_str, f" - {x}\n"])
        return intent_str

    def what_is(self):
        what_is_intents = []
        for name in self.names:
            unlikely_chance = randbelow(10)
            templates = [
                f"what is [{name}](substance)?",
                f"what is [{name}](substance)",
                f"whats [{name}](substance)",
                f"what's [{name}](substance)?",
                f"what [{name}](substance)",
            ]
            what_is_intents.append(choice(templates))
            if unlikely_chance > 6:
                unlikely_templates = [
                    f"[{name}](substance)?",
                    f"[{name}](substance) is what?",
                    f"[{name}](substance) is?",
                ]
                what_is_intents.append(choice(unlikely_templates))
        return self.parse("what_is_substance", intent_list=what_is_intents)
File diff suppressed because it is too large