mirror of
https://github.com/pyt0xic/pablo-bot.git
synced 2024-11-25 15:19:33 +01:00
added first intent and lookup table(entities and synonyms)
This commit is contained in:
parent
faefd94f2a
commit
1e6cbc386a
@ -9,7 +9,12 @@ Still alot of work to be done but for those wanting to test anyway, see below
|
|||||||
$ rasa train
|
$ rasa train
|
||||||
$ rasa shell
|
$ rasa shell
|
||||||
|
|
||||||
|
|
||||||
Or use --debug flag to help you understand what's going on
|
Or use --debug flag to help you understand what's going on
|
||||||
|
|
||||||
$ rasa shell --debug
|
$ rasa shell --debug
|
||||||
|
|
||||||
|
## Credits
|
||||||
|
|
||||||
|
Thanks to the [PsychonautWiki](https://psychonautwiki.org/wiki/Main_Page) and [TripSit](https://tripsit.me) for the data.
|
||||||
|
|
||||||
|
Special thanks to [NoahSaso](https://github.com/NoahSaso) for his [scraper](https://github.com/NoahSaso/merge-psychonautwiki-tripsit-data)
|
||||||
|
@ -3,6 +3,8 @@ pipeline:
|
|||||||
- name: WhitespaceTokenizer
|
- name: WhitespaceTokenizer
|
||||||
token_pattern: (?u)\b\w+\b
|
token_pattern: (?u)\b\w+\b
|
||||||
- name: RegexFeaturizer
|
- name: RegexFeaturizer
|
||||||
|
use_word_boundaries: false
|
||||||
|
case_sensitive: false
|
||||||
- name: LexicalSyntacticFeaturizer
|
- name: LexicalSyntacticFeaturizer
|
||||||
- name: CountVectorsFeaturizer
|
- name: CountVectorsFeaturizer
|
||||||
# OOV_token: oov
|
# OOV_token: oov
|
||||||
@ -11,7 +13,7 @@ pipeline:
|
|||||||
min_ngram: 1
|
min_ngram: 1
|
||||||
max_ngram: 4
|
max_ngram: 4
|
||||||
- name: DIETClassifier
|
- name: DIETClassifier
|
||||||
epochs: 100
|
epochs: 10
|
||||||
ranking_length: 10
|
ranking_length: 10
|
||||||
# - name: DucklingEntityExtractor
|
# - name: DucklingEntityExtractor
|
||||||
# url: http://localhost:8000
|
# url: http://localhost:8000
|
||||||
|
2130
data/kb_query.yml
Normal file
2130
data/kb_query.yml
Normal file
File diff suppressed because it is too large
Load Diff
3740
data/lookups/substances.yml
Normal file
3740
data/lookups/substances.yml
Normal file
File diff suppressed because it is too large
Load Diff
@ -21,3 +21,8 @@ rules:
|
|||||||
steps:
|
steps:
|
||||||
- intent: out_of_scope
|
- intent: out_of_scope
|
||||||
- action: utter_out_of_scope
|
- action: utter_out_of_scope
|
||||||
|
|
||||||
|
- rule: what is drug
|
||||||
|
steps:
|
||||||
|
- intent: what_is_substance
|
||||||
|
- action: utter_what_is_stubstace
|
@ -23,16 +23,23 @@ intents:
|
|||||||
- contact
|
- contact
|
||||||
- inform
|
- inform
|
||||||
- restart
|
- restart
|
||||||
|
- what_is_substance
|
||||||
|
|
||||||
|
|
||||||
entities:
|
entities:
|
||||||
- language
|
- language
|
||||||
- location
|
- location
|
||||||
- name
|
- name
|
||||||
|
- substance
|
||||||
|
|
||||||
slots:
|
slots:
|
||||||
name:
|
name:
|
||||||
type: text
|
type: text
|
||||||
influence_conversation: true
|
influence_conversation: true
|
||||||
|
substance:
|
||||||
|
type: text
|
||||||
|
influence_conversation: true
|
||||||
|
auto_fill: true
|
||||||
|
|
||||||
responses:
|
responses:
|
||||||
utter_ask_name:
|
utter_ask_name:
|
||||||
@ -127,6 +134,8 @@ responses:
|
|||||||
- text: That depends on which you are using and, most importantly, how you are using them...
|
- text: That depends on which you are using and, most importantly, how you are using them...
|
||||||
utter_faq/drugs_legal:
|
utter_faq/drugs_legal:
|
||||||
- text: Probably but it depends on where you are and what drugs
|
- text: Probably but it depends on where you are and what drugs
|
||||||
|
utter_what_is_stubstace:
|
||||||
|
- text: It is {substance}
|
||||||
|
|
||||||
utter_out_of_scope/non_english:
|
utter_out_of_scope/non_english:
|
||||||
- text: No hablo english
|
- text: No hablo english
|
||||||
|
0
substances_data.json
Normal file
0
substances_data.json
Normal file
2128
ts_pn_data/generated_intents.yml
Normal file
2128
ts_pn_data/generated_intents.yml
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,25 @@
|
|||||||
|
"""MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2021 Noah Saso
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE."""
|
||||||
|
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
# downloads and exports data on all substances from psychonautwiki and tripsit factsheets, combining to form master list with standardized format
|
# downloads and exports data on all substances from psychonautwiki and tripsit factsheets, combining to form master list with standardized format
|
||||||
@ -6,12 +28,12 @@
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from time import time, sleep
|
|
||||||
from python_graphql_client import GraphqlClient
|
from python_graphql_client import GraphqlClient
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import traceback
|
import traceback
|
||||||
|
from intentGen import intentGen
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
"Access-Control-Allow-Origin": "*",
|
"Access-Control-Allow-Origin": "*",
|
||||||
@ -552,7 +574,54 @@ for name in all_substance_names:
|
|||||||
|
|
||||||
# output
|
# output
|
||||||
|
|
||||||
|
substances_json = {}
|
||||||
|
substances_json["substances"] = substance_data
|
||||||
|
with open(f"ts_pn_data/substances_data.json", "w") as f:
|
||||||
|
json.dump(substances_json, fp=f, ensure_ascii=False, indent=2)
|
||||||
|
|
||||||
substances_json = json.dumps(substance_data, indent=2, ensure_ascii=False)
|
|
||||||
with open(f"ts_pn_data/substances_{time()}.json", "w") as f:
|
substance_aliases = {}
|
||||||
f.write(substances_json)
|
|
||||||
|
with open("data/lookups/substances.yml", "w") as fp:
|
||||||
|
# Lookup Table
|
||||||
|
fp.write("""version: "2.0"\nnlu:\n- lookup: substance\n examples: |\n""")
|
||||||
|
for drug in substances_json["substances"]:
|
||||||
|
fp.write(f" - {drug['name']}\n")
|
||||||
|
# Add aliases to lookup table too
|
||||||
|
for y in drug["aliases"]:
|
||||||
|
# Check for "or" in aliases and remove
|
||||||
|
if " or " in y:
|
||||||
|
aliases = y.split(" or ")
|
||||||
|
fp.write(f" - {aliases[0]}\n")
|
||||||
|
fp.write(f" - {aliases[1]}\n")
|
||||||
|
elif "or " in y:
|
||||||
|
aliases = y.split("or ")
|
||||||
|
fp.write(f" - {aliases[1]}\n")
|
||||||
|
else:
|
||||||
|
fp.write(f" - {y}\n")
|
||||||
|
fp.write("\n")
|
||||||
|
# Synonyms to map aliases to one entity
|
||||||
|
for drug in substances_json["substances"]:
|
||||||
|
# Skip adding synonym if there are no aliases
|
||||||
|
substance_aliases[drug["name"]] = []
|
||||||
|
if drug["aliases"] == []:
|
||||||
|
continue
|
||||||
|
fp.write(f"- synonym: {drug['name']}\n examples: |\n")
|
||||||
|
for y in drug["aliases"]:
|
||||||
|
# Check for "or" in aliases and remove
|
||||||
|
if " or " in y:
|
||||||
|
aliases = y.split(" or ")
|
||||||
|
fp.write(f" - {aliases[0]}\n")
|
||||||
|
fp.write(f" - {aliases[1]}\n")
|
||||||
|
substance_aliases[drug["name"]].append(aliases[0])
|
||||||
|
substance_aliases[drug["name"]].append(aliases[1])
|
||||||
|
elif "or " in y:
|
||||||
|
aliases = y.split("or ")
|
||||||
|
fp.write(f" - {aliases[1]}\n")
|
||||||
|
substance_aliases[drug["name"]].append(aliases[1])
|
||||||
|
else:
|
||||||
|
fp.write(f" - {y}\n")
|
||||||
|
substance_aliases[drug["name"]].append(y)
|
||||||
|
|
||||||
|
with open("ts_pn_data/generated_intents.yml", "w") as fp:
|
||||||
|
fp.write(intentGen(substance_aliases).what_is())
|
||||||
|
38
ts_pn_data/intentGen.py
Normal file
38
ts_pn_data/intentGen.py
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
from secrets import choice, randbelow
|
||||||
|
|
||||||
|
|
||||||
|
class intentGen:
    """Generate Rasa NLU intent examples for substance names and aliases.

    The generated text is a YAML fragment in the Rasa 2.x training-data
    layout (``- intent: <name>`` followed by an ``examples: |`` block), with
    every substance mention annotated as a ``substance`` entity.
    """

    def __init__(self, substances):
        """Flatten a name -> aliases mapping into ``self.names``.

        Args:
            substances: Mapping whose keys are substance names and whose
                values are lists of alias strings (a list may be empty).
                Each name is stored first, followed by its aliases, in
                mapping order.
        """
        self.names = []
        for name in substances:
            self.names.append(name)
            # extend() with an empty list is a no-op, so no emptiness check
            # is needed.
            self.names.extend(substances[name])

    def parse(self, intent_name, intent_list):
        """Render an intent and its examples as a Rasa NLU YAML fragment.

        Args:
            intent_name: Name written after ``- intent:``.
            intent_list: Iterable of example utterances, one list item each.

        Returns:
            The YAML fragment as a string. With no examples, only the
            two-line header is returned.
        """
        # "examples" must be indented to align with "intent" (2 spaces), and
        # the literal block items must be indented deeper than "examples"
        # (4 spaces); otherwise the output is not valid YAML.
        header = f"- intent: {intent_name}\n  examples: |\n"
        # Single join instead of rebuilding the whole string per example.
        return header + "".join(f"    - {example}\n" for example in intent_list)

    def what_is(self):
        """Build the ``what_is_substance`` intent from every known name.

        Each name yields one randomly chosen common phrasing, and with a
        3-in-10 chance one additional, less common phrasing.

        Returns:
            The YAML fragment produced by :meth:`parse` for the intent
            ``what_is_substance``.
        """
        what_is_intents = []
        for name in self.names:
            templates = [
                f"what is [{name}](substance)?",
                f"what is [{name}](substance)",
                f"whats [{name}](substance)",
                f"what's [{name}](substance)?",
                f"what [{name}](substance)",
            ]
            what_is_intents.append(choice(templates))
            # randbelow(10) yields 0..9, so "> 6" is true for 7, 8 and 9.
            if randbelow(10) > 6:
                unlikely_templates = [
                    f"[{name}](substance)?",
                    f"[{name}](substance) is what?",
                    f"[{name}](substance) is?",
                ]
                what_is_intents.append(choice(unlikely_templates))
        return self.parse("what_is_substance", intent_list=what_is_intents)
|
File diff suppressed because it is too large
Load Diff
45374
ts_pn_data/substances_data.json
Normal file
45374
ts_pn_data/substances_data.json
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user