mirror of
https://github.com/pyt0xic/pablo-bot.git
synced 2024-11-22 02:09:27 +01:00
Merge pull request #7 from pyt0xic/develop
added first intent and lookup table (entities and synonyms)
This commit is contained in:
commit
e12443707c
13
README.md
13
README.md
@ -6,10 +6,15 @@ Still a lot of work to be done but for those wanting to test anyway, see below
|
||||
|
||||
## How to run
|
||||
|
||||
$ rasa train
|
||||
$ rasa shell
|
||||
$ rasa train
|
||||
$ rasa shell
|
||||
|
||||
Or use --debug flag to help you understand what's going on
|
||||
|
||||
Or use --debug flag to help you understand what's going on
|
||||
|
||||
$ rasa shell --debug
|
||||
|
||||
## Credits
|
||||
|
||||
Thanks to the [PsychoautWiki](https://psychonautwiki.org/wiki/Main_Page) and [TripSit](https://tripsit.me) for the data.
|
||||
|
||||
Special thanks to [NoahSaso](https://github.com/NoahSaso) for his [scraper](https://github.com/NoahSaso/merge-psychonautwiki-tripsit-data)
|
||||
|
@ -3,6 +3,8 @@ pipeline:
|
||||
- name: WhitespaceTokenizer
|
||||
token_pattern: (?u)\b\w+\b
|
||||
- name: RegexFeaturizer
|
||||
use_word_boundaries: false
|
||||
case_sensitive: false
|
||||
- name: LexicalSyntacticFeaturizer
|
||||
- name: CountVectorsFeaturizer
|
||||
# OOV_token: oov
|
||||
@ -11,7 +13,7 @@ pipeline:
|
||||
min_ngram: 1
|
||||
max_ngram: 4
|
||||
- name: DIETClassifier
|
||||
epochs: 100
|
||||
epochs: 10
|
||||
ranking_length: 10
|
||||
# - name: DucklingEntityExtractor
|
||||
# url: http://localhost:8000
|
||||
|
2130
data/kb_query.yml
Normal file
2130
data/kb_query.yml
Normal file
File diff suppressed because it is too large
Load Diff
3740
data/lookups/substances.yml
Normal file
3740
data/lookups/substances.yml
Normal file
File diff suppressed because it is too large
Load Diff
@ -20,4 +20,9 @@ rules:
|
||||
- rule: OOS
|
||||
steps:
|
||||
- intent: out_of_scope
|
||||
- action: utter_out_of_scope
|
||||
- action: utter_out_of_scope
|
||||
|
||||
- rule: what is drug
|
||||
steps:
|
||||
- intent: what_is_substance
|
||||
- action: utter_what_is_stubstace
|
@ -23,16 +23,23 @@ intents:
|
||||
- contact
|
||||
- inform
|
||||
- restart
|
||||
- what_is_substance
|
||||
|
||||
|
||||
entities:
|
||||
- language
|
||||
- location
|
||||
- name
|
||||
- substance
|
||||
|
||||
slots:
|
||||
name:
|
||||
type: text
|
||||
influence_conversation: true
|
||||
substance:
|
||||
type: text
|
||||
influence_conversation: true
|
||||
auto_fill: true
|
||||
|
||||
responses:
|
||||
utter_ask_name:
|
||||
@ -127,6 +134,8 @@ responses:
|
||||
- text: That depends on which you are using and, most importantly, how you are using them...
|
||||
utter_faq/drugs_legal:
|
||||
- text: Probably but it depends on where you are and what drugs
|
||||
utter_what_is_stubstace:
|
||||
- text: It is {substance}
|
||||
|
||||
utter_out_of_scope/non_english:
|
||||
- text: No hablo english
|
||||
|
0
substances_data.json
Normal file
0
substances_data.json
Normal file
2128
ts_pn_data/generated_intents.yml
Normal file
2128
ts_pn_data/generated_intents.yml
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,25 @@
|
||||
"""MIT License
|
||||
|
||||
Copyright (c) 2021 Noah Saso
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE."""
|
||||
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# downloads and exports data on all substances from psychonautwiki and tripsit factsheets, combining to form master list with standardized format
|
||||
@ -6,12 +28,12 @@
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from time import time, sleep
|
||||
from python_graphql_client import GraphqlClient
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import traceback
|
||||
from intentGen import intentGen
|
||||
|
||||
headers = {
|
||||
"Access-Control-Allow-Origin": "*",
|
||||
@ -552,7 +574,54 @@ for name in all_substance_names:
|
||||
|
||||
# output
|
||||
|
||||
# Persist the combined substance data: once at a fixed path (consumed by the
# lookup/synonym generation below) and once as a timestamped snapshot.
substances_json = {}
substances_json["substances"] = substance_data
with open("ts_pn_data/substances_data.json", "w") as f:
    json.dump(substances_json, fp=f, ensure_ascii=False, indent=2)

# BUG FIX: the original rebound `substances_json` to the *string* returned by
# json.dumps() and then indexed it with ["substances"] in the loops below,
# which raises TypeError at runtime. Keep the serialized text in its own
# variable so `substances_json` stays a dict.
substances_json_str = json.dumps(substance_data, indent=2, ensure_ascii=False)
with open(f"ts_pn_data/substances_{time()}.json", "w") as f:
    f.write(substances_json_str)


def _split_alias(alias):
    """Return the usable alias strings extracted from one raw alias.

    Mirrors the original heuristic, applied identically in both loops below:
    "a or b" yields both halves, a bare "or "-prefixed alias is stripped to
    the part after "or ", and anything else is returned unchanged.
    NOTE(review): the bare `"or " in alias` test also matches words that merely
    contain "or " (e.g. "Color test") — confirm this is acceptable upstream.
    """
    if " or " in alias:
        parts = alias.split(" or ")
        return [parts[0], parts[1]]
    if "or " in alias:
        return [alias.split("or ")[1]]
    return [alias]


substance_aliases = {}

with open("data/lookups/substances.yml", "w") as fp:
    # Lookup table: every substance name plus every (cleaned) alias.
    # Indentation of the emitted YAML reconstructed to Rasa 2.x convention —
    # the scraped source had collapsed the whitespace inside these literals.
    fp.write("""version: "2.0"\nnlu:\n- lookup: substance\n  examples: |\n""")
    for drug in substances_json["substances"]:
        fp.write(f"    - {drug['name']}\n")
        # Add aliases to the lookup table too
        for raw_alias in drug["aliases"]:
            for alias in _split_alias(raw_alias):
                fp.write(f"    - {alias}\n")
    fp.write("\n")
    # Synonyms: map every alias back onto its canonical substance name.
    for drug in substances_json["substances"]:
        substance_aliases[drug["name"]] = []
        # Skip emitting a synonym block if there are no aliases
        if drug["aliases"] == []:
            continue
        fp.write(f"- synonym: {drug['name']}\n  examples: |\n")
        for raw_alias in drug["aliases"]:
            for alias in _split_alias(raw_alias):
                fp.write(f"    - {alias}\n")
                substance_aliases[drug["name"]].append(alias)

# Generate "what is X" training intents from the collected aliases.
with open("ts_pn_data/generated_intents.yml", "w") as fp:
    fp.write(intentGen(substance_aliases).what_is())
|
||||
|
38
ts_pn_data/intentGen.py
Normal file
38
ts_pn_data/intentGen.py
Normal file
@ -0,0 +1,38 @@
|
||||
from secrets import choice, randbelow
|
||||
|
||||
|
||||
class intentGen:
    """Generate Rasa NLU training-intent blocks from a substance/alias map."""

    def __init__(self, substances):
        # Flatten the {name: [aliases, ...]} mapping into a single flat list
        # of every canonical name followed by its aliases, in input order.
        self.names = []
        for primary, aliases in substances.items():
            self.names.append(primary)
            self.names.extend(aliases)

    def parse(self, intent_name, intent_list):
        """Render one intent block (header line plus one line per example)."""
        pieces = [f"- intent: {intent_name}\n examples: |\n"]
        pieces.extend(f" - {example}\n" for example in intent_list)
        return "".join(pieces)

    def what_is(self):
        """Build randomized "what is X" examples, one-plus per known name.

        Each name always contributes one common phrasing; roughly 3 in 10
        names additionally contribute a rarer phrasing.
        """
        examples = []
        for name in self.names:
            roll = randbelow(10)
            examples.append(choice([
                f"what is [{name}](substance)?",
                f"what is [{name}](substance)",
                f"whats [{name}](substance)",
                f"what's [{name}](substance)?",
                f"what [{name}](substance)",
            ]))
            if roll > 6:
                examples.append(choice([
                    f"[{name}](substance)?",
                    f"[{name}](substance) is what?",
                    f"[{name}](substance) is?",
                ]))
        return self.parse("what_is_substance", intent_list=examples)
|
File diff suppressed because it is too large
Load Diff
45374
ts_pn_data/substances_data.json
Normal file
45374
ts_pn_data/substances_data.json
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user