mirror of
				https://github.com/pyt0xic/pablo-bot.git
				synced 2025-10-31 18:57:25 +01:00 
			
		
		
		
	Using a better script to get drug data and updated requirements.txt
This commit is contained in:
		
							parent
							
								
									8243a5ba0f
								
							
						
					
					
						commit
						faefd94f2a
					
				| @ -1,2 +1,4 @@ | ||||
| rasa | ||||
| pandas | ||||
| pandas | ||||
| beautifulsoup4 | ||||
| python-graphql-client | ||||
|  | ||||
							
								
								
									
										35201
									
								
								tripsit/getAllDrugs.json
									
									
									
									
									
								
							
							
						
						
									
										35201
									
								
								tripsit/getAllDrugs.json
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -1,27 +0,0 @@ | ||||
import requests
import json
import pandas as pd

# API URL
url = "http://tripbot.tripsit.me/api/tripsit/getAllDrugs"
r = requests.get(url)
data = r.json()

# Load the drug map straight into a DataFrame, one row per drug.
# (The previous json.dumps -> json.loads round-trip was a no-op: dict
# insertion order survives either way.)
df = pd.DataFrame.from_dict(data["data"][0], orient="index")

# Add a sequential integer id for each drug for rasa.
# range() replaces the manual append loop and avoids shadowing the
# `id` builtin.
df["id"] = range(len(df))

# Write to JSON file
with open("tripsit/getAllDrugs.json", "w") as fp:
    # Clean NaN values (v == v is False only for NaN) and drop explicit Nones
    clean_data = {
        k1: {k: v for k, v in v1.items() if v == v and v is not None}
        for k1, v1 in df.to_dict("index").items()
    }
    # Set ensure_ascii to false to ensure we can keep greek letters (like alpha)
    fp.write(json.dumps(clean_data, indent=2, ensure_ascii=False))
							
								
								
									
										22162
									
								
								ts_pn_data/_cached_pw_substances.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22162
									
								
								ts_pn_data/_cached_pw_substances.json
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										558
									
								
								ts_pn_data/getData.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										558
									
								
								ts_pn_data/getData.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,558 @@ | ||||
| #!/usr/bin/env python3 | ||||
| 
 | ||||
| # downloads and exports data on all substances from psychonautwiki and tripsit factsheets, combining to form master list with standardized format | ||||
| # prioritizes psychonautwiki ROA info (dose/duration) over tripsit factsheets | ||||
| # pip3 install beautifulsoup4 requests python-graphql-client | ||||
| 
 | ||||
| import requests | ||||
| from bs4 import BeautifulSoup | ||||
| from time import time, sleep | ||||
| from python_graphql_client import GraphqlClient | ||||
| import json | ||||
| import os | ||||
| import re | ||||
| import traceback | ||||
| 
 | ||||
# Shared HTTP headers: CORS boilerplate plus a desktop-browser User-Agent.
# NOTE(review): presumably the browser UA is needed so the wiki doesn't
# reject the default python-requests agent — confirm before removing.
headers = {
    "Access-Control-Allow-Origin": "*",
    "Access-Control-Allow-Methods": "GET",
    "Access-Control-Allow-Headers": "Content-Type",
    "Access-Control-Max-Age": "3600",
    "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
}

# TripSit REST endpoint (full factsheet dump) and PsychonautWiki GraphQL API.
ts_api_url = "https://tripbot.tripsit.me/api/tripsit/getAllDrugs"
ps_api_url = "https://api.psychonautwiki.org"
# Client used for every PsychonautWiki GraphQL query below.
ps_client = GraphqlClient(endpoint=ps_api_url, headers=headers)
| 
 | ||||
| 
 | ||||
def substance_name_match(name, substance):
    """Return True if `name` matches the substance, case-insensitively.

    A match is a hit on the substance's "name" or "pretty_name" key (when
    present) or on any entry of its "aliases" list. Generator expressions
    short-circuit on the first hit instead of materializing lists.
    """
    lower_name = name.lower()
    if any(
        lower_name == substance[key].lower()
        for key in ("name", "pretty_name")
        if key in substance
    ):
        return True
    return any(lower_name == alias.lower() for alias in substance.get("aliases", []))
| 
 | ||||
| 
 | ||||
def find_substance_in_data(data, name):
    """Return the first substance in `data` matching `name`, else None."""
    for substance in data:
        if substance_name_match(name, substance):
            return substance
    return None
| 
 | ||||
| 
 | ||||
# Symmetric ROA-name synonyms; each name maps to the list of its aliases.
# Built from pairs so both directions always stay in sync.
_ROA_SYNONYM_PAIRS = (
    ("iv", "intravenous"),
    ("im", "intramuscular"),
    ("insufflated", "snorted"),
    ("vaporized", "vapourized"),
)
roa_name_aliases = {}
for _first, _second in _ROA_SYNONYM_PAIRS:
    roa_name_aliases[_first] = [_second]
    roa_name_aliases[_second] = [_first]
| 
 | ||||
| 
 | ||||
def roa_matches_name(roa, name):
    """True if the ROA's name equals `name` (case-insensitive) or is a synonym."""
    target = name.lower()
    candidate = roa["name"].lower()
    return candidate == target or candidate in roa_name_aliases.get(target, [])
| 
 | ||||
| 
 | ||||
| # get tripsit data | ||||
| 
 | ||||
| 
 | ||||
# TripSit dose tiers, in ascending order of strength; used to emit dosage
# rows in a stable order.
ts_dose_order = ["Threshold", "Light", "Common", "Strong", "Heavy"]
# "benzos" duplicates the "benzodiazepines" entry below, so skip it.
ts_combo_ignore = ["benzos"]  # duplicate
# prettify names in interaction list
ts_combo_transformations = {
    "lsd": "LSD",
    "mushrooms": "Mushrooms",
    "dmt": "DMT",
    "mescaline": "Mescaline",
    "dox": "DOx",
    "nbomes": "NBOMes",
    "2c-x": "2C-x",
    "2c-t-x": "2C-T-x",
    "amt": "aMT",
    "5-meo-xxt": "5-MeO-xxT",
    "cannabis": "Cannabis",
    "ketamine": "Ketamine",
    "mxe": "MXE",
    "dxm": "DXM",
    "pcp": "PCP",
    "nitrous": "Nitrous",
    "amphetamines": "Amphetamines",
    "mdma": "MDMA",
    "cocaine": "Cocaine",
    "caffeine": "Caffeine",
    "alcohol": "Alcohol",
    "ghb/gbl": "GHB/GBL",
    "opioids": "Opioids",
    "tramadol": "Tramadol",
    "benzodiazepines": "Benzodiazepines",
    "maois": "MAOIs",
    "ssris": "SSRIs",
}

# Download the full TripSit factsheet dump; the API nests the substance
# map inside data[0].
ts_response = requests.get(ts_api_url)
ts_data = ts_response.json()["data"][0]

# Flatten to a list of per-substance dicts.
# NOTE(review): presumably ts_data keys are substance names duplicated
# inside each record — verify against the API response.
ts_substances_data = list(ts_data.values())
| 
 | ||||
| 
 | ||||
| # TS has durations split over a few keys, so this finds or creates the duration for the associated ROA | ||||
| # and adds a new line item | ||||
# TS has durations split over a few keys, so this finds or creates the duration
# for the associated ROA and adds a new line item
def ts_add_formatted_duration(ts_roas, formatted_duration, duration_name):
    """Merge one TripSit timing table into `ts_roas` in place.

    `formatted_duration` maps ROA names — or the catch-all key "value" —
    to a timing figure, with an optional "_unit" entry holding the unit
    string. Each figure becomes a {"name": duration_name, "value":
    "<figure> <unit>"} line item on the matching ROA (created if absent).

    Fix: the original popped "_unit" out of `formatted_duration`, mutating
    the caller's TS substance record as a hidden side effect; we now just
    skip that key while iterating.
    """
    units = formatted_duration.get("_unit") or ""

    def add_to_roa(roa, value):
        # lazily create the duration list on first use
        if "duration" not in roa:
            roa["duration"] = []

        roa["duration"].append({"name": duration_name, "value": value})

    for roa_name, value in formatted_duration.items():
        if roa_name == "_unit":
            continue  # metadata, not an ROA

        value_string = f"{value} {units}".strip()

        # if value present (i.e. just one value for all ROA doses provided above), apply to all ROAs
        if roa_name == "value":
            # if TS did not add any doses, do nothing with this value
            # we could theoretically apply this to all PW doses with missing durations, but we can't be sure
            # if it applies to all ROAs, so just ignore
            if not len(ts_roas):
                break

            for ts_roa in ts_roas:
                add_to_roa(ts_roa, value_string)

        # add to matching ROA or create new ROA if doesn't exist
        else:
            ts_roa = next(
                (ts_roa for ts_roa in ts_roas if roa_matches_name(ts_roa, roa_name)),
                None,
            )
            # if ROA doesn't exist, make new
            if not ts_roa:
                ts_roa = {"name": roa_name}
                ts_roas.append(ts_roa)

            add_to_roa(ts_roa, value_string)
| 
 | ||||
| 
 | ||||
| # get psychonautwiki data | ||||
| 
 | ||||
| 
 | ||||
# Patterns stripped from a scraped common name, applied in this exact order:
# leading quote, trailing quote, trailing citation like "[12]" (optionally
# quoted), a trailing "More names." marker, then a trailing period.
_COMMON_NAME_NOISE = (r'^"', r'"$', r'"?\[\d*\]$', r"\s*More names\.$", r"\.$")


def pw_clean_common_name(name):
    """Strip wiki markup noise from one scraped common name."""
    for pattern in _COMMON_NAME_NOISE:
        name = re.sub(pattern, "", name)
    return name.strip()
| 
 | ||||
| 
 | ||||
def pw_should_skip(name, soup):
    """True for pages we don't scrape: Experience reports, or pages
    without a "Common names" table cell (i.e. not a substance page)."""
    if name.startswith("Experience:"):
        return True
    return len(soup.find_all(text="Common names")) == 0
| 
 | ||||
| 
 | ||||
pw_substance_data = []

# Reuse a previously scraped dump when one exists, so re-runs skip the
# slow PsychonautWiki crawl entirely.
_pw_cache_path = "ts_pn_data/_cached_pw_substances.json"
if os.path.exists(_pw_cache_path):
    with open(_pw_cache_path) as f:
        pw_substance_data = json.load(f)
| 
 | ||||
# No usable cache: crawl PsychonautWiki from scratch.
if not len(pw_substance_data):
    # First page of the substance index (a single request is capped at 250).
    offset = 0
    pw_substance_urls_query = (
        f"{{substances(limit: 250 offset: {offset}) {{name url}}}}"
    )

    pw_substance_urls_data = ps_client.execute(query=pw_substance_urls_query)["data"][
        "substances"
    ]

    # Fetch the tail of the index one entry at a time.
    # NOTE(review): offsets 250-251 are never requested (the first page
    # covers 0-249 and this loop starts at 252) — confirm the gap is intended.
    offset = 252
    while offset <= 340:
        pw_substance_urls_query = (
            f"{{substances(limit: 1 offset: {offset}) {{name url}}}}"
        )
        offset += 1
        temp_data = ps_client.execute(query=pw_substance_urls_query)["data"][
            "substances"
        ]
        print(temp_data)
        # some offsets come back empty; just skip them
        if temp_data is None:
            continue
        pw_substance_urls_data.extend(temp_data)

    for idx, substance in enumerate(pw_substance_urls_data):
        # Seed `name` from the index entry so the error handler below can
        # report something even when the page fetch itself fails (the
        # original raised NameError inside `except` in that case).
        name = substance.get("name", "?")
        try:
            url = substance["url"]
            # FIX: the second positional argument of requests.get() is
            # `params`, not `headers` — the header dict was being sent as
            # query parameters and the User-Agent was never applied.
            substance_req = requests.get(url, headers=headers)
            substance_soup = BeautifulSoup(substance_req.content, "html.parser")

            name = substance_soup.find("h1", id="firstHeading").text
            if pw_should_skip(name, substance_soup):
                print(f"Skipping {name} ({idx + 1} / {len(pw_substance_urls_data)})")
                continue

            # get aliases text
            common_names_str = substance_soup.find_all(text="Common names")

            # split the "Common names" table cell and normalize each entry
            cleaned_common_names = (
                set(
                    map(
                        pw_clean_common_name,
                        common_names_str[0]
                        .parent.find_next_sibling("td")
                        .text.split(", "),
                    )
                )
                if len(common_names_str) > 0
                else set()
            )
            cleaned_common_names.add(substance["name"])
            # don't include name in list of other common names
            common_names = sorted(filter(lambda n: n != name, cleaned_common_names))

            # scrape ROAs from page

            def get_data_starting_at_row(curr_row):
                """Collect consecutive ROARowHeader rows starting at curr_row.

                Returns (rows, first_row_after_the_run). Each row dict has
                "name", "value" (possibly None) and an optional "note".
                """
                rows = []
                while curr_row.find("th", {"class": "ROARowHeader"}):
                    row = {}
                    row["name"] = (
                        curr_row.find("th", {"class": "ROARowHeader"}).find("a").text
                    )

                    row_values = curr_row.find("td", {"class": "RowValues"})

                    # direct text only, so nested footnote spans are excluded
                    row_value_text = row_values.find_all(text=True, recursive=False)
                    if len(row_value_text):
                        row["value"] = "".join(row_value_text).strip()
                    else:
                        row["value"] = None

                    # footnote span, minus any trailing "[n]" citation mark
                    row_note = row_values.find("span")
                    if row_note:
                        row["note"] = re.sub(r"\s*\[\d*\]$", "", row_note.text).strip()

                    rows.append(row)

                    curr_row = curr_row.find_next("tr")
                return rows, curr_row

            roas = []

            dose_charts = substance_soup.find_all("tr", {"class": "dosechart"})
            for dose_chart in dose_charts:
                # ROA name is the first link in the table enclosing the chart
                table = dose_chart.parent.parent
                roa_name = table.find("tr").find("a").text
                if not roa_name:
                    continue

                roa = {
                    "name": roa_name,
                    "dosage": [],
                    "duration": [],
                }

                # dosage rows follow the dosechart header row

                curr_row = dose_chart.find_next("tr")
                roa["dosage"], curr_row = get_data_starting_at_row(curr_row)

                # extract bioavailability (always the first dosage row when present)
                if len(roa["dosage"]) and roa["dosage"][0]["name"] == "Bioavailability":
                    bioavailability = roa["dosage"].pop(0)
                    roa["bioavailability"] = bioavailability["value"]

                # duration rows sit under an optional ROASubHeader row

                if curr_row.find("th", {"class": "ROASubHeader"}):
                    curr_row = curr_row.find_next("tr")
                    roa["duration"], _ = get_data_starting_at_row(curr_row)

                # normalize empty lists to None for the output format
                if not len(roa["dosage"]):
                    roa["dosage"] = None
                if not len(roa["duration"]):
                    roa["duration"] = None

                roas.append(roa)

            # query PS API for more data on substance

            query = (
                """
                {
                    substances(query: "%s") {
                        name
                        class {
                            chemical
                            psychoactive
                        }
                        tolerance {
                            full
                            half
                            zero
                        }
                        toxicity
                        addictionPotential
                        crossTolerances
                    }
                }
            """
                % substance["name"]
            )

            data = ps_client.execute(query=query)["data"]["substances"]
            if len(data) == 0:
                continue
            elif len(data) > 1:
                # should never happen?
                print(f"{name} has more than one dataset... investigate why")

            data = data[0]
            # the name is already stored at the top level of our record
            if "name" in data:
                data.pop("name")

            pw_substance_data.append(
                {
                    "url": url,
                    "name": name,
                    "aliases": common_names,
                    "roas": roas,
                    "data": data,
                }
            )
            print(
                f"Done with {name} [{len(roas)} ROA(s)] ({idx + 1} / {len(pw_substance_urls_data)})"
            )

        except KeyboardInterrupt:
            print("\nScrape canceled")
            exit(0)
        except Exception:
            # narrowed from a bare `except:` so SystemExit/GeneratorExit are
            # not swallowed; any scrape error still aborts with a traceback
            print(f"{name} failed:")
            print(traceback.format_exc())
            exit(1)

    # cache the scrape so subsequent runs skip the network entirely
    with open("ts_pn_data/_cached_pw_substances.json", "w") as f:
        f.write(json.dumps(pw_substance_data, indent=2, ensure_ascii=False))
| 
 | ||||
| # combine tripsit and psychonautwiki data | ||||
| 
 | ||||
| 
 | ||||
# Union of every substance name (lower-cased) present in either dataset,
# sorted so the merge below runs in a deterministic order.
all_substance_names = sorted(
    {s.get("name", "").lower() for s in pw_substance_data}
    | {s.get("name", "").lower() for s in ts_substances_data}
)
substance_data = []  # merged output records
x = 0  # running id assigned to each merged substance
| 
 | ||||
# Merge the PW and TS records for each substance into one standardized entry.
for name in all_substance_names:
    # find PW substance
    pw_substance = find_substance_in_data(pw_substance_data, name)
    # remove to get rid of duplicates in final output
    if pw_substance:
        pw_substance_data.remove(pw_substance)
    else:
        pw_substance = {}

    # find TS substance
    ts_substance = find_substance_in_data(ts_substances_data, name)
    # remove to get rid of duplicates in final output
    if ts_substance:
        ts_substances_data.remove(ts_substance)
    else:
        ts_substance = {}

    # if no substance found in either dataset, skip
    if not pw_substance and not ts_substance:
        continue

    ts_properties = ts_substance.get("properties", {})

    # url will always exist for psychonautwiki substance, so tripsit substance must exist if url is None
    url = pw_substance.get("url") or f"https://drugs.tripsit.me/{ts_substance['name']}"

    ts_links = ts_substance.get("links", {})
    experiences_url = ts_links.get("experiences")

    # pick display name from available substances found from both datasets
    names = list(
        filter(
            lambda n: n is not None and len(n) > 0,
            [pw_substance.get("name"), ts_substance.get("pretty_name")],
        )
    )
    # people use shorter names
    name = min(names, key=len)

    # lowercase list of all names, excluding chosen name above
    aliases = set(
        map(
            lambda n: n.lower(),
            filter(
                lambda n: n is not None and len(n) > 0,
                [pw_substance.get("name"), ts_substance.get("pretty_name")]
                + pw_substance.get("aliases", [])
                + ts_substance.get("aliases", []),
            ),
        )
    )
    if name.lower() in aliases:
        aliases.remove(name.lower())
    aliases = sorted(aliases)

    summary = ts_properties.get("summary", "").strip()
    if not len(summary):
        summary = None

    test_kits = ts_properties.get("test-kits", "").strip()
    if not len(test_kits):
        test_kits = None

    # parse TS free-text bioavailability (e.g. "Oral: 80%. IV: 100%")
    # into a {roa_name: value} map
    ts_bioavailability_str = ts_properties.get("bioavailability", "").strip()
    ts_bioavailability = {}
    if len(ts_bioavailability_str):
        matches = re.findall(
            r"([a-zA-Z\/]+)[.:\s]+([0-9\.%\s\+/\-]+)", ts_bioavailability_str
        )
        if len(matches):
            for roa_name, value in matches:
                ts_bioavailability[roa_name.lower()] = value.strip(". \t")

    pw_data = pw_substance.get("data", {})

    classes = pw_data.get("class")
    toxicity = pw_data.get("toxicity")
    addiction_potential = pw_data.get("addictionPotential")
    tolerance = pw_data.get("tolerance")
    cross_tolerances = pw_data.get("crossTolerances")

    roas = []

    # get PW ROAs
    pw_roas = pw_substance.get("roas", [])

    # process TS ROAs
    ts_roas = []

    # TS ROA dosage, emitted in canonical Threshold..Heavy tier order
    ts_formatted_dose = ts_substance.get("formatted_dose")
    if ts_formatted_dose:
        for roa_name, dose_data in ts_formatted_dose.items():
            dose_levels = []
            for dose_level in ts_dose_order:
                value_string = dose_data.get(dose_level)
                if value_string is None:
                    continue

                dose_levels.append({"name": dose_level, "value": value_string})

            if len(dose_levels):
                ts_roas.append({"name": roa_name, "dosage": dose_levels})

    # TS ROA durations
    ts_formatted_onset = ts_substance.get("formatted_onset")
    if ts_formatted_onset:
        ts_add_formatted_duration(ts_roas, ts_formatted_onset, "Onset")

    ts_formatted_duration = ts_substance.get("formatted_duration")
    if ts_formatted_duration:
        ts_add_formatted_duration(ts_roas, ts_formatted_duration, "Duration")

    ts_formatted_aftereffects = ts_substance.get("formatted_aftereffects")
    if ts_formatted_aftereffects:
        ts_add_formatted_duration(ts_roas, ts_formatted_aftereffects, "After effects")

    # merge PW and TS ROAs
    # prioritize PW for ROAs but use TS to fill in gaps

    roas.extend(pw_roas)
    for ts_roa in ts_roas:
        existing_roa = next(
            (roa for roa in roas if roa_matches_name(roa, ts_roa["name"])), None
        )
        # if ROA does not exist, add
        if not existing_roa:
            existing_roa = ts_roa
            roas.append(existing_roa)
            # we want bioavailability from below, so don't skip

        # if ROA does not already have bioavailability, try to get from TS
        if not existing_roa.get("bioavailability"):
            name_lower = ts_roa["name"].lower()
            name_aliases = roa_name_aliases.get(name_lower, [])

            # FIX: the original generator yielded booleans
            # (`name_alias in ts_bioavailability`), so `alias_found` became
            # True/False and ts_bioavailability.get(alias_found) always
            # missed. Yield the matching alias itself instead.
            alias_found = next(
                (
                    name_alias
                    for name_alias in name_aliases
                    if name_alias in ts_bioavailability
                ),
                None,
            )
            # TS has bioavailability if name or any name alias is found
            if name_lower in ts_bioavailability or alias_found:
                existing_roa["bioavailability"] = ts_bioavailability.get(
                    name_lower
                ) or ts_bioavailability.get(alias_found)

        # if existing ROA is missing dosage and TS has dosage, add
        if (not existing_roa.get("dosage") or not len(existing_roa["dosage"])) and (
            "dosage" in ts_roa and ts_roa["dosage"] and len(ts_roa["dosage"])
        ):
            existing_roa["dosage"] = ts_roa["dosage"]

        # if existing ROA is missing duration and TS has duration, add
        if (not existing_roa.get("duration") or not len(existing_roa["duration"])) and (
            "duration" in ts_roa and ts_roa["duration"] and len(ts_roa["duration"])
        ):
            existing_roa["duration"] = ts_roa["duration"]

    # build TS interaction list with prettified names
    interactions = None
    combos = ts_substance.get("combos")
    if combos:
        interactions = []
        for key, combo_data in combos.items():
            if key in ts_combo_ignore:
                continue

            # fall back to the raw key so an unmapped combo name can't
            # crash the whole merge (original raised KeyError here)
            combo_data["name"] = ts_combo_transformations.get(key, key)
            interactions.append(combo_data)
        interactions = sorted(interactions, key=lambda i: i["name"])

    substance_data.append(
        {
            "id": x,
            "name": name,
            "aliases": list(aliases),
            "aliasesStr": ",".join(aliases),
            "url": url,
            "experiencesUrl": experiences_url,
            "summary": summary,
            "reagents": test_kits,
            "classes": classes,
            "toxicity": toxicity,
            "addictionPotential": addiction_potential,
            "tolerance": tolerance,
            "crossTolerances": cross_tolerances,
            "roas": roas,
            "interactions": interactions,
        }
    )
    x += 1
| 
 | ||||
# output

# Serialize the merged dataset and write it to a timestamped file so
# successive runs never overwrite each other.
substances_json = json.dumps(substance_data, indent=2, ensure_ascii=False)
output_path = f"ts_pn_data/substances_{time()}.json"
with open(output_path, "w") as out_file:
    out_file.write(substances_json)
							
								
								
									
										45372
									
								
								ts_pn_data/substances_1632949161.9573479.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45372
									
								
								ts_pn_data/substances_1632949161.9573479.json
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 pyt0xic
						pyt0xic