import random

import requests
from bs4 import BeautifulSoup

__author__ = "jarbasAI"


class Erowid(object):
    """Small scraper for Erowid substance indexes and experience reports."""

    @staticmethod
    def extract_experience_text(text):
        # Pull the report body out of the raw experience-page HTML.
        # NOTE: the begin/end markers and the tags replaced below were lost
        # when this file was extracted; the HTML comment delimiters and the
        # <BR> variants used here are assumptions and should be checked
        # against a live page.
        try:
            begin_delimiter = '<!-- Start Body -->'
            end_delimiter = '<!-- End Body -->'
            begin = text.index(begin_delimiter) + len(begin_delimiter)
            end = text.index(end_delimiter)
            # Turn HTML line breaks into newlines, then collapse blank lines.
            return (text[begin:end].strip()
                    .replace("<BR>", "\n")
                    .replace("<BR/>", "\n")
                    .replace("\n\n", " "))
        except ValueError:
            return ''

    @staticmethod
    def _extract_list(base_url):
        # Scrape a substance index page (chemicals, plants, herbs, ...) and
        # return one dict per row of its topic chart.
        response = requests.get(base_url).text
        soup = BeautifulSoup(response, "lxml")
        table = soup.find('table', {'class': 'topic-chart-surround'})
        categories = table.find_all("tr", {'class': 'topic-surround'})[1:]
        fields = []
        for cat in categories:
            chem_data = {}
            name = cat.find("td", {'class': 'topic-name'})
            chem_data["url"] = base_url + name.find("a")["href"]
            chem_data["name"] = name.getText().strip().lower()
            chem_data["other_names"] = cat.find(
                "td", {'class': 'topic-common'}).getText().strip().lower()
            chem_data["effects"] = cat.find(
                "td", {'class': 'topic-desc'}).getText().strip().lower()
            fields.append(chem_data)
        return fields

    @staticmethod
    def get_experience(exp_id):
        # Fetch and parse a single experience report by its numeric id.
        # Returns None when the page is missing or cannot be parsed.
        base_url = 'https://erowid.org/experiences/exp.php'
        url = base_url + "?ID=" + str(exp_id)
        data = {"exp_id": exp_id, "url": url}
        try:
            response = requests.get(url).text
            experience = Erowid.extract_experience_text(response)
            soup = BeautifulSoup(response, "lxml")
            drug = (soup.find('div', {'class': 'substance'})
                    .getText().strip().lower().replace("/", ", "))
            experience_data = (soup.find('table', {'class': 'footdata'})
                               .getText().strip().lower().split("\n"))
            data["drug"] = drug
            data["experience"] = experience
            data["year"] = (experience_data[0].split("expid:")[0]
                            .replace("exp year: ", "").strip())
            data["gender"] = experience_data[1].replace("gender: ", "").strip()
            data["age"] = (experience_data[2]
                           .replace("age at time of experience: ", "").strip())
            data["date"] = (experience_data[3].replace("published: ", "")
                            .split("views:")[0].strip())
            # The dose chart is parsed column by column; rows are aligned by
            # index across the five cell lists.
            data["dosage"] = []
            dosage_table = soup.find('table', {'class': 'dosechart'})
            ts = dosage_table.find_all("td", {'align': 'right'})
            amount = dosage_table.find_all("td", {'class': 'dosechart-amount'})
            method = dosage_table.find_all("td", {'class': 'dosechart-method'})
            substance = dosage_table.find_all("td", {'class': 'dosechart-substance'})
            form = dosage_table.find_all("td", {'class': 'dosechart-form'})
            for i in range(len(ts)):
                dosage_data = {}
                dosage_data["time"] = ts[i].getText().lower().replace("dose:", "").strip()
                dosage_data["amount"] = amount[i].getText().strip().lower()
                dosage_data["method"] = method[i].getText().strip().lower()
                dosage_data["substance"] = substance[i].getText().strip().lower()
                dosage_data["form"] = form[i].getText().strip().lower()
                data["dosage"].append(dosage_data)
        except Exception:
            return None
        return data

    @staticmethod
    def get_categories():
        # List links from the experience-vault index page.
        # NOTE: the original implementation split the raw HTML on markup
        # delimiters that were lost during extraction; collecting anchor
        # href targets below is a reconstruction of the intent and may need
        # adjustment against the live page.
        base_url = 'https://erowid.org/experiences/exp_list.shtml'
        response = requests.get(base_url).text
        categories = []
        for sub in response.split('<a href="')[1:]:
            sub = sub.split('"')[0].strip()
            if sub:
                categories.append(sub)
        return categories

    @staticmethod
    def get_chemicals():
        base_url = 'https://erowid.org/chemicals/'
        return Erowid._extract_list(base_url)

    @staticmethod
    def get_plants():
        base_url = 'https://erowid.org/plants/'
        return Erowid._extract_list(base_url)

    @staticmethod
    def get_herbs():
        base_url = 'https://erowid.org/herbs/'
        return Erowid._extract_list(base_url)

    @staticmethod
    def get_smarts():
        base_url = 'https://erowid.org/smarts/'
        return Erowid._extract_list(base_url)

    @staticmethod
    def get_animals():
        base_url = 'https://erowid.org/animals/'
        return Erowid._extract_list(base_url)
    @staticmethod
    def parse_page(url):
        # Parse a substance summary page (chem, pharm, smart, plant, herb
        # or animal) into a flat dict.
        base_url = url
        if ".shtml" in base_url:
            base_url = "/".join(base_url.split("/")[:-1]) + "/"
        data = {"url": base_url}
        response = requests.get(url).text
        soup = BeautifulSoup(response, "lxml")
        data["name"] = soup.find('div', {'class': 'title-section'}).getText().strip().lower()
        picture = soup.find('div', {'class': "summary-card-topic-image"}).find("img")
        if picture:
            picture = base_url + picture["src"]
        else:
            picture = ""
        data["picture"] = picture
        data["other_names"] = [n.strip().lower() for n in soup.find(
            'div', {'class': 'sum-common-name'}).getText().split(";")]
        data["description"] = soup.find('div', {'class': "sum-description"}).getText()
        # Collect the summary-card icon links, keyed by their image alt text.
        info = soup.find('div', {'class': "summary-card-icon-surround"}).find_all("a")
        urls = {}
        for i in info:
            link = base_url + i["href"]
            name = i.find("img")["alt"].strip().lower()
            urls[name] = link
        data["info"] = urls
        if "/chem" in url or "/pharms" in url or "/smarts" in url:
            data["chem_name"] = soup.find('div', {'class': "sum-chem-name"}).getText()
            data["effects"] = soup.find('div', {'class': "sum-effects"}).getText()
        elif "/animals" in url or "/plants" in url:
            animal_data = soup.find_all('div', {'class': "fgs-row"})
            data["family"] = animal_data[0].find('div', {'class': "family"}).getText()
            data["genus"] = animal_data[1].find('div', {'class': "genus"}).getText()
            data["species"] = animal_data[2].find('div', {'class': "species"}).getText()
            data["effects"] = soup.find('div', {'class': "sum-effects"}).getText()
        elif "/herbs" in url:
            animal_data = soup.find_all('div', {'class': "fgs-row"})
            data["family"] = animal_data[0].find('div', {'class': "family"}).getText()
            data["genus"] = animal_data[1].find('div', {'class': "genus"}).getText()
            data["species"] = animal_data[2].find('div', {'class': "species"}).getText()
            data["uses"] = soup.find('div', {'class': "sum-uses"}).getText()
        return data

    @staticmethod
    def random_experience():
        # Draw random report ids until one resolves to a parseable report.
        # 111451 is treated here as an upper bound on report ids; it may be stale.
        exp = None
        while exp is None:
            exp = Erowid.get_experience(random.randint(1, 111451))
        return exp
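

if __name__ == "__main__":
    # Minimal usage sketch, not part of the original module: it assumes
    # network access and that the Erowid page layout the parsers above were
    # written against is still current. random_experience() may issue several
    # requests before it finds a report that parses.
    from pprint import pprint

    # First few rows of the chemicals index.
    pprint(Erowid.get_chemicals()[:3])

    # One randomly chosen experience report.
    pprint(Erowid.random_experience())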