

from bs4 import BeautifulSoup
import requests

url = ("https://raw.githubusercontent.com/"
       "joelgrus/data/master/getting-data.html")

# Download the sample page. A timeout keeps the script from hanging forever
# on a stalled connection, and raise_for_status() stops us from parsing an
# HTTP error page (404, 500, ...) as if it were the real document.
response = requests.get(url, timeout=10)
response.raise_for_status()
html = response.text
print(html)

print("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")

# Parse the raw HTML with the html5lib parser and show the resulting tree.
soup = BeautifulSoup(html, 'html5lib')
print(soup)

print("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")

##########################################################
# soup.find(name) returns the first tag with that name (and its contents).
# NOTE: per the Beautiful Soup docs, find() gives None when nothing matches,
# so the lookups below assume the page contains an <h1> and at least one <p>.

x = soup.find('h1')
print(x)


first_paragraph = soup.find('p')
print(first_paragraph)

##########################################################
# soup.p is attribute-style shorthand for soup.find('p')
##########################################################
first_paragraph = soup.p
print(first_paragraph)

###################################################
# Text content of the first <p> (reuses the tag found above instead of
# searching the tree again)

text = first_paragraph.text
print(text)

###################################################
# Split text into words

words = text.split()
print(words)

# get multiple tags at once:
all_paragraphs = soup.find_all('p')  # or just soup('p')
print("\nall_paragraphs:\n", all_paragraphs)

# Keep only the paragraphs that carry a non-empty id attribute; the list
# gathered above is reused rather than re-scanning the document.
paragraphs_with_ids = [p for p in all_paragraphs if p.get('id')]
print("\nparagraphs_with_ids:\n", paragraphs_with_ids)


