Optimize BeautifulSoup input field parsing

This commit is contained in:
Mads Marquart
2019-07-03 11:09:41 +02:00
parent 4f2a24848e
commit 1e6222f46a
2 changed files with 9 additions and 7 deletions

View File

@@ -12,6 +12,10 @@ from . import _util, _exception
FB_DTSG_REGEX = re.compile(r'name="fb_dtsg" value="(.*?)"') FB_DTSG_REGEX = re.compile(r'name="fb_dtsg" value="(.*?)"')
def find_input_fields(html):
    """Parse ``html`` and return a soup restricted to ``input`` tags.

    The markup is parsed with the built-in ``"html.parser"`` backend, and a
    ``SoupStrainer("input")`` is passed as ``parse_only`` so that only
    ``input`` tags are kept in the resulting ``BeautifulSoup`` object.
    """
    only_inputs = bs4.SoupStrainer("input")
    return bs4.BeautifulSoup(html, "html.parser", parse_only=only_inputs)
def session_factory(user_agent=None): def session_factory(user_agent=None):
session = requests.session() session = requests.session()
session.headers["Referer"] = "https://www.facebook.com" session.headers["Referer"] = "https://www.facebook.com"
@@ -21,7 +25,7 @@ def session_factory(user_agent=None):
def _2fa_helper(session, code, r): def _2fa_helper(session, code, r):
soup = bs4.BeautifulSoup(r.text, "html.parser") soup = find_input_fields(r.text)
data = dict() data = dict()
url = "https://m.facebook.com/login/checkpoint/" url = "https://m.facebook.com/login/checkpoint/"
@@ -106,12 +110,10 @@ class State(object):
def login(cls, email, password, user_agent=None): def login(cls, email, password, user_agent=None):
session = session_factory(user_agent=user_agent) session = session_factory(user_agent=user_agent)
soup = bs4.BeautifulSoup( soup = find_input_fields(session.get("https://m.facebook.com/").text)
session.get("https://m.facebook.com/").text, "html.parser"
)
data = dict( data = dict(
(elem["name"], elem["value"]) (elem["name"], elem["value"])
for elem in soup.findAll("input") for elem in soup
if elem.has_attr("value") and elem.has_attr("name") if elem.has_attr("value") and elem.has_attr("name")
) )
data["email"] = email data["email"] = email
@@ -157,7 +159,7 @@ class State(object):
def from_session(cls, session): def from_session(cls, session):
r = session.get(_util.prefix_url("/")) r = session.get(_util.prefix_url("/"))
soup = bs4.BeautifulSoup(r.text, "html.parser") soup = find_input_fields(r.text)
fb_dtsg_element = soup.find("input", {"name": "fb_dtsg"}) fb_dtsg_element = soup.find("input", {"name": "fb_dtsg"})
if fb_dtsg_element: if fb_dtsg_element:

View File

@@ -16,7 +16,7 @@ requires = [
"aenum", "aenum",
"attrs~=18.2.0", "attrs~=18.2.0",
"requests", "requests",
"beautifulsoup4", "beautifulsoup4~=4.0",
] ]
description-file = "README.rst" description-file = "README.rst"
classifiers = [ classifiers = [