Merge pull request #507 from carpedm20/refactor-limits

Refactor method limits
This commit is contained in:
Mads Marquart
2020-01-14 22:13:58 +01:00
committed by GitHub
7 changed files with 267 additions and 156 deletions

View File

@@ -1,4 +1,3 @@
import itertools
import fbchat import fbchat
session = fbchat.Session.login("<email>", "<password>") session = fbchat.Session.login("<email>", "<password>")
@@ -62,7 +61,9 @@ print("thread's name: {}".format(thread.name))
# Here should be an example of `getUnread` # Here should be an example of `getUnread`
# Print image url for 20 last images from thread. # Print image url for up to 20 last images from thread.
images = thread.fetch_images() images = list(thread.fetch_images(limit=20))
for image in itertools.islice(image, 20): for image in images:
print(image.large_preview_url) if isinstance(image, fbchat.ImageAttachment):
url = c.fetch_image_url(image.id)
print(url)

View File

@@ -3,7 +3,7 @@ import time
import requests import requests
from ._core import log from ._core import log
from . import _util, _graphql, _session, _poll, _user from . import _util, _graphql, _session, _poll, _user, _page, _group, _thread, _message
from ._exception import FBchatException, FBchatFacebookError from ._exception import FBchatException, FBchatFacebookError
from ._thread import ThreadLocation from ._thread import ThreadLocation
@@ -24,7 +24,7 @@ from ._quick_reply import (
) )
from ._plan import PlanData from ._plan import PlanData
from typing import Sequence from typing import Sequence, Iterable, Tuple, Optional
class Client: class Client:
@@ -78,7 +78,7 @@ class Client:
users.append(_user.UserData._from_all_fetch(self.session, data)) users.append(_user.UserData._from_all_fetch(self.session, data))
return users return users
def search_for_users(self, name, limit=10): def search_for_users(self, name: str, limit: int) -> Iterable[_user.UserData]:
"""Find and get users by their name. """Find and get users by their name.
Args: Args:
@@ -87,92 +87,71 @@ class Client:
Returns: Returns:
list: `User` objects, ordered by relevance list: `User` objects, ordered by relevance
Raises:
FBchatException: If request failed
""" """
params = {"search": name, "limit": limit} params = {"search": name, "limit": limit}
(j,) = self.session._graphql_requests( (j,) = self.session._graphql_requests(
_graphql.from_query(_graphql.SEARCH_USER, params) _graphql.from_query(_graphql.SEARCH_USER, params)
) )
return [ return (
UserData._from_graphql(self.session, node) UserData._from_graphql(self.session, node)
for node in j[name]["users"]["nodes"] for node in j[name]["users"]["nodes"]
] )
def search_for_pages(self, name, limit=10): def search_for_pages(self, name: str, limit: int) -> Iterable[_page.PageData]:
"""Find and get pages by their name. """Find and get pages by their name.
Args: Args:
name: Name of the page name: Name of the page
limit: The max. amount of pages to fetch
Returns:
list: `Page` objects, ordered by relevance
Raises:
FBchatException: If request failed
""" """
params = {"search": name, "limit": limit} params = {"search": name, "limit": limit}
(j,) = self.session._graphql_requests( (j,) = self.session._graphql_requests(
_graphql.from_query(_graphql.SEARCH_PAGE, params) _graphql.from_query(_graphql.SEARCH_PAGE, params)
) )
return [ return (
PageData._from_graphql(self.session, node) PageData._from_graphql(self.session, node)
for node in j[name]["pages"]["nodes"] for node in j[name]["pages"]["nodes"]
] )
def search_for_groups(self, name, limit=10): def search_for_groups(self, name: str, limit: int) -> Iterable[_group.GroupData]:
"""Find and get group threads by their name. """Find and get group threads by their name.
Args: Args:
name: Name of the group thread name: Name of the group thread
limit: The max. amount of groups to fetch limit: The max. amount of groups to fetch
Returns:
list: `Group` objects, ordered by relevance
Raises:
FBchatException: If request failed
""" """
params = {"search": name, "limit": limit} params = {"search": name, "limit": limit}
(j,) = self.session._graphql_requests( (j,) = self.session._graphql_requests(
_graphql.from_query(_graphql.SEARCH_GROUP, params) _graphql.from_query(_graphql.SEARCH_GROUP, params)
) )
return [ return (
GroupData._from_graphql(self.session, node) GroupData._from_graphql(self.session, node)
for node in j["viewer"]["groups"]["nodes"] for node in j["viewer"]["groups"]["nodes"]
] )
def search_for_threads(self, name, limit=10): def search_for_threads(self, name: str, limit: int) -> Iterable[_thread.ThreadABC]:
"""Find and get threads by their name. """Find and get threads by their name.
Args: Args:
name: Name of the thread name: Name of the thread
limit: The max. amount of groups to fetch limit: The max. amount of threads to fetch
Returns:
list: `User`, `Group` and `Page` objects, ordered by relevance
Raises:
FBchatException: If request failed
""" """
params = {"search": name, "limit": limit} params = {"search": name, "limit": limit}
(j,) = self.session._graphql_requests( (j,) = self.session._graphql_requests(
_graphql.from_query(_graphql.SEARCH_THREAD, params) _graphql.from_query(_graphql.SEARCH_THREAD, params)
) )
rtn = []
for node in j[name]["threads"]["nodes"]: for node in j[name]["threads"]["nodes"]:
if node["__typename"] == "User": if node["__typename"] == "User":
rtn.append(UserData._from_graphql(self.session, node)) yield UserData._from_graphql(self.session, node)
elif node["__typename"] == "MessageThread": elif node["__typename"] == "MessageThread":
# MessageThread => Group thread # MessageThread => Group thread
rtn.append(GroupData._from_graphql(self.session, node)) yield GroupData._from_graphql(self.session, node)
elif node["__typename"] == "Page": elif node["__typename"] == "Page":
rtn.append(PageData._from_graphql(self.session, node)) yield PageData._from_graphql(self.session, node)
elif node["__typename"] == "Group": elif node["__typename"] == "Group":
# We don't handle Facebook "Groups" # We don't handle Facebook "Groups"
pass pass
@@ -181,39 +160,68 @@ class Client:
"Unknown type {} in {}".format(repr(node["__typename"]), node) "Unknown type {} in {}".format(repr(node["__typename"]), node)
) )
def _search_messages(self, query, offset, limit):
data = {"query": query, "offset": offset, "limit": limit}
j = self.session._payload_post("/ajax/mercury/search_snippets.php?dpr=1", data)
total_snippets = j["search_snippets"][query]
rtn = []
for node in j["graphql_payload"]["message_threads"]:
type_ = node["thread_type"]
if type_ == "GROUP":
thread = Group(
session=self.session, id=node["thread_key"]["thread_fbid"]
)
elif type_ == "ONE_TO_ONE":
thread = _thread.Thread(
session=self.session, id=node["thread_key"]["other_user_id"]
)
# if True: # TODO: This check!
# thread = UserData._from_graphql(self.session, node)
# else:
# thread = PageData._from_graphql(self.session, node)
else:
thread = None
log.warning("Unknown thread type %s, data: %s", type_, node)
if thread:
rtn.append((thread, total_snippets[thread.id]["num_total_snippets"]))
else:
rtn.append((None, 0))
return rtn return rtn
def search(self, query, fetch_messages=False, thread_limit=5, message_limit=5): def search_messages(
self, query: str, limit: Optional[int]
) -> Iterable[Tuple[_thread.ThreadABC, int]]:
"""Search for messages in all threads. """Search for messages in all threads.
Intended to be used alongside `ThreadABC.search_messages`
Warning! If someone send a message to a thread that matches the query, while
we're searching, some snippets will get returned twice.
Not sure if we should handle it, Facebook's implementation doesn't...
Args: Args:
query: Text to search for query: Text to search for
fetch_messages: Whether to fetch `Message` objects or IDs only limit: Max. number of threads to retrieve. If ``None``, all threads will be
thread_limit (int): Max. number of threads to retrieve retrieved.
message_limit (int): Max. number of messages to retrieve
Returns: Returns:
typing.Dict[str, typing.Iterable]: Dictionary with thread IDs as keys and iterables to get messages as values Iterable with tuples of threads, and the total amount of matches.
Raises:
FBchatException: If request failed
""" """
data = {"query": query, "snippetLimit": thread_limit} offset = 0
j = self.session._payload_post("/ajax/mercury/search_snippets.php?dpr=1", data) # The max limit is measured empirically to ~500, safe default chosen below
result = j["search_snippets"][query] for limit in _util.get_limits(limit, max_limit=100):
data = self._search_messages(query, offset, limit)
if not result: for thread, total_snippets in data:
return {} if thread:
yield (thread, total_snippets)
if fetch_messages: if len(data) < limit:
search_method = self.search_for_messages return # No more data to fetch
else: offset += limit
search_method = self.search_for_message_ids
return {
thread_id: search_method(query, limit=message_limit, thread_id=thread_id)
for thread_id in result
}
def _fetch_info(self, *ids): def _fetch_info(self, *ids):
data = {"ids[{}]".format(i): _id for i, _id in enumerate(ids)} data = {"ids[{}]".format(i): _id for i, _id in enumerate(ids)}
@@ -317,33 +325,10 @@ class Client:
return rtn return rtn
def fetch_thread_list( def _fetch_threads(self, limit, before, folders):
self, limit=20, thread_location=ThreadLocation.INBOX, before=None
):
"""Fetch the client's thread list.
Args:
limit (int): Max. number of threads to retrieve. Capped at 20
thread_location (ThreadLocation): INBOX, PENDING, ARCHIVED or OTHER
before (datetime.datetime): The point from which to retrieve threads
Returns:
list: `Thread` objects
Raises:
FBchatException: If request failed
"""
if limit > 20 or limit < 1:
raise ValueError("`limit` should be between 1 and 20")
if thread_location in ThreadLocation:
loc_str = thread_location.value
else:
raise TypeError('"thread_location" must be a value of ThreadLocation')
params = { params = {
"limit": limit, "limit": limit,
"tags": [loc_str], "tags": folders,
"before": _util.datetime_to_millis(before) if before else None, "before": _util.datetime_to_millis(before) if before else None,
"includeDeliveryReceipts": True, "includeDeliveryReceipts": True,
"includeSeqID": False, "includeSeqID": False,
@@ -358,15 +343,47 @@ class Client:
if _type == "GROUP": if _type == "GROUP":
rtn.append(GroupData._from_graphql(self.session, node)) rtn.append(GroupData._from_graphql(self.session, node))
elif _type == "ONE_TO_ONE": elif _type == "ONE_TO_ONE":
user = UserData._from_thread_fetch(self.session, node) rtn.append(UserData._from_thread_fetch(self.session, node))
if user:
rtn.append(user)
else: else:
raise FBchatException( rtn.append(None)
"Unknown thread type: {}, with data: {}".format(_type, node) log.warning("Unknown thread type: %s, data: %s", _type, node)
)
return rtn return rtn
def fetch_threads(
    self, limit: Optional[int], location: ThreadLocation = ThreadLocation.INBOX,
) -> Iterable[_thread.ThreadABC]:
    """Lazily fetch the client's thread list, one batch at a time.

    Args:
        limit: Max. number of threads to retrieve. If ``None``, all threads will be
            retrieved.
        location: INBOX, PENDING, ARCHIVED or OTHER

    Raises:
        ValueError: If a full batch contained no new, known thread, so there is
            no timestamp to continue paging from.
    """
    # Batch size per request. The real cap was measured empirically as 837;
    # a safe value well below that is used.
    batch_cap = 100

    # TODO: Clean this up after implementing support for more threads types
    yielded_ids = set()
    cursor = None
    for batch_limit in _util.get_limits(limit, batch_cap):
        batch = self._fetch_threads(batch_limit, cursor, [location.value])

        # The cursor is rebuilt from this batch only
        cursor = None
        for candidate in batch:
            # Skip unknown threads (falsy) and threads already returned
            if not candidate or candidate.id in yielded_ids:
                continue
            yielded_ids.add(candidate.id)
            # TODO: Ensure type-wise that .last_active is available
            cursor = candidate.last_active
            yield candidate

        if len(batch) < batch_cap:
            return  # No more data to fetch

        # Checked here in case _fetch_threads only returned unusable threads
        if not cursor:
            raise ValueError("Too many unknown threads.")
def fetch_unread(self): def fetch_unread(self):
"""Fetch unread threads. """Fetch unread threads.

View File

@@ -91,8 +91,6 @@ class ImageAttachment(Attachment):
@classmethod @classmethod
def _from_list(cls, data): def _from_list(cls, data):
data = data["node"]
previews = { previews = {
Image._from_uri_or_none(data["image"]), Image._from_uri_or_none(data["image"]),
Image._from_uri(data["image1"]), Image._from_uri(data["image1"]),
@@ -156,7 +154,6 @@ class VideoAttachment(Attachment):
@classmethod @classmethod
def _from_list(cls, data): def _from_list(cls, data):
data = data["node"]
previews = { previews = {
Image._from_uri(data["image"]), Image._from_uri(data["image"]),
Image._from_uri(data["image1"]), Image._from_uri(data["image1"]),

View File

@@ -170,6 +170,34 @@ class Message:
return result, mentions return result, mentions
@attrs_default
class MessageSnippet(Message):
    """Represents data in a Facebook message snippet.

    Inherits `Message`.
    """

    #: ID of the sender
    author = attr.ib()
    #: Datetime of when the message was sent
    created_at = attr.ib()
    #: The actual message
    text = attr.ib()
    #: A dict with offsets, mapped to the matched text
    matched_keywords = attr.ib()

    @classmethod
    def _parse(cls, thread, data):
        """Build a snippet from one entry of a search result.

        Args:
            thread: The thread the snippet belongs to
            data: Mapping with the keys ``message_id``, ``author``, ``timestamp``,
                ``body`` and ``matched_keywords``
        """
        author = data["author"]
        # NOTE(review): the author presumably arrives as "fbid:<id>" - confirm.
        # `str.rstrip("fbid:")` strips a *set of characters* from the *right*
        # end, so it never removed this prefix. Remove the prefix explicitly.
        prefix = "fbid:"
        if author.startswith(prefix):
            author = author[len(prefix) :]

        return cls(
            thread=thread,
            id=data["message_id"],
            author=author,
            created_at=_util.millis_to_datetime(data["timestamp"]),
            text=data["body"],
            # Offsets arrive as string keys; convert them to integers
            matched_keywords={int(k): v for k, v in data["matched_keywords"].items()},
        )
@attrs_default @attrs_default
class MessageData(Message): class MessageData(Message):
"""Represents data in a Facebook message. """Represents data in a Facebook message.

View File

@@ -250,20 +250,9 @@ class ThreadABC(metaclass=abc.ABCMeta):
# ) # )
# return self.send(Message(text=payload, quick_replies=[new])) # return self.send(Message(text=payload, quick_replies=[new]))
def search_messages( def _search_messages(self, query, offset, limit):
self, query: str, offset: int = 0, limit: int = 5 from . import _message
) -> Iterable[str]:
"""Find and get message IDs by query.
Args:
query: Text to search for
offset (int): Number of messages to skip
limit (int): Max. number of messages to retrieve
Returns:
typing.Iterable: Found Message IDs
"""
# TODO: Return proper searchable iterator
data = { data = {
"query": query, "query": query,
"snippetOffset": offset, "snippetOffset": offset,
@@ -273,33 +262,54 @@ class ThreadABC(metaclass=abc.ABCMeta):
} }
j = self.session._payload_post("/ajax/mercury/search_snippets.php?dpr=1", data) j = self.session._payload_post("/ajax/mercury/search_snippets.php?dpr=1", data)
result = j["search_snippets"][query] result = j["search_snippets"][query].get(self.id)
snippets = result[self.id]["snippets"] if result.get(self.id) else [] if not result:
for snippet in snippets: return (0, [])
yield snippet["message_id"]
def fetch_messages(self, limit: int = 20, before: datetime.datetime = None): # TODO: May or may not be a good idea to attach the current thread?
"""Fetch messages in a thread, ordered by most recent. # For now, we just create a new thread:
thread = self.__class__(session=self.session, id=self.id)
snippets = [
_message.MessageSnippet._parse(thread, snippet)
for snippet in result["snippets"]
]
return (result["num_total_snippets"], snippets)
def search_messages(self, query: str, limit: int) -> Iterable["MessageSnippet"]:
"""Find and get message IDs by query.
Warning! If someone send a message to the thread that matches the query, while
we're searching, some snippets will get returned twice.
Not sure if we should handle it, Facebook's implementation doesn't...
Args: Args:
limit: Max. number of messages to retrieve query: Text to search for
before: The point from which to retrieve messages limit: Max. number of message snippets to retrieve
Returns:
list: `Message` objects
""" """
offset = 0
# The max limit is measured empirically to 420, safe default chosen below
for limit in _util.get_limits(limit, max_limit=50):
_, snippets = self._search_messages(query, offset, limit)
yield from snippets
if len(snippets) < limit:
return # No more data to fetch
offset += limit
def _fetch_messages(self, limit, before):
from . import _message from . import _message
# TODO: Return proper searchable iterator
params = { params = {
"id": self.id, "id": self.id,
"message_limit": limit, "message_limit": limit,
"load_messages": True, "load_messages": True,
"load_read_receipts": True, "load_read_receipts": True,
# "load_delivery_receipts": False,
# "is_work_teamwork_not_putting_muted_in_unreads": False,
"before": _util.datetime_to_millis(before) if before else None, "before": _util.datetime_to_millis(before) if before else None,
} }
(j,) = self.session._graphql_requests( (j,) = self.session._graphql_requests(
_graphql.from_doc_id("1860982147341344", params) _graphql.from_doc_id("1860982147341344", params) # 2696825200377124
) )
if j.get("message_thread") is None: if j.get("message_thread") is None:
@@ -307,48 +317,86 @@ class ThreadABC(metaclass=abc.ABCMeta):
"Could not fetch thread {}: {}".format(self.id, j) "Could not fetch thread {}: {}".format(self.id, j)
) )
# TODO: Should we parse the returned thread data, too?
read_receipts = j["message_thread"]["read_receipts"]["nodes"] read_receipts = j["message_thread"]["read_receipts"]["nodes"]
# TODO: May or may not be a good idea to attach the current thread? # TODO: May or may not be a good idea to attach the current thread?
# For now, we just create a new thread: # For now, we just create a new thread:
thread = self.__class__(session=self.session, id=self.id) thread = self.__class__(session=self.session, id=self.id)
messages = [ return [
_message.MessageData._from_graphql(thread, message, read_receipts) _message.MessageData._from_graphql(thread, message, read_receipts)
for message in j["message_thread"]["messages"]["nodes"] for message in j["message_thread"]["messages"]["nodes"]
] ]
messages.reverse()
return messages def fetch_messages(self, limit: Optional[int]) -> Iterable["_message.Message"]:
"""Fetch messages in a thread, with most recent messages first.
def fetch_images(self): Args:
"""Fetch images/videos posted in the thread.""" limit: Max. number of threads to retrieve. If ``None``, all threads will be
# TODO: Return proper searchable iterator retrieved.
data = {"id": self.id, "first": 48} """
# This is measured empirically as 210 in extreme cases, fairly safe default
# chosen below
MAX_BATCH_LIMIT = 100
before = None
for limit in _util.get_limits(limit, MAX_BATCH_LIMIT):
messages = self._fetch_messages(limit, before)
if before:
# Strip the first thread
yield from messages[1:]
else:
yield from messages
if len(messages) < MAX_BATCH_LIMIT:
return # No more data to fetch
before = messages[-1].created_at
def _fetch_images(self, limit, after):
data = {"id": self.id, "first": limit, "after": after}
(j,) = self.session._graphql_requests( (j,) = self.session._graphql_requests(
_graphql.from_query_id("515216185516880", data) _graphql.from_query_id("515216185516880", data)
) )
while True:
try:
i = j[self.id]["message_shared_media"]["edges"][0]
except IndexError:
if j[self.id]["message_shared_media"]["page_info"].get("has_next_page"):
data["after"] = j[self.id]["message_shared_media"]["page_info"].get(
"end_cursor"
)
(j,) = self.session._graphql_requests(
_graphql.from_query_id("515216185516880", data)
)
continue
else:
break
if i["node"].get("__typename") == "MessageImage": result = j[self.id]["message_shared_media"]
yield _file.ImageAttachment._from_list(i)
elif i["node"].get("__typename") == "MessageVideo": print(len(result["edges"]))
yield _file.VideoAttachment._from_list(i)
rtn = []
for edge in result["edges"]:
node = edge["node"]
type_ = node["__typename"]
if type_ == "MessageImage":
rtn.append(_file.ImageAttachment._from_list(node))
elif type_ == "MessageVideo":
rtn.append(_file.VideoAttachment._from_list(node))
else: else:
yield _attachment.Attachment(id=i["node"].get("legacy_attachment_id")) log.warning("Unknown image type %s, data: %s", type_, edge)
del j[self.id]["message_shared_media"]["edges"][0] rtn.append(None)
# result["page_info"]["has_next_page"] is not correct when limit > 12
return (result["page_info"]["end_cursor"], rtn)
def fetch_images(self, limit: Optional[int]) -> Iterable[_attachment.Attachment]:
    """Fetch images/videos posted in the thread.

    Results are produced lazily, one request at a time.

    Args:
        limit: Max. number of images to retrieve. If ``None``, all images will be
            retrieved.
    """
    cursor = None
    # The max limit on this request is unknown, so we set it reasonably high
    # This way `limit=None` also still works
    for batch_limit in _util.get_limits(limit, max_limit=1000):
        cursor, images = self._fetch_images(batch_limit, cursor)
        if not images:
            return  # No more data to fetch
        for image in images:
            # Falsy entries are skipped rather than returned to the caller
            if image:
                yield image
def set_nickname(self, user_id: str, nickname: str): def set_nickname(self, user_id: str, nickname: str):
"""Change the nickname of a user in the thread. """Change the nickname of a user in the thread.

View File

@@ -13,6 +13,8 @@ from ._exception import (
FBchatPleaseRefresh, FBchatPleaseRefresh,
) )
from typing import Iterable, Optional
#: Default list of user agents #: Default list of user agents
USER_AGENTS = [ USER_AGENTS = [
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36",
@@ -24,6 +26,24 @@ USER_AGENTS = [
] ]
def get_limits(limit: Optional[int], max_limit: int) -> Iterable[int]:
    """Helper that generates per-request limits based on a max limit.

    This is a generator, so the argument checks below run on first iteration,
    not when the function is called.

    Args:
        limit: Total number of items wanted. If ``None``, ``max_limit`` is
            yielded forever and the caller decides when to stop.
        max_limit: Largest value to yield; must be positive.

    Yields:
        ``max_limit`` for every full batch, followed by the remainder (if any),
        such that the yielded values sum to ``limit``.

    Raises:
        ValueError: If ``max_limit`` is not positive, or ``limit`` is negative.
    """
    # A zero max limit would divide by zero below (or spin forever yielding 0),
    # and a negative one would yield negative batch sizes.
    if max_limit <= 0:
        raise ValueError("Max limit must be positive")

    if limit is None:
        # Generate infinite items
        while True:
            yield max_limit

    if limit < 0:
        raise ValueError("Limit cannot be negative")

    # Generate n items
    full_batches, remainder = divmod(limit, max_limit)
    for _ in range(full_batches):
        yield max_limit
    if remainder:
        yield remainder
def now(): def now():
return int(time.time() * 1000) return int(time.time() * 1000)

View File

@@ -46,7 +46,7 @@ def test_imageattachment_from_list():
height=988, height=988,
), ),
}, },
) == ImageAttachment._from_list({"node": data}) ) == ImageAttachment._from_list(data)
def test_videoattachment_from_list(): def test_videoattachment_from_list():
@@ -88,7 +88,7 @@ def test_videoattachment_from_list():
height=368, height=368,
), ),
}, },
) == VideoAttachment._from_list({"node": data}) ) == VideoAttachment._from_list(data)
def test_graphql_to_attachment_empty(): def test_graphql_to_attachment_empty():