Improve message searching in Client

This commit is contained in:
Mads Marquart
2020-01-13 16:54:34 +01:00
parent e76c6179fb
commit 55182e21b6

View File

@@ -3,7 +3,7 @@ import time
import requests import requests
from ._core import log from ._core import log
from . import _util, _graphql, _session, _poll, _user from . import _util, _graphql, _session, _poll, _user, _thread, _message
from ._exception import FBchatException, FBchatFacebookError from ._exception import FBchatException, FBchatFacebookError
from ._thread import ThreadLocation from ._thread import ThreadLocation
@@ -24,7 +24,7 @@ from ._quick_reply import (
) )
from ._plan import PlanData from ._plan import PlanData
from typing import Sequence from typing import Sequence, Iterable, Tuple, Optional
class Client: class Client:
@@ -183,37 +183,68 @@ class Client:
return rtn return rtn
def search(self, query, fetch_messages=False, thread_limit=5, message_limit=5): def _search_messages(self, query, offset, limit):
data = {"query": query, "offset": offset, "limit": limit}
j = self.session._payload_post("/ajax/mercury/search_snippets.php?dpr=1", data)
total_snippets = j["search_snippets"][query]
rtn = []
for node in j["graphql_payload"]["message_threads"]:
type_ = node["thread_type"]
if type_ == "GROUP":
thread = Group(
session=self.session, id=node["thread_key"]["thread_fbid"]
)
elif type_ == "ONE_TO_ONE":
thread = _thread.Thread(
session=self.session, id=node["thread_key"]["other_user_id"]
)
# if True: # TODO: This check!
# thread = UserData._from_graphql(self.session, node)
# else:
# thread = PageData._from_graphql(self.session, node)
else:
thread = None
log.warning("Unknown thread type %s, data: %s", type_, node)
if thread:
rtn.append((thread, total_snippets[thread.id]["num_total_snippets"]))
else:
rtn.append((None, 0))
return rtn
def search_messages(
self, query: str, limit: Optional[int]
) -> Iterable[Tuple[_thread.ThreadABC, int]]:
"""Search for messages in all threads. """Search for messages in all threads.
Intended to be used alongside `ThreadABC.search_messages`.
Warning! If someone sends a message to a thread that matches the query while
we're searching, some snippets will be returned twice.
Not sure if we should handle it; Facebook's implementation doesn't...
Args: Args:
query: Text to search for query: Text to search for
fetch_messages: Whether to fetch `Message` objects or IDs only limit: Max. number of threads to retrieve. If ``None``, all threads will be
thread_limit (int): Max. number of threads to retrieve retrieved.
message_limit (int): Max. number of messages to retrieve
Returns: Returns:
typing.Dict[str, typing.Iterable]: Dictionary with thread IDs as keys and iterables to get messages as values Iterable with tuples of threads, and the total amount of matches.
Raises:
FBchatException: If request failed
""" """
data = {"query": query, "snippetLimit": thread_limit} offset = 0
j = self.session._payload_post("/ajax/mercury/search_snippets.php?dpr=1", data) # The max limit is measured empirically to ~500, safe default chosen below
result = j["search_snippets"][query] for limit in _util.get_limits(limit, max_limit=100):
data = self._search_messages(query, offset, limit)
if not result: for thread, total_snippets in data:
return {} if thread:
yield (thread, total_snippets)
if fetch_messages: if len(data) < limit:
search_method = self.search_for_messages return # No more data to fetch
else: offset += limit
search_method = self.search_for_message_ids
return {
thread_id: search_method(query, limit=message_limit, thread_id=thread_id)
for thread_id in result
}
def _fetch_info(self, *ids): def _fetch_info(self, *ids):
data = {"ids[{}]".format(i): _id for i, _id in enumerate(ids)} data = {"ids[{}]".format(i): _id for i, _id in enumerate(ids)}