Improve message searching in ThreadABC

This commit is contained in:
Mads Marquart
2020-01-13 15:54:09 +01:00
parent e4f2c6c403
commit e76c6179fb
2 changed files with 63 additions and 17 deletions

View File

@@ -170,6 +170,34 @@ class Message:
return result, mentions return result, mentions
@attrs_default
class MessageSnippet(Message):
    """Represents data in a Facebook message snippet.

    Inherits `Message`.
    """

    #: ID of the sender
    author = attr.ib()
    #: Datetime of when the message was sent
    created_at = attr.ib()
    #: The actual message
    text = attr.ib()
    #: A dict with offsets, mapped to the matched text
    matched_keywords = attr.ib()

    @classmethod
    def _parse(cls, thread, data):
        """Build a snippet from one raw search-result entry.

        Args:
            thread: The thread the snippet belongs to; passed through unchanged
            data: Mapping with the keys ``message_id``, ``author``,
                ``timestamp``, ``body`` and ``matched_keywords``

        Returns:
            A new instance of ``cls`` populated from ``data``.
        """
        author = data["author"]
        # ``str.rstrip("fbid:")`` removes trailing *characters* from the set
        # {f, b, i, d, :}, so it never removed the leading "fbid:" marker (and
        # could eat the end of the ID). Drop the prefix explicitly instead.
        prefix = "fbid:"
        if author.startswith(prefix):
            author = author[len(prefix):]

        return cls(
            thread=thread,
            id=data["message_id"],
            author=author,
            created_at=_util.millis_to_datetime(data["timestamp"]),
            text=data["body"],
            # The offsets arrive as string keys; expose them as integers
            matched_keywords={
                int(offset): keyword
                for offset, keyword in data["matched_keywords"].items()
            },
        )
@attrs_default @attrs_default
class MessageData(Message): class MessageData(Message):
"""Represents data in a Facebook message. """Represents data in a Facebook message.

View File

@@ -250,20 +250,9 @@ class ThreadABC(metaclass=abc.ABCMeta):
# ) # )
# return self.send(Message(text=payload, quick_replies=[new])) # return self.send(Message(text=payload, quick_replies=[new]))
def search_messages( def _search_messages(self, query, offset, limit):
self, query: str, offset: int = 0, limit: int = 5 from . import _message
) -> Iterable[str]:
"""Find and get message IDs by query.
Args:
query: Text to search for
offset (int): Number of messages to skip
limit (int): Max. number of messages to retrieve
Returns:
typing.Iterable: Found Message IDs
"""
# TODO: Return proper searchable iterator
data = { data = {
"query": query, "query": query,
"snippetOffset": offset, "snippetOffset": offset,
@@ -273,10 +262,39 @@ class ThreadABC(metaclass=abc.ABCMeta):
} }
j = self.session._payload_post("/ajax/mercury/search_snippets.php?dpr=1", data) j = self.session._payload_post("/ajax/mercury/search_snippets.php?dpr=1", data)
result = j["search_snippets"][query] result = j["search_snippets"][query].get(self.id)
snippets = result[self.id]["snippets"] if result.get(self.id) else [] if not result:
for snippet in snippets: return (0, [])
yield snippet["message_id"]
# TODO: May or may not be a good idea to attach the current thread?
# For now, we just create a new thread:
thread = self.__class__(session=self.session, id=self.id)
snippets = [
_message.MessageSnippet._parse(thread, snippet)
for snippet in result["snippets"]
]
return (result["num_total_snippets"], snippets)
def search_messages(self, query: str, limit: int) -> Iterable["MessageSnippet"]:
    """Find message snippets in this thread that match a query.

    Warning! If someone sends a message to the thread that matches the query
    while we're searching, some snippets will be returned twice.

    Not sure if we should handle it, Facebook's implementation doesn't...

    Args:
        query: Text to search for
        limit: Max. number of message snippets to retrieve
    """
    skipped = 0
    # The max limit is measured empirically to 420; 50 is chosen as a safe
    # per-request size.
    # NOTE(review): presumably `get_limits` splits `limit` into per-request
    # sizes of at most `max_limit` - confirm against `_util.get_limits`.
    for page_size in _util.get_limits(limit, max_limit=50):
        _total, page = self._search_messages(query, skipped, page_size)
        yield from page
        if len(page) < page_size:
            # A short page means there is no more data to fetch
            return
        skipped += page_size
def fetch_messages(self, limit: int = 20, before: datetime.datetime = None): def fetch_messages(self, limit: int = 20, before: datetime.datetime = None):
"""Fetch messages in a thread, ordered by most recent. """Fetch messages in a thread, ordered by most recent.