# Source code for searchdocs

#!/usr/bin/env python3
#
#  __init__.py
"""
Search the Python documentation from your terminal.
"""
#
#  Copyright © 2021 Dominic Davis-Foster <dominic@davis-foster.co.uk>
#
#  Permission is hereby granted, free of charge, to any person obtaining a copy
#  of this software and associated documentation files (the "Software"), to deal
#  in the Software without restriction, including without limitation the rights
#  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#  copies of the Software, and to permit persons to whom the Software is
#  furnished to do so, subject to the following conditions:
#
#  The above copyright notice and this permission notice shall be included in all
#  copies or substantial portions of the Software.
#
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
#  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
#  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
#  DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
#  OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
#  OR OTHER DEALINGS IN THE SOFTWARE.
#

# stdlib
import functools
import re
import shutil
import warnings
from base64 import urlsafe_b64encode
from typing import List, Tuple, Union, overload

# 3rd party
import appdirs
import diskcache  # type: ignore[import]
import sphobjinv  # type: ignore[import]
from apeye.requests_url import RequestsURL
from apeye.url import URL
from domdf_python_tools.paths import PathPlus
from fuzzywuzzy.fuzz import ratio  # type: ignore[import]
from typing_extensions import Literal

# Names exported by ``from searchdocs import *``.
__all__ = [
		"cache_dir",
		"resolve_url",
		"cache_dir_for_url",
		"download_objects_inv",
		"find_url",
		]

# Package metadata.
__author__: str = "Dominic Davis-Foster"
__copyright__: str = "2021 Dominic Davis-Foster"
__license__: str = "MIT License"
__version__: str = "0.2.2"
__email__: str = "dominic@davis-foster.co.uk"

#: Directory in which cached files are stored.
#: This is the per-user cache directory reported by ``appdirs`` for the ``searchdocs`` application.
cache_dir = PathPlus(appdirs.user_cache_dir("searchdocs"))
# Runs at import time, so the directory is available to every function below.
# NOTE(review): ``maybe_make`` presumably does nothing if the directory already exists -- confirm.
cache_dir.maybe_make(parents=True)


def resolve_url(url: Union[str, RequestsURL]) -> RequestsURL:
	"""
	Resolve any redirects in the given URL.

	A ``HEAD`` request is sent to ``url`` with redirects followed,
	and the URL of the final response is returned.

	:param url: The URL to resolve.

	:returns: The URL reported by the response once all redirects have been followed.
	"""

	# ``allow_redirects`` is passed explicitly so that the HEAD request follows redirects.
	response = RequestsURL(url).head(allow_redirects=True)

	return RequestsURL(response.url)


@functools.lru_cache()
def cache_dir_for_url(url: Union[str, URL]) -> PathPlus:
	"""
	Returns the path to the cache subdirectory for the given URL.

	The subdirectory is named after the URL-safe base64 encoding of the URL
	(as UTF-8 text), so each distinct URL maps to its own directory
	beneath :data:`cache_dir`. The directory itself is not created here.

	Results are memoised with :func:`functools.lru_cache`.

	:param url: The URL to obtain the cache subdirectory for.
	"""

	encoded_url = urlsafe_b64encode(str(url).encode("UTF-8"))

	return cache_dir / encoded_url.decode("UTF-8")


def _etag_to_filename(etag: str) -> str:
	"""
	Convert the value of an ``ETag`` header into a name which is safe to use as a filename.

	:param etag: The raw header value, e.g. ``'"5f3c-1a2b"'`` or ``'W/"5f3c-1a2b"'``. May be empty.

	:returns: The tag with the weak-validator prefix and surrounding quotes removed,
		and every character outside ``[A-Za-z0-9_-]`` replaced with an underscore.
		If nothing remains the fixed name ``'objects.inv'`` is returned.
	"""

	etag = etag.strip()

	# Weak validators are prefixed with ``W/``; left in place the slash
	# would be interpreted as a directory separator when building the path.
	if etag.startswith("W/"):
		etag = etag[2:]

	return re.sub(r"[^A-Za-z0-9_-]", '_', etag.strip('"')) or "objects.inv"


def download_objects_inv(docs_url: Union[str, RequestsURL]) -> PathPlus:
	"""
	Download the Sphinx ``objects.inv`` file for the documentation available at the given URL.

	The file is stored in the cache subdirectory for ``docs_url`` under a name derived from
	the ``ETag`` header of the response. If that subdirectory already exists, a ``HEAD`` request
	is made first and the cached file is reused when its name matches the current ``ETag``.
	Otherwise the whole subdirectory is removed and the file is downloaded afresh.
	If the server does not send an ``ETag`` the cache cannot be validated,
	so the file is always downloaded again.

	:param docs_url: The base URL for the documentation, e.g. ``"https://docs.python.org/3/"``.

	:returns: The filename of the cached file.

	:raises requests.HTTPError: If the request for ``objects.inv`` returns an HTTP error status.

	.. latex:clearpage::
	"""

	docs_url = resolve_url(docs_url)
	objects_inv_url = docs_url / "objects.inv"

	docs_cache_dir = cache_dir_for_url(docs_url)

	if docs_cache_dir.exists():
		head_response = objects_inv_url.head(allow_redirects=True)
		current_etag = head_response.headers.get("etag", '')

		# Without an ETag there is nothing to compare the cached file against.
		if current_etag:
			cached_file = docs_cache_dir / _etag_to_filename(current_etag)

			if cached_file.is_file():
				return cached_file

		# The cache is stale (or unverifiable); discard everything stored for this URL.
		shutil.rmtree(docs_cache_dir)

	response = objects_inv_url.get()
	# Don't cache the body of an error page as though it were the inventory.
	response.raise_for_status()

	objects_inv_file = docs_cache_dir / _etag_to_filename(response.headers.get("etag", ''))
	objects_inv_file.parent.maybe_make(parents=True)
	objects_inv_file.write_bytes(response.content)

	return objects_inv_file


def find_url(docs_url: Union[str, RequestsURL], search_term: str) -> URL:
	"""
	Find the complete documentation URL for the given function, class, method etc.

	Results are cached on disk in the cache subdirectory for ``docs_url``,
	keyed by ``search_term``.

	:param docs_url: The base URL for the documentation, e.g. ``"https://docs.python.org/3/"``.
	:param search_term: The object to search for, e.g. ``'TemporaryDirectory'``.

	:return: The url of the object in the documentation, e.g.
		``URL('https://docs.python.org/3/library/tempfile.html#tempfile.TemporaryDirectory')``.

	:raises ValueError: If the inventory yields no suggestions for ``search_term``.
	"""

	docs_url = resolve_url(docs_url)
	docs_cache_dir = cache_dir_for_url(docs_url)

	# This must happen before the result cache is opened:
	# an outdated cache subdirectory is deleted as part of the download.
	objects_inv = download_objects_inv(docs_url)

	with diskcache.Cache(directory=str(docs_cache_dir)) as search_result_cache:
		# A single ``get`` avoids a second lookup, and the window in which
		# the entry could disappear between checking for it and reading it.
		# Stored values are always strings, so ``None`` reliably means "missing".
		cached_url = search_result_cache.get(search_term)

		if cached_url is not None:
			return URL(cached_url)

		inventory = Inventory(objects_inv)

		# TODO: expose with_score as an option?
		suggestions: List[Tuple[str, int, int]] = inventory.suggest_from_name(
				search_term,
				with_index=True,
				with_score=True,
				)

		if not suggestions:
			raise ValueError(f"Object {search_term} not found.")

		# Each suggestion is ``(name, score, index)``; take the index of the first suggestion.
		desired_object = inventory.objects[suggestions[0][2]]
		url = docs_url / desired_object.uri_expanded

		search_result_cache.set(search_term, str(url))

		return url


class Inventory(sphobjinv.inventory.Inventory):
	"""
	Subclass of :class:`sphobjinv.inventory.Inventory` which can suggest objects based on their names alone.
	"""

	# Based on https://github.com/bskinn/sphobjinv
	# Copyright (c) 2016-2021 Brian Skinn
	# MIT Licensed

	@overload
	def suggest_from_name(
			self,
			name: str,
			*,
			with_index: Literal[True],
			thresh: int = ...,
			with_score: Literal[False] = ...
			) -> List[Tuple[str, int]]: ...

	@overload
	def suggest_from_name(
			self,
			name: str,
			*,
			with_score: Literal[True],
			thresh: int = ...,
			with_index: Literal[False] = ...
			) -> List[Tuple[str, int]]: ...

	@overload
	def suggest_from_name(
			self,
			name: str,
			*,
			with_index: Literal[True],
			with_score: Literal[True],
			thresh: int = ...
			) -> List[Tuple[str, int, int]]: ...

	@overload
	def suggest_from_name(
			self,
			name: str,
			*,
			thresh: int = ...,
			with_index: Literal[False] = ...,
			with_score: Literal[False] = ...
			) -> List[str]: ...

	def suggest_from_name(
			self,
			name: str,
			*,
			thresh: int = 50,
			with_index: bool = False,
			with_score: bool = False
			) -> Union[List[str], List[Tuple[str, int]], List[Tuple[str, int, int]]]:
		"""
		Similar to :meth:`sphobjinv.inventory.Inventory.suggest`, but only searches the names of objects and not their types.

		:param name: Object name to search for.
		:param thresh: Match quality threshold. Matches which score below this value are discarded.
		:param with_index: Whether to include the index in the inventory of each match.
		:param with_score: Whether to include the match quality score for each matched name.

		| If both ``with_index`` and ``with_score`` are :py:obj:`True`, returns a list of 3-element tuples of ``(name, score, index)``.
		| If ``with_index`` is :py:obj:`True`, returns a list of 2-element tuples of ``(name, index)``.
		| If ``with_score`` is :py:obj:`True`, returns a list of 2-element tuples of ``(name, score)``.
		| If neither are :py:obj:`True`, returns a list of strings containing just the names.

		The matches are given in the order returned by ``fuzzywuzzy.process.extract``.
		"""

		# Suppress any UserWarning about the speed issue
		with warnings.catch_warnings():
			warnings.simplefilter("ignore")
			# 3rd party
			from fuzzywuzzy import process as fwp  # type: ignore[import]

		# Must propagate list index to include in output,
		# so each search value is the object's name prefixed with its index in ``self.objects``.
		srch_list = [f"{i} {o.name}" for i, o in enumerate(self.objects)]

		# result --> (name, score, index)
		results: List[Tuple[str, int, int]] = []

		for choice, score in fwp.extract(name, srch_list, limit=None, scorer=ratio):
			if score < thresh:
				continue

			# The index never contains a space, so the first space
			# always separates it from the (arbitrary) object name.
			index, _, matched_name = choice.partition(' ')
			results.append((matched_name, score, int(index)))

		# Return based on flags
		if with_score and with_index:
			return results
		elif with_score:
			return [(matched_name, score) for matched_name, score, _ in results]
		elif with_index:
			return [(matched_name, index) for matched_name, _, index in results]
		else:
			return [matched_name for matched_name, _, _ in results]