Source code for GeoBases.GeoGridModule

#!/usr/bin/python
# -*- coding: utf-8 -*-

"""
This module is grid implementation, in order
to provide geographical indexation features.

    >>> a = GeoGrid(radius=20)
    Setting grid precision to 4, avg radius to 20km
    >>> a.add('ORY', (48.72, 2.359))
    >>> a.add('CDG', (48.75, 2.361))
    >>> list(a._findInAdjacentCases(encode(48.72, 2.359, a._precision), N=2))
    ['ORY', 'CDG']
    >>> a._keys['ORY']
    {'case': 'u09t', 'lat_lng': (48.7..., 2.359)}
    >>> neighbors('t0db')
    ['t0d8', 't0e0', 't06z', 't06x', 't07p', 't0dc', 't0d9', 't0e1']
    >>> list(a._recursiveFrontier('t0dbr', N=2))
    [set(['t0dbr']), set(['t0e08', 't0e00', 't0dbn', 't0e02', 't0dbq', 't0dbp', 't0dbw', 't0dbx'])]
    >>> list(a._recursiveFrontier('t0dbr', N=1))
    [set(['t0dbr'])]
    >>> sum(len(f) for f in a._recursiveFrontier('t0dbr', N=2))
    9
    >>> sum(len(f) for f in a._recursiveFrontier('t0dbr', N=3))
    25
    >>> sum(len(f) for f in a._recursiveFrontier('t0dbr', N=4))
    49
    >>> sum(len(f) for f in a._recursiveFrontier('t0dbr', N=5))
    81
    >>> list(a.findNearKey('ORY', 20))
    [(0, 'ORY'), (0, 'CDG')]
    >>> list(a.findNearKey('ORY', 20, double_check=True))
    [(0.0, 'ORY'), (3.33..., 'CDG')]
    >>> list(a.findClosestFromPoint((48.75, 2.361), N=2, double_check=True))
    [(0.0, 'CDG'), (3.33..., 'ORY')]
"""


from __future__ import with_statement

import itertools
from geohash import encode, neighbors

from .GeoUtils import haversine


# Max recursion when iterating on frontiers
MAX_RECURSIVE_FRONTIER = 5000

# Thanks wikipedia
# hash length | lat bits | lng bits | lat error | lng error | km error
HASH_TO_ERROR = {
    1 : (2,  3,  23,       23,      2500),
    2 : (5,  5,  2.8,      5.6,     630),
    3 : (7,  8,  0.70,     0.7,     78),
    4 : (10, 10, 0.087,    0.18,    20),
    5 : (12, 13, 0.022,    0.022,   2.4),
    6 : (15, 15, 0.0027,   0.0055,  0.61),
    7 : (17, 18, 0.00068,  0.00068, 0.076),
    8 : (20, 20, 0.000085, 0.00017, 0.019)
}


[docs]class GeoGrid(object): """ This is the main and only class. """
[docs] def __init__(self, precision=5, radius=None, verbose=True): """Creates grid. :param radius: the grid accuracy, in kilometers :param precision: the hash length, if radius is given, this length is \ computed from the radius :param verbose: toggle verbosity :returns: None """ if radius is not None: get_error = lambda x: (x[1][4] < radius, abs(radius - x[1][4])) # Tricky, min of values only positive here precision = min(HASH_TO_ERROR.iteritems(), key=get_error)[0] self._precision = precision self._avg_radius = HASH_TO_ERROR[precision][4] # Double mapping self._keys = {} self._grid = {} if verbose: print 'Setting grid precision to %s, avg radius to %skm' % (precision, self._avg_radius)
def _computeCaseId(self, lat_lng): """ Computing the id the case for a (lat, lng). :param lat_lng: the lat_lng of the point (a tuple of (lat, lng)) :returns: the case_id """ return encode(*lat_lng, precision=self._precision)
[docs] def add(self, key, lat_lng, verbose=True): """ Add a point to the grid. :param key: the key to be added :param lat_lng: the lat_lng of the point (a tuple of (lat, lng)) :param verbose: toggle verbosity :returns: None """ try: case_id = self._computeCaseId(lat_lng) except (TypeError, Exception): # TypeError for wrong type (NoneType, str) # Exception for invalid coordinates if verbose: print 'Wrong coordinates %s for key %s, skipping point.' % (str(lat_lng), key) return self._keys[key] = { 'case' : case_id, 'lat_lng' : lat_lng } if case_id not in self._grid: self._grid[case_id] = [] self._grid[case_id].append(key)
def _recursiveFrontier(self, case_id, N=1, stop=True): """ Yield the successive frontiers from a case. A frontier is a set of case ids. """ if stop is True: gen = xrange(N) else: gen = itertools.count() frontier = set([case_id]) interior = frontier for i in gen: if i > MAX_RECURSIVE_FRONTIER: print '/!\ Recursion exceeded in recursiveFrontier' raise StopIteration yield frontier frontier = self._nextFrontier(frontier, interior) interior = interior | frontier @staticmethod def _nextFrontier(frontier, interior): """ Compute next frontier from a frontier and a matching interior. Interior is the set of case ids in the frontier. """ return set([k for cid in frontier for k in neighbors(cid) if k not in interior]) def _check_distance(self, candidate, ref_lat_lng, radius): """ Filter from a iterator of candidates, the ones who are within a radius if a ref_lat_lng. Yields the good ones. """ for can in candidate: dist = haversine(ref_lat_lng, self._keys[can]['lat_lng']) if dist <= radius: yield (dist, can) def _allKeysInCases(self, cases): """ Yields all keys in a iterable of case ids. """ for case_id in cases: if case_id in self._grid: for key in self._grid[case_id]: yield key def _findInAdjacentCases(self, case_id, N=1): """ Find keys in adjacent cases from a case_id. Yields found keys. """ for frontier in self._recursiveFrontier(case_id, N): for key in self._allKeysInCases(frontier): yield key def _findNearCase(self, case_id, radius=20): """ Same as _findInAdjacentCases, but the limitation is given with a radius and not with a recursive limit in adjacency computation. """ # Do your homework :D # A more accurate formula would be with # self._avg_radius = min(r1, r2) where r1 are r2 are # the size of one case if float(radius) == self._avg_radius: N = 2 else: N = int(float(radius) / self._avg_radius) + 2 return self._findInAdjacentCases(case_id, N)
[docs] def findNearPoint(self, lat_lng, radius=20, double_check=False): """ Returns a list of nearby things from a point (given latidude and longitude), and a radius for the search. Note that the haversine function, which compute distance at the surface of a sphere, here returns kilometers, so the radius should be in kms. :param lat_lng: the lat_lng of the point (a tuple of (lat, lng)) :param radius: the radius of the search (kilometers) :param double_check: when using grid, perform an additional check on results distance, \ this is useful because the grid is approximate, so the results are only as accurate \ as the grid size :returns: an iterable of (distance, key) like [(3.2, 'SFO'), (4.5, 'LAX')] """ if lat_lng is None: # Case where the lat_lng was missing from base return iter([]) candidate = self._findNearCase(self._computeCaseId(lat_lng), radius) if double_check: return self._check_distance(candidate, lat_lng, radius) else: return ((0, can) for can in candidate)
[docs] def findNearKey(self, key, radius=20, double_check=False): """ Same as findNearPoint, except the point is given not by a lat/lng, but with its key, like ORY or SFO. We just look up in the base to retrieve lat/lng, and call findNearPoint. :param key: the key :param radius: the radius of the search (kilometers) :param double_check: when using grid, perform an additional check on results distance, \ this is useful because the grid is approximate, so the results are only as accurate \ as the grid size :returns: an iterable of (distance, key) like [(3.2, 'SFO'), (4.5, 'LAX')] """ if key not in self._keys: # Case where the key probably did not have a proper geocode # and as such was never indexed return iter([]) candidate = self._findNearCase(self._keys[key]['case'], radius) if double_check: return self._check_distance(candidate, self._keys[key]['lat_lng'], radius) else: return ((0, can) for can in candidate)
def _findClosestFromCase(self, case_id, N=1, from_keys=None): """ Find closest keys from a case. """ found = set() for frontier in self._recursiveFrontier(case_id, stop=False): found = found | set(self._allKeysInCases(frontier)) if from_keys is not None: # If from_keys is empty this will turn # into an infinite loop # stopped by MAX_RECURSIVE_FRONTIER # This should not happen since we treated that case # at the beginning found = found & from_keys # Heuristic # We have to compare the distance of the farthest found # against the distance really covered by the search #print frontier if len(found) >= N and len(frontier) > 1: break return found
[docs] def findClosestFromPoint(self, lat_lng, N=1, double_check=False, from_keys=None): """ Concept close to findNearPoint, but here we do not look for the things radius-close to a point, we look for the closest thing from this point, given by latitude/longitude. Note that a similar implementation is done in the LocalHelper, to find efficiently N closest point in a graph, from a point (using heaps). :param lat_lng: the lat_lng of the point (a tuple of (lat, lng)) :param N: the N closest results wanted :param from_keys: if None, it takes all keys in consideration, else takes from_keys \ iterable of keys to perform findClosestFromPoint. This is useful when we have names \ and have to perform a matching based on name and location (see fuzzyGetAroundLatLng). :param double_check: when using grid, perform an additional check on results distance, \ this is useful because the grid is approximate, so the results are only as accurate \ as the grid size :returns: an iterable of (distance, key) like [(3.2, 'SFO'), (4.5, 'LAX')] """ if lat_lng is None: # Case where the lat_lng was missing from base return iter([]) if from_keys is not None: # We convert to set before testing to nullity # because of empty iterators from_keys = set(from_keys) # If from_keys is empty, the result is obvious if not from_keys: return [] # We cannot give what we do not have N = min(N, len(from_keys)) # Some precaution for the number of wanted keys N = min(N, len(self._keys)) # The case of the point is computed by _computeCaseId candidate = self._findClosestFromCase(self._computeCaseId(lat_lng), N, from_keys) if double_check: return sorted(self._check_distance(candidate, lat_lng, radius=float('inf')))[:N] else: return ((0, f) for f in candidate)
[docs] def findClosestFromKey(self, key, N=1, double_check=False, from_keys=None): """ Concept close to findNearPoint, but here we do not look for the things radius-close to a point, we look for the closest thing from this point, given by latitude/longitude. Note that a similar implementation is done in the LocalHelper, to find efficiently N closest point in a graph, from a point (using heaps). :param key: the key :param N: the N closest results wanted :param from_keys: if None, it takes all keys in consideration, else takes from_keys \ iterable of keys to perform findClosestFromPoint. This is useful when we have names \ and have to perform a matching based on name and location (see fuzzyGetAroundLatLng). :param double_check: when using grid, perform an additional check on results distance, \ this is useful because the grid is approximate, so the results are only as accurate \ as the grid size :returns: an iterable of (distance, key) like [(3.2, 'SFO'), (4.5, 'LAX')] """ if key not in self._keys: # Case where the key probably did not have a proper geocode # and as such was never indexed return iter([]) if from_keys is not None: # We convert to set before testing to nullity # because of empty iterators from_keys = set(from_keys) # If from_keys is empty, the result is obvious if not from_keys: return [] # We cannot give what we do not have N = min(N, len(from_keys)) # Some precaution for the number of wanted keys N = min(N, len(self._keys)) # The case of the point is just retrieved candidate = self._findClosestFromCase(self._keys[key]['case'], N, from_keys) if double_check: return sorted(self._check_distance(candidate, self._keys[key]['lat_lng'], radius=float('inf')))[:N] else: return ((0, f) for f in candidate)
def _test(): """ When called directly, launching doctests. """ import doctest extraglobs = {} opt = (doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_ONLY_FIRST_FAILURE | doctest.IGNORE_EXCEPTION_DETAIL) doctest.testmod(extraglobs=extraglobs, optionflags=opt) if __name__ == '__main__': _test()

Project Versions

This Page