Source code for libpysal.graph.base

import math
from functools import cached_property

import numpy as np
import pandas as pd
from scipy import sparse

from libpysal.weights import W

from ._contiguity import (
    _block_contiguity,
    _fuzzy_contiguity,
    _queen,
    _rook,
    _vertex_set_intersection,
)
from ._kernel import _distance_band, _kernel
from ._matching import _spatial_matching
from ._parquet import _read_parquet, _to_parquet
from ._plotting import _explore_graph, _plot
from ._set_ops import SetOpsMixin
from ._spatial_lag import _lag_spatial
from ._triangulation import _delaunay, _gabriel, _relative_neighborhood, _voronoi
from ._utils import (
    _evaluate_index,
    _neighbor_dict_to_edges,
    _resolve_islands,
    _sparse_to_arrays,
)

ALLOWED_TRANSFORMATIONS = ("O", "B", "R", "D", "V", "C")

# listed alphabetically
__author__ = """"
Martin Fleischmann (martin@martinfleischmann.net)
Eli Knaap (ek@knaaptime.com)
Serge Rey (sjsrey@gmail.com)
Levi John Wolf (levi.john.wolf@gmail.com)
"""


class Graph(SetOpsMixin):
    """Graph class encoding spatial weights matrices

    The :class:`Graph` is currently experimental and its API is incomplete
    and unstable.
    """

    def __init__(self, adjacency, transformation="O", is_sorted=False):
        """Weights base class based on adjacency list

        It is recommended to use one of the ``from_*`` or ``build_*``
        constructors rather than invoking ``__init__`` directly.

        Each observation needs to be present in the focal level, at least
        as a self-loop with a weight 0.

        Parameters
        ----------
        adjacency : pandas.Series
            A MultiIndexed pandas.Series with ``"focal"`` and ``"neighbor"``
            levels encoding adjacency, and values encoding weights. By
            convention, isolates are encoded as self-loops with a weight 0.
        transformation : str, default "O"
            weights transformation used to produce the table.

            - **O** -- Original
            - **B** -- Binary
            - **R** -- Row-standardization (global sum :math:`=n`)
            - **D** -- Double-standardization (global sum :math:`=1`)
            - **V** -- Variance stabilizing
            - **C** -- Custom
        is_sorted : bool, default False
            ``adjacency`` capturing the graph needs to be canonically sorted
            to initialize the class. The MultiIndex needs to be ordered
            i-->j on both focal and neighbor levels according to the order
            of ids in the original data from which the Graph is created.
            Sorting is performed by default based on the order of unique
            values in the focal level. Set ``is_sorted=True`` to skip this
            step if the adjacency is already canonically sorted.
        """
        if not isinstance(adjacency, pd.Series):
            raise TypeError(
                "The adjacency table needs to be a pandas.Series. "
                f"'{type(adjacency)}' was given instead."
            )
        if not adjacency.index.names == ["focal", "neighbor"]:
            raise ValueError(
                "The index of the adjacency table needs to be a MultiIndex "
                "named ['focal', 'neighbor']."
            )
        if not adjacency.name == "weight":
            raise ValueError(
                "The adjacency needs to be named 'weight'. "
                f"'{adjacency.name}' was given instead."
            )
        if not pd.api.types.is_numeric_dtype(adjacency):
            raise ValueError(
                "The 'weight' needs to be of a numeric dtype. "
                f"'{adjacency.dtype}' dtype was given instead."
            )
        if adjacency.isna().any():
            raise ValueError("The adjacency table cannot contain missing values.")
        if transformation.upper() not in ALLOWED_TRANSFORMATIONS:
            raise ValueError(
                f"'transformation' needs to be one of {ALLOWED_TRANSFORMATIONS}. "
                f"'{transformation}' was given instead."
            )

        if not is_sorted:
            # adjacency always ordered i-->j on both levels
            ids = adjacency.index.get_level_values(0).unique().values
            adjacency = adjacency.reindex(ids, level=0).reindex(ids, level=1)

        self._adjacency = adjacency
        self.transformation = transformation

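    # Example -- a minimal sketch of constructing a Graph directly from an
    # adjacency Series (the ``from_*``/``build_*`` constructors below are
    # preferred in practice). The ids and weights are illustrative, and
    # ``"c"`` is encoded as an isolate via a zero-weight self-loop:
    #
    #     import pandas as pd
    #     from libpysal.graph import Graph
    #
    #     index = pd.MultiIndex.from_arrays(
    #         [["a", "a", "b", "b", "c"], ["b", "c", "a", "c", "c"]],
    #         names=["focal", "neighbor"],
    #     )
    #     adjacency = pd.Series(
    #         [1.0, 1.0, 1.0, 1.0, 0.0], index=index, name="weight"
    #     )
    #     graph = Graph(adjacency)
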
    def __getitem__(self, item):
        """Easy lookup based on focal index

        Parameters
        ----------
        item : hashable
            hashable representing an index value

        Returns
        -------
        pandas.Series
            subset of the adjacency table for ``item``
        """
        if item in self.isolates:
            return pd.Series(
                [],
                index=pd.Index([], name="neighbor"),
                name="weight",
            )
        return self._adjacency.loc[item]

    def copy(self, deep=True):
        """Make a copy of this Graph's adjacency table and transformation

        Parameters
        ----------
        deep : bool, optional
            Make a deep copy of the adjacency table, by default True

        Returns
        -------
        Graph
            libpysal.graph.Graph as a copy of the original
        """
        return Graph(
            self._adjacency.copy(deep=deep),
            transformation=self.transformation,
            is_sorted=True,
        )

    @cached_property
    def adjacency(self):
        """Return a copy of the adjacency list

        Returns
        -------
        pandas.Series
            Underlying adjacency list
        """
        return self._adjacency.copy()

    @classmethod
    def from_W(cls, w):  # noqa: N802
        """Create an experimental Graph from a libpysal.weights.W object

        Parameters
        ----------
        w : libpysal.weights.W

        Returns
        -------
        Graph
            libpysal.graph.Graph from W
        """
        return cls.from_weights_dict(dict(w))

    def to_W(self):  # noqa: N802
        """Convert Graph to a libpysal.weights.W object

        Returns
        -------
        libpysal.weights.W
            representation of graph as a weights.W object
        """
        ids, labels = pd.factorize(
            self._adjacency.index.get_level_values("focal"), sort=False
        )
        neighbors = (
            self._adjacency.reset_index(level=1)
            .groupby(ids)
            .apply(
                lambda group: list(
                    group[
                        ~((group.index == group.neighbor) & (group.weight == 0))
                    ].neighbor
                )
            )
        )
        neighbors.index = labels[neighbors.index]
        weights = (
            self._adjacency.reset_index(level=1)
            .groupby(ids)
            .apply(
                lambda group: list(
                    group[
                        ~((group.index == group.neighbor) & (group.weight == 0))
                    ].weight
                )
            )
        )
        weights.index = labels[weights.index]
        return W(
            neighbors.to_dict(),
            weights.to_dict(),
            id_order=labels.tolist(),
        )

    @classmethod
    def from_adjacency(
        cls, adjacency, focal_col="focal", neighbor_col="neighbor", weight_col="weight"
    ):
        """Create a Graph from a pandas DataFrame formatted as an adjacency list

        Parameters
        ----------
        adjacency : pandas.DataFrame
            a dataframe formatted as an adjacency list. Should have columns
            "focal", "neighbor", and "weight", or columns that can be mapped
            to these (e.g. origin, destination, cost)
        focal_col : str, optional
            name of column holding focal/origin index, by default 'focal'
        neighbor_col : str, optional
            name of column holding neighbor/destination index,
            by default 'neighbor'
        weight_col : str, optional
            name of column holding weight values, by default 'weight'

        Returns
        -------
        Graph
            libpysal.graph.Graph
        """
        cols = dict(
            zip(
                [focal_col, neighbor_col, weight_col],
                ["focal_col", "neighbor_col", "weight_col"],
                strict=True,
            )
        )
        for col in cols:
            assert col in adjacency.columns.tolist(), (
                f'"{col}" was given for `{cols[col]}`, but the '
                f"columns available in `adjacency` are: "
                f"{adjacency.columns.tolist()}."
            )
        return cls.from_arrays(
            adjacency[focal_col].values,
            adjacency[neighbor_col].values,
            adjacency[weight_col].values,
        )

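    # Example -- a sketch of remapping a hypothetical edge list with
    # origin/destination/cost columns to the expected names (the column
    # names and values here are illustrative, not part of the library):
    #
    #     import pandas as pd
    #     from libpysal.graph import Graph
    #
    #     edges = pd.DataFrame(
    #         {
    #             "origin": [0, 0, 1, 1, 2, 2],
    #             "destination": [1, 2, 0, 2, 0, 1],
    #             "cost": [4.0, 2.0, 4.0, 1.0, 2.0, 1.0],
    #         }
    #     )
    #     graph = Graph.from_adjacency(
    #         edges, focal_col="origin", neighbor_col="destination",
    #         weight_col="cost",
    #     )
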
    @classmethod
    def from_sparse(cls, sparse, ids=None):
        """Convert a ``scipy.sparse`` array to a PySAL ``Graph`` object.

        Parameters
        ----------
        sparse : scipy.sparse array
            sparse representation of a graph
        ids : list-like, default None
            list-like of ids for geometries that is mappable to positions
            from sparse. If None, the positions are used as labels.

        Returns
        -------
        Graph
            libpysal.graph.Graph based on sparse
        """
        return cls.from_arrays(*_sparse_to_arrays(sparse, ids))

    @classmethod
    def from_arrays(cls, focal_ids, neighbor_ids, weight, **kwargs):
        """Generate Graph from arrays of indices and weights of the same length

        The arrays need to be sorted in a way ensuring that
        ``focal_ids.unique()`` is equal to the index of the original
        observations from which the Graph is being built.

        Parameters
        ----------
        focal_ids : array-like
            focal indices
        neighbor_ids : array-like
            neighbor indices
        weight : array-like
            weights
        **kwargs
            keyword arguments passed to the class constructor

        Returns
        -------
        Graph
            libpysal.graph.Graph based on arrays
        """
        w = cls(
            pd.Series(
                weight,
                name="weight",
                index=pd.MultiIndex.from_arrays(
                    [focal_ids, neighbor_ids], names=["focal", "neighbor"]
                ),
            ),
            **kwargs,
        )
        return w

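    # Example -- a sketch of building a small binary graph from three
    # aligned arrays (ids and weights are illustrative):
    #
    #     from libpysal.graph import Graph
    #
    #     focal = ["a", "a", "b", "c"]
    #     neighbor = ["b", "c", "a", "a"]
    #     weight = [1, 1, 1, 1]
    #     graph = Graph.from_arrays(focal, neighbor, weight)
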
    @classmethod
    def from_weights_dict(cls, weights_dict):
        """Generate Graph from a dict of dicts

        Parameters
        ----------
        weights_dict : dictionary of dictionaries
            weights dictionary with the ``{focal: {neighbor: weight}}``
            structure.

        Returns
        -------
        Graph
            libpysal.graph.Graph based on weights dictionary of dictionaries
        """
        idx = {f: list(neighbors) for f, neighbors in weights_dict.items()}
        data = {f: list(neighbors.values()) for f, neighbors in weights_dict.items()}
        return cls.from_dicts(idx, data)

    @classmethod
    def from_dicts(cls, neighbors, weights=None):
        """Generate Graph from dictionaries of neighbors and weights

        Parameters
        ----------
        neighbors : dict
            dictionary of neighbors with the
            ``{focal: [neighbor1, neighbor2]}`` structure
        weights : dict, optional
            dictionary of weights with the ``{focal: [weight1, weight2]}``
            structure. If None, assumes binary weights.

        Returns
        -------
        Graph
            libpysal.graph.Graph based on dictionaries
        """
        head, tail, weight = _neighbor_dict_to_edges(neighbors, weights=weights)
        return cls.from_arrays(head, tail, weight)

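    # Example -- sketches of the two dictionary constructors (ids and
    # weights are illustrative):
    #
    #     from libpysal.graph import Graph
    #
    #     weighted = Graph.from_weights_dict(
    #         {0: {1: 0.5, 2: 0.5}, 1: {0: 1.0}, 2: {0: 1.0}}
    #     )
    #     binary = Graph.from_dicts({0: [1, 2], 1: [0], 2: [0]})
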
    @classmethod
    def build_block_contiguity(cls, regimes):
        """Generate Graph from block contiguity (regime neighbors)

        Block contiguity structures are relevant when defining neighbor
        relations based on membership in a regime. For example, all counties
        belonging to the same state could be defined as neighbors, in an
        analysis of all counties in the US.

        Parameters
        ----------
        regimes : list-like
            list-like of regimes. If pandas.Series, its index is used to
            encode the Graph. Otherwise a default RangeIndex is used.

        Returns
        -------
        Graph
            libpysal.graph.Graph encoding block contiguity
        """
        ids = _evaluate_index(regimes)

        return cls.from_dicts(_block_contiguity(regimes, ids=ids))

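    # Example -- a sketch where all observations sharing a regime label
    # become mutual neighbors (labels are illustrative):
    #
    #     import pandas as pd
    #     from libpysal.graph import Graph
    #
    #     regimes = pd.Series(["east", "east", "west", "west", "west"])
    #     blocks = Graph.build_block_contiguity(regimes)
    #     # observations 0-1 form one complete block, 2-4 another
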
    @classmethod
    def build_contiguity(cls, geometry, rook=True, by_perimeter=False, strict=False):
        """Generate Graph from geometry based on contiguity

        The contiguity builder assumes that all geometries form a coverage,
        i.e. a non-overlapping mesh where neighbouring geometries share only
        points or segments of their exterior boundaries. In practice,
        ``build_contiguity`` is capable of creating a Graph of partially
        overlapping geometries when ``strict=False, by_perimeter=False``,
        but that would not strictly follow the definition of queen or rook
        contiguity.

        Parameters
        ----------
        geometry : array-like of shapely.Geometry objects
            Could be geopandas.GeoSeries or geopandas.GeoDataFrame, in which
            case the resulting Graph is indexed by the original index. If an
            array of shapely.Geometry objects is passed, Graph will assume a
            RangeIndex.
        rook : bool, optional
            Contiguity method. If True, two geometries are considered
            neighbours if they share at least one edge. If False, two
            geometries are considered neighbours if they share at least one
            vertex. By default True.
        by_perimeter : bool, optional
            If True, ``weight`` represents the length of the shared boundary
            between adjacent units, by default False. For a row-standardized
            version of perimeter weights, use
            ``Graph.build_contiguity(gdf, by_perimeter=True).transform("r")``.
        strict : bool, optional
            Use the strict topological method. If False, the contiguity is
            determined based on shared coordinates or coordinate sequences
            representing edges. This assumes a geometry coverage that is
            topologically correct. This method is faster but can miss some
            relations. If True, the contiguity is determined based on
            geometric relations that do not require precise topology. This
            method is slower but will result in correct contiguity even if
            the topology of geometries is not optimal. By default False.

        Returns
        -------
        Graph
            libpysal.graph.Graph encoding contiguity weights
        """
        ids = _evaluate_index(geometry)

        if hasattr(geometry, "geometry"):
            # potentially cast GeoDataFrame to GeoSeries
            geometry = geometry.geometry

        if strict:
            # use shapely-based constructors
            if rook:
                return cls.from_arrays(
                    *_rook(geometry, ids=ids, by_perimeter=by_perimeter)
                )
            return cls.from_arrays(
                *_queen(geometry, ids=ids, by_perimeter=by_perimeter)
            )

        # use vertex-based constructor
        return cls.from_arrays(
            *_vertex_set_intersection(
                geometry, rook=rook, ids=ids, by_perimeter=by_perimeter
            )
        )

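    # Example -- a minimal sketch assuming ``geopandas`` is installed and
    # ``"polygons.gpkg"`` is a hypothetical polygon layer:
    #
    #     import geopandas
    #     from libpysal.graph import Graph
    #
    #     gdf = geopandas.read_file("polygons.gpkg")
    #     rook = Graph.build_contiguity(gdf, rook=True)
    #     queen = Graph.build_contiguity(gdf, rook=False)
    #     perimeter = Graph.build_contiguity(
    #         gdf, by_perimeter=True
    #     ).transform("r")
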
    @classmethod
    def build_distance_band(
        cls, data, threshold, binary=True, alpha=-1.0, kernel=None, bandwidth=None
    ):
        """Generate Graph from geometry based on a distance band

        Parameters
        ----------
        data : numpy.ndarray, geopandas.GeoSeries, geopandas.GeoDataFrame
            geometries containing locations to compute the distance band.
            If a geopandas object with Point geometry is provided, the
            .geometry attribute is used. If a numpy.ndarray with shapely
            geometry is used, then the coordinates are extracted and used.
            If a numpy.ndarray of a shape (2,n) is used, it is assumed to
            contain x, y coordinates.
        threshold : float
            distance band
        binary : bool, optional
            If True, :math:`w_{ij}=1` if :math:`d_{ij} \\leq threshold`,
            otherwise :math:`w_{ij}=0`. If False,
            :math:`w_{ij}=d_{ij}^{\\alpha}`. By default True.
        alpha : float, optional
            distance decay parameter for weight (default -1.0).
            If alpha is positive, the weights will not decline with
            distance. Ignored if ``binary=True`` or ``kernel`` is not None.
        kernel : str, optional
            kernel function to use in order to weight the output graph. See
            :meth:`Graph.build_kernel` for details. Ignored if
            ``binary=True``.
        bandwidth : float (default: None)
            distance to use in the kernel computation. Should be on the same
            scale as the input coordinates. Ignored if ``binary=True`` or
            ``kernel=None``.

        Returns
        -------
        Graph
            libpysal.graph.Graph encoding distance band weights
        """
        ids = _evaluate_index(data)

        dist = _distance_band(data, threshold)

        if binary:
            head, tail, weight = _kernel(
                dist,
                kernel="boxcar",
                metric="precomputed",
                ids=ids,
                bandwidth=np.inf,
            )
        elif kernel is not None:
            head, tail, weight = _kernel(
                dist,
                kernel=kernel,
                metric="precomputed",
                ids=ids,
                bandwidth=bandwidth,
            )
        else:
            head, tail, weight = _kernel(
                dist,
                kernel=lambda distances, alpha: np.power(distances, alpha),
                metric="precomputed",
                ids=ids,
                bandwidth=alpha,
            )
        adjacency = pd.DataFrame.from_dict(
            {"focal": head, "neighbor": tail, "weight": weight}
        ).set_index("focal")
        # drop diagonal
        counts = adjacency.index.value_counts()
        no_isolates = counts[counts > 1]
        adjacency = adjacency[
            ~(
                adjacency.index.isin(no_isolates.index)
                & (adjacency.index == adjacency.neighbor)
            )
        ]
        # set isolates to 0 - distance band should never contain self-weight
        adjacency.loc[~adjacency.index.isin(no_isolates.index), "weight"] = 0

        return cls.from_arrays(
            adjacency.index.values, adjacency.neighbor.values, adjacency.weight.values
        )

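    # Example -- a sketch of binary and distance-decay bands over a small
    # illustrative coordinate array (the threshold and coordinates are
    # assumptions for demonstration only):
    #
    #     import numpy as np
    #     from libpysal.graph import Graph
    #
    #     coords = np.array([[0, 0], [0, 1], [1, 1], [5, 5]])
    #     band = Graph.build_distance_band(coords, threshold=1.5)
    #     decay = Graph.build_distance_band(
    #         coords, threshold=1.5, binary=False, alpha=-1.5
    #     )
    #     # the point at (5, 5) falls outside every band -> isolate
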
    @classmethod
    def build_fuzzy_contiguity(
        cls,
        geometry,
        tolerance=None,
        buffer=None,
        predicate="intersects",
    ):
        """Generate Graph from fuzzy contiguity

        Fuzzy contiguity relaxes the notion of contiguity neighbors for the
        case of geometry collections that violate the condition of planar
        enforcement. It handles three types of conditions present in such
        collections that would result in missing links when using the
        regular contiguity methods.

        The first are edges for nearby polygons that should be shared, but
        are digitized separately for the individual polygons and the
        resulting edges do not coincide, but instead intersect. This case
        can also be covered by ``build_contiguity`` with the
        ``strict=False`` parameter.

        The second case is similar to the first, only the resultant edges
        do not intersect but are "close". The optional buffering of geometry
        then closes the gaps between the polygons and a resulting
        intersection is encoded as a link.

        The final case arises when one polygon is "inside" a second polygon
        but is not encoded to represent a hole in the containing polygon.

        It is also possible to create a contiguity based on a custom spatial
        predicate.

        Parameters
        ----------
        geometry : array-like of shapely.Geometry objects
            Could be geopandas.GeoSeries or geopandas.GeoDataFrame, in which
            case the resulting Graph is indexed by the original index. If an
            array of shapely.Geometry objects is passed, Graph will assume a
            RangeIndex.
        tolerance : float, optional
            The percentage of the length of the minimum side of the bounding
            rectangle for the ``geometry`` to use in determining the
            buffering distance. Either ``tolerance`` or ``buffer`` may be
            specified but not both. By default None.
        buffer : float, optional
            Exact buffering distance in the units of ``geometry.crs``.
            Either ``tolerance`` or ``buffer`` may be specified but not
            both. By default None.
        predicate : str, optional
            The predicate to use for determination of neighbors. Default is
            'intersects'. If None is passed, neighbours are determined based
            on the intersection of bounding boxes. See the documentation of
            ``geopandas.GeoSeries.sindex.query`` for allowed predicates.

        Returns
        -------
        Graph
            libpysal.graph.Graph encoding fuzzy contiguity
        """
        ids = _evaluate_index(geometry)

        heads, tails, weights = _fuzzy_contiguity(
            geometry, ids, tolerance=tolerance, buffer=buffer, predicate=predicate
        )

        return cls.from_arrays(heads, tails, weights)

    @classmethod
    def build_kernel(
        cls,
        data,
        kernel="gaussian",
        k=None,
        bandwidth=None,
        metric="euclidean",
        p=2,
        coincident="raise",
    ):
        """Generate Graph from geometry data based on a kernel function

        Parameters
        ----------
        data : numpy.ndarray, geopandas.GeoSeries, geopandas.GeoDataFrame
            geometries over which to compute a kernel. If a geopandas object
            with Point geometry is provided, the .geometry attribute is
            used. If a numpy.ndarray with shapely geometry is used, then the
            coordinates are extracted and used. If a numpy.ndarray of a
            shape (2,n) is used, it is assumed to contain x, y coordinates.
            If metric="precomputed", data is assumed to contain a
            precomputed distance metric.
        kernel : string or callable (default: 'gaussian')
            kernel function to apply over the distance matrix computed by
            ``metric``. The following kernels are supported:

            - ``"triangular"``
            - ``"parabolic"``
            - ``"gaussian"``
            - ``"bisquare"``
            - ``"cosine"``
            - ``"boxcar"``/discrete: all distances less than ``bandwidth``
              are 1, and all other distances are 0
            - ``"identity"``/None: do nothing, weight similarity based on
              raw distance
            - ``callable``: a user-defined function that takes the distance
              vector and the bandwidth and returns the kernel:
              kernel(distances, bandwidth)
        k : int (default: None)
            number of nearest neighbors used to truncate the kernel. This is
            assumed to be constant across samples. If None, no truncation is
            conducted.
        bandwidth : float (default: None)
            distance to use in the kernel computation. Should be on the same
            scale as the input coordinates.
        metric : string or callable (default: 'euclidean')
            distance function to apply over the input coordinates. Supported
            options depend on whether or not scikit-learn is installed. If
            so, then any distance function supported by scikit-learn is
            supported here. Otherwise, only euclidean, minkowski, and
            manhattan/cityblock distances are admitted.
        p : int (default: 2)
            parameter for minkowski metric, ignored if metric != "minkowski".
        coincident : str, optional (default "raise")
            Method for handling coincident points when ``k`` is not None.
            Options are ``'raise'`` (raising an exception when coincident
            points are present), ``'jitter'`` (randomly displace coincident
            points to produce uniqueness), & ``'clique'`` (induce
            fully-connected sub cliques for coincident points).

        Returns
        -------
        Graph
            libpysal.graph.Graph encoding kernel weights
        """
        ids = _evaluate_index(data)

        head, tail, weight = _kernel(
            data,
            bandwidth=bandwidth,
            metric=metric,
            kernel=kernel,
            k=k,
            p=p,
            ids=ids,
            coincident=coincident,
        )

        return cls.from_arrays(head, tail, weight)

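    # Example -- a sketch of a Gaussian kernel truncated to the 3 nearest
    # neighbors (the coordinates and parameter values are illustrative):
    #
    #     import numpy as np
    #     from libpysal.graph import Graph
    #
    #     coords = np.random.default_rng(0).random((20, 2))
    #     gaussian = Graph.build_kernel(
    #         coords, kernel="gaussian", k=3, bandwidth=0.2
    #     )
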
    @classmethod
    def build_knn(cls, data, k, metric="euclidean", p=2, coincident="raise"):
        """Generate Graph from geometry data based on k-nearest neighbors search

        Parameters
        ----------
        data : numpy.ndarray, geopandas.GeoSeries, geopandas.GeoDataFrame
            geometries over which to compute a kernel. If a geopandas object
            with Point geometry is provided, the .geometry attribute is
            used. If a numpy.ndarray with shapely geometry is used, then the
            coordinates are extracted and used. If a numpy.ndarray of a
            shape (2,n) is used, it is assumed to contain x, y coordinates.
        k : int
            number of nearest neighbors.
        metric : string or callable (default: 'euclidean')
            distance function to apply over the input coordinates. Supported
            options depend on whether or not scikit-learn is installed. If
            so, then any distance function supported by scikit-learn is
            supported here. Otherwise, only euclidean, minkowski, and
            manhattan/cityblock distances are admitted.
        p : int (default: 2)
            parameter for minkowski metric, ignored if metric != "minkowski".
        coincident : str, optional (default "raise")
            Method for handling coincident points. Options include
            ``'raise'`` (raising an exception when coincident points are
            present), ``'jitter'`` (randomly displace coincident points to
            produce uniqueness), & ``'clique'`` (induce fully-connected sub
            cliques for coincident points).

        Returns
        -------
        Graph
            libpysal.graph.Graph encoding KNN weights
        """
        ids = _evaluate_index(data)

        head, tail, weight = _kernel(
            data,
            bandwidth=np.inf,
            metric=metric,
            kernel="boxcar",
            k=k,
            p=p,
            ids=ids,
            coincident=coincident,
        )

        return cls.from_arrays(head, tail, weight)

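    # Example -- a sketch of a 5-nearest-neighbor graph; KNN relations need
    # not be mutual, so the result is generally asymmetric (coordinates are
    # illustrative):
    #
    #     import numpy as np
    #     from libpysal.graph import Graph
    #
    #     coords = np.random.default_rng(0).random((20, 2))
    #     knn5 = Graph.build_knn(coords, k=5)
    #     knn5.asymmetry()  # typically non-empty for KNN graphs
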
    @classmethod
    def build_spatial_matches(
        cls,
        data,
        k,
        metric="euclidean",
        solver=None,
        allow_partial_match=False,
        **metric_kwargs,
    ):
        """
        Match each location in ``data`` to at least ``k`` other locations in
        the same dataset by minimizing the total distance between matched
        locations.

        Letting :math:`d_{ij}` be the distance between locations :math:`i`
        and :math:`j`, the problem is to

        .. math::

            \\text{minimize} \\sum_i^n \\sum_j^n d_{ij}m_{ij}

            \\text{subject to} \\sum_j^n m_{ij} \\geq k \\quad \\forall i

            m_{ij} \\in \\{0, 1\\} \\quad \\forall ij

        Parameters
        ----------
        data : numpy.ndarray, geopandas.GeoSeries, geopandas.GeoDataFrame
            geometries that need matches. If a geopandas.Geo* object is
            provided, the .geometry attribute is used. If a numpy.ndarray
            with a geometry dtype is used, then the coordinates are
            extracted and used.
        k : int
            number of matches
        metric : string or callable (default: 'euclidean')
            distance function to apply over the input coordinates. Supported
            options depend on whether or not scikit-learn is installed. If
            so, then any distance function supported by scikit-learn is
            supported here. Otherwise, only euclidean, minkowski, and
            manhattan/cityblock distances are admitted.
        solver : solver from pulp (default: None)
            a solver defined by the pulp optimization library. If no solver
            is provided, pulp's default solver will be used. This is
            generally pulp.COIN(), but this may vary depending on your
            configuration.
        allow_partial_match : bool (default: False)
            whether to allow for partial matching. A partial match may have
            a weight between zero and one, while a "full" match (by default)
            must have a weight of either zero or one. A partial matching may
            have a shorter total distance, but will result in a weighted
            graph.
        **metric_kwargs
            additional keyword arguments passed to ``_spatial_matching``.

        Returns
        -------
        Graph
            libpysal.graph.Graph encoding matching weights
        """
        head, tail, weight = _spatial_matching(
            x=data,
            metric=metric,
            n_matches=k,
            solver=solver,
            allow_partial_match=allow_partial_match,
            **metric_kwargs,
        )
        # ids need to be addressed here, rather than in the matching
        # because x and y can have different id sets. It's only
        # in W where we *know* we can just use one id vector.
        return cls.from_arrays(head, tail, weight)

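    # Example -- a minimal sketch of a minimum-distance matching within one
    # point set (assumes the ``pulp`` optimization library is installed;
    # the coordinates and ``k`` are illustrative):
    #
    #     import numpy as np
    #     from libpysal.graph import Graph
    #
    #     coords = np.random.default_rng(0).random((10, 2))
    #     matches = Graph.build_spatial_matches(coords, k=2)
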
    @classmethod
    def build_triangulation(
        cls,
        data,
        method="delaunay",
        bandwidth=np.inf,
        kernel="boxcar",
        clip="bounding_box",
        rook=True,
        coincident="raise",
    ):
        """Generate Graph from geometry based on triangulation

        Parameters
        ----------
        data : numpy.ndarray, geopandas.GeoSeries, geopandas.GeoDataFrame
            geometries containing locations to compute the delaunay
            triangulation. If a geopandas object with Point geometry is
            provided, the .geometry attribute is used. If a numpy.ndarray
            with shapely geometry is used, then the coordinates are
            extracted and used. If a numpy.ndarray of a shape (2,n) is used,
            it is assumed to contain x, y coordinates.
        method : str, (default "delaunay")
            method of extracting the weights from triangulation. Supports:

            - ``"delaunay"``
            - ``"gabriel"``
            - ``"relative_neighborhood"``
            - ``"voronoi"``
        bandwidth : float, optional
            distance to use in the kernel computation. Should be on the same
            scale as the input coordinates, by default numpy.inf
        kernel : str, optional
            kernel function to use in order to weight the output graph. See
            :meth:`Graph.build_kernel` for details. By default "boxcar".
        clip : str (default: 'bounding_box')
            Clipping method when ``method="voronoi"``. Ignored otherwise.
            Options are as follows.

            * ``None`` -- No clip is applied. Voronoi cells may be
              arbitrarily larger than the source map. Note that this may
              lead to cells that are many orders of magnitude larger in
              extent than the original map. Not recommended.
            * ``'bounding_box'`` -- Clip the voronoi cells to the bounding
              box of the input points.
            * ``'convex_hull'`` -- Clip the voronoi cells to the convex hull
              of the input points.
            * ``'alpha_shape'`` -- Clip the voronoi cells to the tightest
              hull that contains all points (e.g. the smallest alpha shape,
              using :func:`libpysal.cg.alpha_shape_auto`).
            * ``shapely.Polygon`` -- Clip to an arbitrary Polygon.
        rook : bool, optional
            Contiguity method when ``method="voronoi"``. Ignored otherwise.
            If True, two geometries are considered neighbours if they share
            at least one edge. If False, two geometries are considered
            neighbours if they share at least one vertex. By default True.
        coincident : str, optional (default "raise")
            Method for handling coincident points. Options include
            ``'raise'`` (raising an exception when coincident points are
            present), ``'jitter'`` (randomly displace coincident points to
            produce uniqueness), & ``'clique'`` (induce fully-connected sub
            cliques for coincident points).

        Returns
        -------
        Graph
            libpysal.graph.Graph encoding triangulation weights
        """
        ids = _evaluate_index(data)

        if method == "delaunay":
            head, tail, weights = _delaunay(
                data, ids=ids, bandwidth=bandwidth, kernel=kernel, coincident=coincident
            )
        elif method == "gabriel":
            head, tail, weights = _gabriel(
                data, ids=ids, bandwidth=bandwidth, kernel=kernel, coincident=coincident
            )
        elif method == "relative_neighborhood":
            head, tail, weights = _relative_neighborhood(
                data, ids=ids, bandwidth=bandwidth, kernel=kernel, coincident=coincident
            )
        elif method == "voronoi":
            head, tail, weights = _voronoi(
                data, ids=ids, clip=clip, rook=rook, coincident=coincident
            )
        else:
            raise ValueError(
                f"Method '{method}' is not supported. Use one of ['delaunay', "
                "'gabriel', 'relative_neighborhood', 'voronoi']."
            )

        return cls.from_arrays(head, tail, weights)

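    # Example -- a sketch of three triangulation variants over the same
    # point set (coordinates and clip choice are illustrative):
    #
    #     import numpy as np
    #     from libpysal.graph import Graph
    #
    #     coords = np.random.default_rng(0).random((30, 2))
    #     delaunay = Graph.build_triangulation(coords)
    #     gabriel = Graph.build_triangulation(coords, method="gabriel")
    #     voronoi = Graph.build_triangulation(
    #         coords, method="voronoi", clip="convex_hull"
    #     )
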
    @cached_property
    def neighbors(self):
        """Get neighbors dictionary

        Notes
        -----
        It is recommended to work directly with :meth:`Graph.adjacency`
        rather than using :meth:`Graph.neighbors`.

        Returns
        -------
        dict
            dict of tuples representing neighbors
        """
        return (
            self._adjacency.reset_index(level=1)
            .groupby(level=0)
            .apply(
                lambda group: tuple(
                    group[
                        ~((group.index == group.neighbor) & (group.weight == 0))
                    ].neighbor
                )
            )
            .to_dict()
        )

    @cached_property
    def weights(self):
        """Get weights dictionary

        Notes
        -----
        It is recommended to work directly with :meth:`Graph.adjacency`
        rather than using :meth:`Graph.weights`.

        Returns
        -------
        dict
            dict of tuples representing weights
        """
        return (
            self._adjacency.reset_index(level=1)
            .groupby(level=0)
            .apply(
                lambda group: tuple(
                    group[
                        ~((group.index == group.neighbor) & (group.weight == 0))
                    ].weight
                )
            )
            .to_dict()
        )

    @cached_property
    def sparse(self):
        """Return a scipy.sparse array (COO)

        Returns
        -------
        scipy.sparse.COO
            sparse representation of the adjacency
        """
        # pivot to COO sparse matrix and cast to array
        return sparse.coo_array(
            self._adjacency.astype("Sparse[float]").sparse.to_coo(sort_labels=True)[0]
        )

    def transform(self, transformation):
        """Transformation of weights

        Parameters
        ----------
        transformation : str | callable
            Transformation method. The following are valid transformations:

            - **B** -- Binary
            - **R** -- Row-standardization (global sum :math:`=n`)
            - **D** -- Double-standardization (global sum :math:`=1`)
            - **V** -- Variance stabilizing

            Alternatively, you can pass your own callable, which will be
            passed to ``self.adjacency.groupby(level=0).transform()``.

        Returns
        -------
        Graph
            transformed weights

        Raises
        ------
        ValueError
            Value error for unsupported transformation
        """
        if isinstance(transformation, str):
            transformation = transformation.upper()

        if self.transformation == transformation:
            return self.copy()

        if transformation == "R":
            standardized = (
                (self._adjacency / self._adjacency.groupby(level=0).transform("sum"))
                .fillna(0)
                .values
            )  # isolate comes as NaN -> 0

        elif transformation == "D":
            standardized = (self._adjacency / self._adjacency.sum()).values

        elif transformation == "B":
            standardized = self._adjacency.astype(bool).astype(int)

        elif transformation == "V":
            s = self._adjacency.groupby(level=0).transform(
                lambda group: group / math.sqrt((group**2).sum())
            )
            n_q = self.n / s.sum()
            standardized = (s * n_q).fillna(0).values  # isolate comes as NaN -> 0

        elif callable(transformation):
            standardized = self._adjacency.groupby(level=0).transform(transformation)
            transformation = "C"

        else:
            raise ValueError(
                f"Transformation '{transformation}' is not supported. "
                f"Use one of {ALLOWED_TRANSFORMATIONS[1:]} or pass a callable."
            )

        standardized_adjacency = pd.Series(
            standardized, name="weight", index=self._adjacency.index
        )
        return Graph(standardized_adjacency, transformation, is_sorted=True)

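    # Example -- a sketch of chained transformations; each call returns a
    # new Graph, leaving the original untouched (``contiguity`` as in the
    # contiguity sketch above):
    #
    #     row_std = contiguity.transform("r")   # each row sums to 1
    #     binary = contiguity.transform("b")    # weights in {0, 1}
    #     custom = contiguity.transform(lambda g: g / g.max())  # "C"
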
    @cached_property
    def _components(self):
        """helper for n_components and component_labels"""
        # TODO: remove casting to matrix once scipy supports arrays here
        return sparse.csgraph.connected_components(sparse.coo_matrix(self.sparse))

    @cached_property
    def n_components(self):
        """Get the number of connected components

        Returns
        -------
        int
            number of components
        """
        return self._components[0]

    @cached_property
    def component_labels(self):
        """Get component labels per observation

        Returns
        -------
        numpy.array
            Array of component labels
        """
        return pd.Series(
            self._components[1], index=self.unique_ids, name="component labels"
        )

    @cached_property
    def cardinalities(self):
        """Number of neighbors for each observation

        Returns
        -------
        pandas.Series
            Series with the number of neighbors per observation
        """
        cardinalities = self._adjacency.astype(bool).groupby(level=0).sum()
        cardinalities.name = "cardinalities"
        return cardinalities

    @cached_property
    def isolates(self):
        """Index of observations with no neighbors

        Isolates are encoded as a self-loop with the weight == 0 in the
        adjacency table.

        Returns
        -------
        pandas.Index
            Index with the subset of observations that do not have any
            neighbor
        """
        nulls = self._adjacency[self._adjacency == 0]
        # since not all zeros are necessarily isolates,
        # do the focal == neighbor check
        return (
            nulls[nulls.index.codes[0] == nulls.index.codes[1]]
            .index.get_level_values(0)
            .unique()
        )

    @cached_property
    def unique_ids(self):
        """Unique IDs used in the Graph"""
        return self._adjacency.index.get_level_values("focal").unique()

    @cached_property
    def n(self):
        """Number of observations."""
        return self.unique_ids.shape[0]

    @cached_property
    def n_nodes(self):
        """Number of nodes."""
        return self.unique_ids.shape[0]

    @cached_property
    def n_edges(self):
        """Number of edges."""
        return self._adjacency.shape[0] - self.isolates.shape[0]

    @cached_property
    def pct_nonzero(self):
        """Percentage of nonzero weights."""
        p = 100.0 * self.sparse.nnz / (1.0 * self.n**2)
        return p

    @cached_property
    def nonzero(self):
        """Number of nonzero weights."""
        return (self._adjacency.drop(self.isolates) > 0).sum()

    def asymmetry(self, intrinsic=True):
        r"""Asymmetry check.

        Parameters
        ----------
        intrinsic : bool, optional
            Default is ``True``. Intrinsic symmetry is defined as:

            .. math::

                w_{i,j} == w_{j,i}

            If ``intrinsic`` is ``False``, symmetry is defined as:

            .. math::

                i \in N_j \ \& \ j \in N_i

            where :math:`N_j` is the set of neighbors for :math:`j`, i.e.,
            ``True`` requires equality of the weights to consider two links
            equal, while ``False`` requires only the presence of a link with
            a non-zero weight.

        Returns
        -------
        pandas.Series
            A ``Series`` of ``(i,j)`` pairs of asymmetries sorted ascending
            by the focal observation (index value), where ``i`` is the focal
            and ``j`` is the neighbor. An empty ``Series`` is returned if no
            asymmetries are found.
        """
        if intrinsic:
            wd = self.sparse.transpose() - self.sparse
        else:
            transformed = self.transform("b")
            wd = transformed.sparse.transpose() - transformed.sparse

        ids = np.nonzero(wd)
        if len(ids[0]) == 0:
            return pd.Series(
                index=pd.Index([], name="focal"),
                name="neighbor",
                dtype=self._adjacency.index.dtypes["focal"],
            )
        else:
            i2id = dict(
                zip(np.arange(self.unique_ids.shape[0]), self.unique_ids, strict=True)
            )
            focal, neighbor = np.nonzero(wd)
            focal = focal.astype(self._adjacency.index.dtypes["focal"])
            neighbor = neighbor.astype(self._adjacency.index.dtypes["focal"])
            for i in i2id:
                focal[focal == i] = i2id[i]
                neighbor[neighbor == i] = i2id[i]
            ijs = pd.Series(
                neighbor, index=pd.Index(focal, name="focal"), name="neighbor"
            ).sort_index()
            return ijs

    def higher_order(self, k=2, shortest_path=True, diagonal=False, lower_order=False):
        """Contiguity weights object of order :math:`k`.

        Proper higher order neighbors are returned such that :math:`i` and
        :math:`j` are :math:`k`-order neighbors if the shortest path between
        :math:`i` and :math:`j` is of length :math:`k`.

        Parameters
        ----------
        k : int, optional
            Order of contiguity. By default 2.
        shortest_path : bool, optional
            If True, :math:`i,j` are :math:`k`-order neighbors if the
            shortest path between :math:`i` and :math:`j` is of length
            :math:`k`. If False, :math:`i,j` are :math:`k`-order neighbors
            if there is a path from :math:`i` to :math:`j` of length
            :math:`k`. By default True.
        diagonal : bool, optional
            If True, keep :math:`k`-order (:math:`i,j`) joins when
            :math:`i==j`. If False, remove :math:`k`-order (:math:`i,j`)
            joins when :math:`i==j`. By default False.
        lower_order : bool, optional
            If True, include lower order contiguities. If False, return only
            weights of order :math:`k`. By default False.

        Returns
        -------
        Graph
            higher order weights
        """
        # TODO: remove casting to matrix once scipy implements matrix_power
        # on array.
        binary = self.transform("B")
        sp = sparse.csr_matrix(binary.sparse)

        if lower_order:
            wk = sum(sp**x for x in range(2, k + 1))
            shortest_path = False
        else:
            wk = sp**k

        rk, ck = wk.nonzero()
        sk = set(zip(rk, ck, strict=True))

        if shortest_path:
            for j in range(1, k):
                wj = sp**j
                rj, cj = wj.nonzero()
                sj = set(zip(rj, cj, strict=True))
                sk.difference_update(sj)
        if not diagonal:
            sk = {(i, j) for i, j in sk if i != j}

        return Graph.from_sparse(
            sparse.coo_array(
                (
                    np.ones(len(sk), dtype=np.int8),
                    ([s[0] for s in sk], [s[1] for s in sk]),
                ),
                shape=sp.shape,
            ),
            ids=self.unique_ids,
        )

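    # Example -- a sketch of second-order neighbors, with and without the
    # lower-order links included (``contiguity`` as in the contiguity
    # sketch above):
    #
    #     exactly_two = contiguity.higher_order(k=2)
    #     within_two = contiguity.higher_order(k=2, lower_order=True)
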
    def lag(self, y):
        """Spatial lag operator

        If weights are row standardized, returns the mean of each
        observation's neighbors; if not, returns the weighted sum of each
        observation's neighbors.

        Parameters
        ----------
        y : array-like
            array-like (N,) shape where N is equal to number of observations
            in self.

        Returns
        -------
        numpy.ndarray
            array of numeric values for the spatial lag
        """
        return _lag_spatial(self, y)

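    # Example -- a sketch of the spatial lag as a neighborhood mean under
    # row standardization (``contiguity`` as above; ``y`` is an
    # illustrative attribute array):
    #
    #     import numpy as np
    #
    #     row_std = contiguity.transform("r")
    #     y = np.arange(row_std.n)
    #     y_lag = row_std.lag(y)  # a weighted sum; a mean when rows sum to 1
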
    def to_parquet(self, path, **kwargs):
        """Save Graph to an Apache Parquet file

        Graph is serialized to Apache Parquet with the underlying adjacency
        object stored as a Parquet table and custom metadata containing the
        transformation.

        Requires the pyarrow package.

        Parameters
        ----------
        path : str | pyarrow.NativeFile
            path or any stream supported by pyarrow
        **kwargs
            additional keyword arguments passed to pyarrow.parquet.write_table

        See also
        --------
        read_parquet
        """
        _to_parquet(self, path, **kwargs)

    def to_networkx(self):
        """Convert Graph to a ``networkx`` graph.

        If the Graph is symmetric, returns ``nx.Graph``, otherwise returns
        a ``nx.DiGraph``.

        Returns
        -------
        networkx.Graph | networkx.DiGraph
            Representation of libpysal Graph as a networkx graph
        """
        try:
            import networkx as nx
        except ImportError:
            raise ImportError("NetworkX is required.") from None

        graph_type = nx.Graph if self.asymmetry().empty else nx.DiGraph

        return nx.from_pandas_edgelist(
            self._adjacency.reset_index(),
            source="focal",
            target="neighbor",
            edge_attr="weight",
            create_using=graph_type,
        )

    def plot(
        self,
        gdf,
        focal=None,
        nodes=True,
        color="k",
        edge_kws=None,
        node_kws=None,
        focal_kws=None,
        ax=None,
        figsize=None,
        limit_extent=False,
    ):
        """Plot edges and nodes of the Graph

        Creates a ``matplotlib`` plot based on the topology stored in the
        Graph and spatial location defined in ``gdf``.

        Parameters
        ----------
        gdf : geopandas.GeoDataFrame
            Geometries indexed using the same index as Graph. Geometry types
            other than points are converted to centroids encoding start and
            end point of Graph edges.
        focal : hashable | array-like[hashable] | None, optional
            ID or an array-like of IDs of focal geometries whose weights
            shall be plotted. If None, all weights from all focal geometries
            are plotted. By default None.
        nodes : bool, optional
            Plot nodes as points, by default True.
        color : str, optional
            The color of all objects, by default "k".
        edge_kws : dict, optional
            Keyword arguments dictionary to send to ``LineCollection``,
            which provides fine-grained control over the aesthetics of the
            edges in the plot. By default None.
        node_kws : dict, optional
            Keyword arguments dictionary to send to ``ax.scatter``, which
            provides fine-grained control over the aesthetics of the nodes
            in the plot. By default None.
        focal_kws : dict, optional
            Keyword arguments dictionary to send to ``ax.scatter``, which
            provides fine-grained control over the aesthetics of the focal
            nodes in the plot on top of generic ``node_kws``. Values of
            ``node_kws`` are updated from ``focal_kws``. Ignored if
            ``focal=None``. By default None.
        ax : matplotlib.axes.Axes, optional
            Axis on which to plot the weights. If None, a new figure and
            axis are created. By default None.
        figsize : tuple, optional
            figsize used to create a new axis. By default None.
        limit_extent : bool, optional
            limit the extent of the axis to the extent of the plotted graph,
            by default False.

        Returns
        -------
        matplotlib.axes.Axes
            Axis with the resulting plot

        Notes
        -----
        If you'd like to overlay the actual geometries from the
        ``geopandas.GeoDataFrame``, create an axis by plotting the
        ``GeoDataFrame`` and plot the Graph on top::

            ax = gdf.plot()
            gdf_graph.plot(gdf, ax=ax)
        """
        return _plot(
            self,
            gdf,
            focal=focal,
            nodes=nodes,
            color=color,
            node_kws=node_kws,
            edge_kws=edge_kws,
            focal_kws=focal_kws,
            ax=ax,
            figsize=figsize,
            limit_extent=limit_extent,
        )

    def explore(
        self,
        gdf,
        focal=None,
        nodes=True,
        color="black",
        edge_kws=None,
        node_kws=None,
        focal_kws=None,
        m=None,
        **kwargs,
    ):
        """Plot graph as an interactive Folium Map

        Parameters
        ----------
        gdf : geopandas.GeoDataFrame
            geodataframe used to instantiate the Graph
        focal : list, optional
            subset of focal observations to plot in the map, by default
            None. If None, all relationships are plotted.
        nodes : bool, optional
            whether to display observations as nodes in the map, by default
            True.
        color : str, optional
            color applied to nodes and edges, by default "black".
        edge_kws : dict, optional
            additional keyword arguments passed to the geopandas explore
            function when plotting edges, by default None.
        node_kws : dict, optional
            additional keyword arguments passed to the geopandas explore
            function when plotting nodes, by default None.
        focal_kws : dict, optional
            additional keyword arguments passed to the geopandas explore
            function when plotting focal observations, by default None. Only
            applicable when passing a subset of nodes with the ``focal``
            argument.
        m : folium.Map, optional
            folium map object to plot on top of, by default None.
        **kwargs : dict, optional
            additional keyword arguments passed directly to
            geopandas.explore when ``m=None``, by default None.

        Returns
        -------
        folium.Map
            folium map
        """
        return _explore_graph(
            self,
            gdf,
            focal=focal,
            nodes=nodes,
            color=color,
            edge_kws=edge_kws,
            node_kws=node_kws,
            focal_kws=focal_kws,
            m=m,
            **kwargs,
        )

    def subgraph(self, ids):
        """Returns a subset of Graph containing only nodes specified in ids

        The resulting subgraph contains only the nodes in ``ids`` and the
        edges between them or zero-weight self-loops in case of isolates.

        The order of ``ids`` reflects a new canonical order of the resulting
        subgraph. This means ``ids`` should be equal to the index of the
        DataFrame containing data linked to the graph to ensure alignment of
        the sparse representation of the subgraph.

        Parameters
        ----------
        ids : array-like
            An array of node IDs to be retained

        Returns
        -------
        Graph
            A new Graph that is a subset of the original

        Notes
        -----
        Unlike the implementation in ``networkx``, this creates a copy since
        Graphs in ``libpysal`` are immutable.
        """
        masked_adj = self._adjacency[ids]
        filtered_adj = masked_adj[
            masked_adj.index.get_level_values("neighbor").isin(ids)
        ]
        return Graph.from_arrays(
            *_resolve_islands(
                filtered_adj.index.get_level_values("focal"),
                filtered_adj.index.get_level_values("neighbor"),
                ids,
                filtered_adj.values,
            )
        )

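    # Example -- a sketch of aligning a graph with a filtered dataset
    # (``gdf`` and ``contiguity`` as in the contiguity sketch above):
    #
    #     subset_ids = gdf.index[:10]
    #     sub = contiguity.subgraph(subset_ids)
    #     # sub's canonical order now follows the order of ``subset_ids``
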
    def eliminate_zeros(self):
        """Remove graph edges with zero weight

        Eliminates edges with weight == 0 that do not encode an isolate.
        This is useful to clean up edges that will have no effect in
        operations like :meth:`lag`.

        Returns
        -------
        Graph
            subset of Graph with zero-weight edges eliminated
        """
        # get a mask for self-loops (isolates are zero-weight self-loops)
        isolates = self._adjacency.index.codes[0] == self._adjacency.index.codes[1]
        # subtract self-loops from the mask of zeros, keeping isolates
        zeros = (self._adjacency == 0) & ~isolates
        return Graph(self._adjacency[~zeros], is_sorted=True)

    def assign_self_weight(self, weight=1):
        """Assign values to edges representing the self-weight.

        The value for each ``focal == neighbor`` location in the graph is
        set to ``weight``.

        Parameters
        ----------
        weight : float | array-like
            Defines the value(s) to which the weight representing the
            relationship with itself should be set. If a constant is passed,
            then each self-weight will get this value (default is 1). An
            array of length ``Graph.n`` can be passed to set explicit values
            for each self-weight (assumed to be in the same order as the
            original data).

        Returns
        -------
        Graph
            A new ``Graph`` with added self-weights.
        """
        addition = pd.Series(
            weight,
            index=pd.MultiIndex.from_arrays(
                [self.unique_ids, self.unique_ids], names=["focal", "neighbor"]
            ),
            name="weight",
        )
        adj = (
            pd.concat([self.adjacency.drop(self.isolates), addition])
            .reindex(self.unique_ids, level=0)
            .reindex(self.unique_ids, level=1)
        )
        return Graph(adj, is_sorted=True)

    def apply(self, y, func, **kwargs):
        """Apply a reduction across the neighbor sets

        Applies ``func`` over groups of ``y`` defined by neighbors for each
        focal.

        Parameters
        ----------
        y : array_like
            array of values to be grouped. Can be 1-D or 2-D and will be
            coerced to a pandas object
        func : function, str, list, dict or None
            Function to use for aggregating the data passed to pandas
            ``GroupBy.apply``.

        Returns
        -------
        Series | DataFrame
            pandas object indexed by unique_ids
        """
        if not isinstance(y, pd.Series | pd.DataFrame):
            y = pd.DataFrame(y) if hasattr(y, "ndim") and y.ndim == 2 else pd.Series(y)
        grouper = y.take(self._adjacency.index.codes[1]).groupby(
            self._adjacency.index.codes[0]
        )
        result = grouper.apply(func, **kwargs)
        result.index = self.unique_ids
        if isinstance(result, pd.Series):
            result.name = None
        return result

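    # Example -- a sketch of a custom reduction over neighbor sets, here
    # the median of an illustrative attribute (``graph`` is any Graph
    # instance; ``y`` must align with the graph's ids):
    #
    #     import numpy as np
    #
    #     y = np.random.default_rng(0).random(graph.n)
    #     neighbor_medians = graph.apply(y, np.median)
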
    def aggregate(self, func):
        """Aggregate weights within a neighbor set

        Apply a custom aggregation function to the group of weights of the
        same focal geometry.

        Parameters
        ----------
        func : callable
            A callable accepted by the pandas ``groupby.agg`` method

        Returns
        -------
        pd.Series
            Aggregated weights
        """
        return self._adjacency.groupby(level=0).agg(func)


def _arrange_arrays(heads, tails, weights, ids=None):
    """
    Rearrange input arrays so that observation indices are well-ordered with
    respect to the input ids. That is, an "early" identifier should always
    precede a "later" identifier in the heads, but the tails should be sorted
    with respect to heads *first*, then sorted within the tails.
    """
    if ids is None:
        ids = np.unique(np.hstack((heads, tails)))
    lookup = list(ids).index
    input_df = pd.DataFrame.from_dict(
        {"focal": heads, "neighbor": tails, "weight": weights}
    )
    return (
        input_df.set_index(["focal", "neighbor"])
        .assign(
            focal_loc=input_df.focal.apply(lookup).values,
            neighbor_loc=input_df.neighbor.apply(lookup).values,
        )
        .sort_values(["focal_loc", "neighbor_loc"])
        .reset_index()
        .drop(["focal_loc", "neighbor_loc"], axis=1)
        .values.T
    )


def read_parquet(path, **kwargs):
    """Read Graph from an Apache Parquet file

    Read Graph serialized using ``Graph.to_parquet()`` back into the
    ``Graph`` object. The Parquet file needs to contain an adjacency table
    with the structure required by the ``Graph`` constructor and optional
    metadata with the type of transformation.

    Parameters
    ----------
    path : str | pyarrow.NativeFile | file-like object
        path or any stream supported by pyarrow
    **kwargs
        additional keyword arguments passed to pyarrow.parquet.read_table

    Returns
    -------
    Graph
        deserialized Graph
    """
    adjacency, transformation = _read_parquet(path, **kwargs)
    return Graph(adjacency, transformation, is_sorted=True)